1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/BinaryFormat/Dwarf.h"
24#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
28#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
29#include "llvm/CodeGen/GlobalISel/Utils.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineConstantPool.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstr.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineOperand.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/TargetOpcodes.h"
40#include "llvm/CodeGen/TargetRegisterInfo.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(mf&: MF, kb: KB, covinfo: CoverageInfo, psi: PSI, bfi: BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Kind: Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
201 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
350 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
351 MachineIRBuilder &MIB) const;
352 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353 CmpInst::Predicate CC,
354 AArch64CC::CondCode Predicate,
355 AArch64CC::CondCode OutCC,
356 MachineIRBuilder &MIB) const;
357 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
389 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
390 return selectAddrModeUnscaled(Root, Size: 1);
391 }
392 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
393 return selectAddrModeUnscaled(Root, Size: 2);
394 }
395 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
396 return selectAddrModeUnscaled(Root, Size: 4);
397 }
398 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
399 return selectAddrModeUnscaled(Root, Size: 8);
400 }
401 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
402 return selectAddrModeUnscaled(Root, Size: 16);
403 }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
412 template <int Width>
413 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
414 return selectAddrModeIndexed(Root, Size: Width / 8);
415 }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
428 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
429 /// or not a shift + extend should be folded into an addressing mode. Returns
430 /// None when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
438 template <int Width>
439 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
440 return selectAddrModeXRO(Root, SizeInBytes: Width / 8);
441 }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
445 template <int Width>
446 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
447 return selectAddrModeWRO(Root, SizeInBytes: Width / 8);
448 }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
457 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
458 return selectShiftedRegister(Root, AllowROR: true);
459 }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 if (SizeInBits <= 32)
619 return GetAllRegSet ? &AArch64::GPR32allRegClass
620 : &AArch64::GPR32RegClass;
621 if (SizeInBits == 64)
622 return GetAllRegSet ? &AArch64::GPR64allRegClass
623 : &AArch64::GPR64RegClass;
624 if (SizeInBits == 128)
625 return &AArch64::XSeqPairsClassRegClass;
626 }
627
628 if (RegBankID == AArch64::FPRRegBankID) {
629 switch (SizeInBits) {
630 default:
631 return nullptr;
632 case 8:
633 return &AArch64::FPR8RegClass;
634 case 16:
635 return &AArch64::FPR16RegClass;
636 case 32:
637 return &AArch64::FPR32RegClass;
638 case 64:
639 return &AArch64::FPR64RegClass;
640 case 128:
641 return &AArch64::FPR128RegClass;
642 }
643 }
644
645 return nullptr;
646}
647
648/// Returns the correct subregister to use for a given register class.
649static bool getSubRegForClass(const TargetRegisterClass *RC,
650 const TargetRegisterInfo &TRI, unsigned &SubReg) {
651 switch (TRI.getRegSizeInBits(RC: *RC)) {
652 case 8:
653 SubReg = AArch64::bsub;
654 break;
655 case 16:
656 SubReg = AArch64::hsub;
657 break;
658 case 32:
659 if (RC != &AArch64::FPR32RegClass)
660 SubReg = AArch64::sub_32;
661 else
662 SubReg = AArch64::ssub;
663 break;
664 case 64:
665 SubReg = AArch64::dsub;
666 break;
667 default:
668 LLVM_DEBUG(
669 dbgs() << "Couldn't find appropriate subregister for register class.");
670 return false;
671 }
672
673 return true;
674}
675
676/// Returns the minimum size the given register bank can hold.
677static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
678 switch (RB.getID()) {
679 case AArch64::GPRRegBankID:
680 return 32;
681 case AArch64::FPRRegBankID:
682 return 8;
683 default:
684 llvm_unreachable("Tried to get minimum size for unknown register bank.");
685 }
686}
687
688/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
689/// Helper function for functions like createDTuple and createQTuple.
690///
691/// \p RegClassIDs - The list of register class IDs available for some tuple of
692/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
693/// expected to contain between 2 and 4 tuple classes.
694///
695/// \p SubRegs - The list of subregister classes associated with each register
696/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
697/// subregister class. The index of each subregister class is expected to
698/// correspond with the index of each register class.
699///
700/// \returns Either the destination register of REG_SEQUENCE instruction that
701/// was created, or the 0th element of \p Regs if \p Regs contains a single
702/// element.
703static Register createTuple(ArrayRef<Register> Regs,
704 const unsigned RegClassIDs[],
705 const unsigned SubRegs[], MachineIRBuilder &MIB) {
706 unsigned NumRegs = Regs.size();
707 if (NumRegs == 1)
708 return Regs[0];
709 assert(NumRegs >= 2 && NumRegs <= 4 &&
710 "Only support between two and 4 registers in a tuple!");
711 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
712 auto *DesiredClass = TRI->getRegClass(i: RegClassIDs[NumRegs - 2]);
713 auto RegSequence =
714 MIB.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {DesiredClass}, SrcOps: {});
715 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
716 RegSequence.addUse(RegNo: Regs[I]);
717 RegSequence.addImm(Val: SubRegs[I]);
718 }
719 return RegSequence.getReg(Idx: 0);
720}
721
722/// Create a tuple of D-registers using the registers in \p Regs.
723static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
724 static const unsigned RegClassIDs[] = {
725 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
726 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
727 AArch64::dsub2, AArch64::dsub3};
728 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
729}
730
731/// Create a tuple of Q-registers using the registers in \p Regs.
732static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
733 static const unsigned RegClassIDs[] = {
734 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
735 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
736 AArch64::qsub2, AArch64::qsub3};
737 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
738}
739
740static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
741 auto &MI = *Root.getParent();
742 auto &MBB = *MI.getParent();
743 auto &MF = *MBB.getParent();
744 auto &MRI = MF.getRegInfo();
745 uint64_t Immed;
746 if (Root.isImm())
747 Immed = Root.getImm();
748 else if (Root.isCImm())
749 Immed = Root.getCImm()->getZExtValue();
750 else if (Root.isReg()) {
751 auto ValAndVReg =
752 getIConstantVRegValWithLookThrough(VReg: Root.getReg(), MRI, LookThroughInstrs: true);
753 if (!ValAndVReg)
754 return std::nullopt;
755 Immed = ValAndVReg->Value.getSExtValue();
756 } else
757 return std::nullopt;
758 return Immed;
759}
760
761/// Check whether \p I is a currently unsupported binary operation:
762/// - it has an unsized type
763/// - an operand is not a vreg
764/// - all operands are not in the same bank
765/// These are checks that should someday live in the verifier, but right now,
766/// these are mostly limitations of the aarch64 selector.
767static bool unsupportedBinOp(const MachineInstr &I,
768 const AArch64RegisterBankInfo &RBI,
769 const MachineRegisterInfo &MRI,
770 const AArch64RegisterInfo &TRI) {
771 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
772 if (!Ty.isValid()) {
773 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
774 return true;
775 }
776
777 const RegisterBank *PrevOpBank = nullptr;
778 for (auto &MO : I.operands()) {
779 // FIXME: Support non-register operands.
780 if (!MO.isReg()) {
781 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
782 return true;
783 }
784
785 // FIXME: Can generic operations have physical registers operands? If
786 // so, this will need to be taught about that, and we'll need to get the
787 // bank out of the minimal class for the register.
788 // Either way, this needs to be documented (and possibly verified).
789 if (!MO.getReg().isVirtual()) {
790 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
791 return true;
792 }
793
794 const RegisterBank *OpBank = RBI.getRegBank(Reg: MO.getReg(), MRI, TRI);
795 if (!OpBank) {
796 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
797 return true;
798 }
799
800 if (PrevOpBank && OpBank != PrevOpBank) {
801 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
802 return true;
803 }
804 PrevOpBank = OpBank;
805 }
806 return false;
807}
808
809/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
810/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
811/// and of size \p OpSize.
812/// \returns \p GenericOpc if the combination is unsupported.
813static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
814 unsigned OpSize) {
815 switch (RegBankID) {
816 case AArch64::GPRRegBankID:
817 if (OpSize == 32) {
818 switch (GenericOpc) {
819 case TargetOpcode::G_SHL:
820 return AArch64::LSLVWr;
821 case TargetOpcode::G_LSHR:
822 return AArch64::LSRVWr;
823 case TargetOpcode::G_ASHR:
824 return AArch64::ASRVWr;
825 default:
826 return GenericOpc;
827 }
828 } else if (OpSize == 64) {
829 switch (GenericOpc) {
830 case TargetOpcode::G_PTR_ADD:
831 return AArch64::ADDXrr;
832 case TargetOpcode::G_SHL:
833 return AArch64::LSLVXr;
834 case TargetOpcode::G_LSHR:
835 return AArch64::LSRVXr;
836 case TargetOpcode::G_ASHR:
837 return AArch64::ASRVXr;
838 default:
839 return GenericOpc;
840 }
841 }
842 break;
843 case AArch64::FPRRegBankID:
844 switch (OpSize) {
845 case 32:
846 switch (GenericOpc) {
847 case TargetOpcode::G_FADD:
848 return AArch64::FADDSrr;
849 case TargetOpcode::G_FSUB:
850 return AArch64::FSUBSrr;
851 case TargetOpcode::G_FMUL:
852 return AArch64::FMULSrr;
853 case TargetOpcode::G_FDIV:
854 return AArch64::FDIVSrr;
855 default:
856 return GenericOpc;
857 }
858 case 64:
859 switch (GenericOpc) {
860 case TargetOpcode::G_FADD:
861 return AArch64::FADDDrr;
862 case TargetOpcode::G_FSUB:
863 return AArch64::FSUBDrr;
864 case TargetOpcode::G_FMUL:
865 return AArch64::FMULDrr;
866 case TargetOpcode::G_FDIV:
867 return AArch64::FDIVDrr;
868 case TargetOpcode::G_OR:
869 return AArch64::ORRv8i8;
870 default:
871 return GenericOpc;
872 }
873 }
874 break;
875 }
876 return GenericOpc;
877}
878
879/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
880/// appropriate for the (value) register bank \p RegBankID and of memory access
881/// size \p OpSize. This returns the variant with the base+unsigned-immediate
882/// addressing mode (e.g., LDRXui).
883/// \returns \p GenericOpc if the combination is unsupported.
884static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
885 unsigned OpSize) {
886 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
887 switch (RegBankID) {
888 case AArch64::GPRRegBankID:
889 switch (OpSize) {
890 case 8:
891 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
892 case 16:
893 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
894 case 32:
895 return isStore ? AArch64::STRWui : AArch64::LDRWui;
896 case 64:
897 return isStore ? AArch64::STRXui : AArch64::LDRXui;
898 }
899 break;
900 case AArch64::FPRRegBankID:
901 switch (OpSize) {
902 case 8:
903 return isStore ? AArch64::STRBui : AArch64::LDRBui;
904 case 16:
905 return isStore ? AArch64::STRHui : AArch64::LDRHui;
906 case 32:
907 return isStore ? AArch64::STRSui : AArch64::LDRSui;
908 case 64:
909 return isStore ? AArch64::STRDui : AArch64::LDRDui;
910 case 128:
911 return isStore ? AArch64::STRQui : AArch64::LDRQui;
912 }
913 break;
914 }
915 return GenericOpc;
916}
917
918/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
919/// to \p *To.
920///
921/// E.g "To = COPY SrcReg:SubReg"
922static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
923 const RegisterBankInfo &RBI, Register SrcReg,
924 const TargetRegisterClass *To, unsigned SubReg) {
925 assert(SrcReg.isValid() && "Expected a valid source register?");
926 assert(To && "Destination register class cannot be null");
927 assert(SubReg && "Expected a valid subregister");
928
929 MachineIRBuilder MIB(I);
930 auto SubRegCopy =
931 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {To}, SrcOps: {}).addReg(RegNo: SrcReg, flags: 0, SubReg);
932 MachineOperand &RegOp = I.getOperand(i: 1);
933 RegOp.setReg(SubRegCopy.getReg(Idx: 0));
934
935 // It's possible that the destination register won't be constrained. Make
936 // sure that happens.
937 if (!I.getOperand(i: 0).getReg().isPhysical())
938 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: *To, MRI);
939
940 return true;
941}
942
943/// Helper function to get the source and destination register classes for a
944/// copy. Returns a std::pair containing the source register class for the
945/// copy, and the destination register class for the copy. If a register class
946/// cannot be determined, then it will be nullptr.
947static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
948getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
949 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
950 const RegisterBankInfo &RBI) {
951 Register DstReg = I.getOperand(i: 0).getReg();
952 Register SrcReg = I.getOperand(i: 1).getReg();
953 const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
954 const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
955
956 TypeSize DstSize = RBI.getSizeInBits(Reg: DstReg, MRI, TRI);
957 TypeSize SrcSize = RBI.getSizeInBits(Reg: SrcReg, MRI, TRI);
958
959 // Special casing for cross-bank copies of s1s. We can technically represent
960 // a 1-bit value with any size of register. The minimum size for a GPR is 32
961 // bits. So, we need to put the FPR on 32 bits as well.
962 //
963 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
964 // then we can pull it into the helpers that get the appropriate class for a
965 // register bank. Or make a new helper that carries along some constraint
966 // information.
967 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
968 SrcSize = DstSize = TypeSize::getFixed(ExactSize: 32);
969
970 return {getMinClassForRegBank(RB: SrcRegBank, SizeInBits: SrcSize, GetAllRegSet: true),
971 getMinClassForRegBank(RB: DstRegBank, SizeInBits: DstSize, GetAllRegSet: true)};
972}
973
974// FIXME: We need some sort of API in RBI/TRI to allow generic code to
975// constrain operands of simple instructions given a TargetRegisterClass
976// and LLT
977static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
978 const RegisterBankInfo &RBI) {
979 for (MachineOperand &MO : I.operands()) {
980 if (!MO.isReg())
981 continue;
982 Register Reg = MO.getReg();
983 if (!Reg)
984 continue;
985 if (Reg.isPhysical())
986 continue;
987 LLT Ty = MRI.getType(Reg);
988 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
989 const TargetRegisterClass *RC =
990 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
991 if (!RC) {
992 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
993 RC = getRegClassForTypeOnBank(Ty, RB);
994 if (!RC) {
995 LLVM_DEBUG(
996 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
997 break;
998 }
999 }
1000 RBI.constrainGenericRegister(Reg, RC: *RC, MRI);
1001 }
1002
1003 return true;
1004}
1005
1006static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1007 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1008 const RegisterBankInfo &RBI) {
1009 Register DstReg = I.getOperand(i: 0).getReg();
1010 Register SrcReg = I.getOperand(i: 1).getReg();
1011 const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
1012 const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
1013
1014 // Find the correct register classes for the source and destination registers.
1015 const TargetRegisterClass *SrcRC;
1016 const TargetRegisterClass *DstRC;
1017 std::tie(args&: SrcRC, args&: DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1018
1019 if (!DstRC) {
1020 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1021 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1022 return false;
1023 }
1024
1025 // Is this a copy? If so, then we may need to insert a subregister copy.
1026 if (I.isCopy()) {
1027 // Yes. Check if there's anything to fix up.
1028 if (!SrcRC) {
1029 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1030 return false;
1031 }
1032
1033 const TypeSize SrcSize = TRI.getRegSizeInBits(RC: *SrcRC);
1034 const TypeSize DstSize = TRI.getRegSizeInBits(RC: *DstRC);
1035 unsigned SubReg;
1036
1037 // If the source bank doesn't support a subregister copy small enough,
1038 // then we first need to copy to the destination bank.
1039 if (getMinSizeForRegBank(RB: SrcRegBank) > DstSize) {
1040 const TargetRegisterClass *DstTempRC =
1041 getMinClassForRegBank(RB: DstRegBank, SizeInBits: SrcSize, /* GetAllRegSet */ true);
1042 getSubRegForClass(RC: DstRC, TRI, SubReg);
1043
1044 MachineIRBuilder MIB(I);
1045 auto Copy = MIB.buildCopy(Res: {DstTempRC}, Op: {SrcReg});
1046 copySubReg(I, MRI, RBI, SrcReg: Copy.getReg(Idx: 0), To: DstRC, SubReg);
1047 } else if (SrcSize > DstSize) {
1048 // If the source register is bigger than the destination we need to
1049 // perform a subregister copy.
1050 const TargetRegisterClass *SubRegRC =
1051 getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true);
1052 getSubRegForClass(RC: SubRegRC, TRI, SubReg);
1053 copySubReg(I, MRI, RBI, SrcReg, To: DstRC, SubReg);
1054 } else if (DstSize > SrcSize) {
1055 // If the destination register is bigger than the source we need to do
1056 // a promotion using SUBREG_TO_REG.
1057 const TargetRegisterClass *PromotionRC =
1058 getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true);
1059 getSubRegForClass(RC: SrcRC, TRI, SubReg);
1060
1061 Register PromoteReg = MRI.createVirtualRegister(RegClass: PromotionRC);
1062 BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
1063 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: PromoteReg)
1064 .addImm(Val: 0)
1065 .addUse(RegNo: SrcReg)
1066 .addImm(Val: SubReg);
1067 MachineOperand &RegOp = I.getOperand(i: 1);
1068 RegOp.setReg(PromoteReg);
1069 }
1070
1071 // If the destination is a physical register, then there's nothing to
1072 // change, so we're done.
1073 if (DstReg.isPhysical())
1074 return true;
1075 }
1076
1077 // No need to constrain SrcReg. It will get constrained when we hit another
1078 // of its use or its defs. Copies do not have constraints.
1079 if (!RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
1080 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1081 << " operand\n");
1082 return false;
1083 }
1084
1085 // If this a GPR ZEXT that we want to just reduce down into a copy.
1086 // The sizes will be mismatched with the source < 32b but that's ok.
1087 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1088 I.setDesc(TII.get(Opcode: AArch64::COPY));
1089 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1090 return selectCopy(I, TII, MRI, TRI, RBI);
1091 }
1092
1093 I.setDesc(TII.get(Opcode: AArch64::COPY));
1094 return true;
1095}
1096
1097static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1098 if (!DstTy.isScalar() || !SrcTy.isScalar())
1099 return GenericOpc;
1100
1101 const unsigned DstSize = DstTy.getSizeInBits();
1102 const unsigned SrcSize = SrcTy.getSizeInBits();
1103
1104 switch (DstSize) {
1105 case 32:
1106 switch (SrcSize) {
1107 case 32:
1108 switch (GenericOpc) {
1109 case TargetOpcode::G_SITOFP:
1110 return AArch64::SCVTFUWSri;
1111 case TargetOpcode::G_UITOFP:
1112 return AArch64::UCVTFUWSri;
1113 case TargetOpcode::G_FPTOSI:
1114 return AArch64::FCVTZSUWSr;
1115 case TargetOpcode::G_FPTOUI:
1116 return AArch64::FCVTZUUWSr;
1117 default:
1118 return GenericOpc;
1119 }
1120 case 64:
1121 switch (GenericOpc) {
1122 case TargetOpcode::G_SITOFP:
1123 return AArch64::SCVTFUXSri;
1124 case TargetOpcode::G_UITOFP:
1125 return AArch64::UCVTFUXSri;
1126 case TargetOpcode::G_FPTOSI:
1127 return AArch64::FCVTZSUWDr;
1128 case TargetOpcode::G_FPTOUI:
1129 return AArch64::FCVTZUUWDr;
1130 default:
1131 return GenericOpc;
1132 }
1133 default:
1134 return GenericOpc;
1135 }
1136 case 64:
1137 switch (SrcSize) {
1138 case 32:
1139 switch (GenericOpc) {
1140 case TargetOpcode::G_SITOFP:
1141 return AArch64::SCVTFUWDri;
1142 case TargetOpcode::G_UITOFP:
1143 return AArch64::UCVTFUWDri;
1144 case TargetOpcode::G_FPTOSI:
1145 return AArch64::FCVTZSUXSr;
1146 case TargetOpcode::G_FPTOUI:
1147 return AArch64::FCVTZUUXSr;
1148 default:
1149 return GenericOpc;
1150 }
1151 case 64:
1152 switch (GenericOpc) {
1153 case TargetOpcode::G_SITOFP:
1154 return AArch64::SCVTFUXDri;
1155 case TargetOpcode::G_UITOFP:
1156 return AArch64::UCVTFUXDri;
1157 case TargetOpcode::G_FPTOSI:
1158 return AArch64::FCVTZSUXDr;
1159 case TargetOpcode::G_FPTOUI:
1160 return AArch64::FCVTZUUXDr;
1161 default:
1162 return GenericOpc;
1163 }
1164 default:
1165 return GenericOpc;
1166 }
1167 default:
1168 return GenericOpc;
1169 };
1170 return GenericOpc;
1171}
1172
1173MachineInstr *
1174AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1175 Register False, AArch64CC::CondCode CC,
1176 MachineIRBuilder &MIB) const {
1177 MachineRegisterInfo &MRI = *MIB.getMRI();
1178 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1179 RBI.getRegBank(True, MRI, TRI)->getID() &&
1180 "Expected both select operands to have the same regbank?");
1181 LLT Ty = MRI.getType(Reg: True);
1182 if (Ty.isVector())
1183 return nullptr;
1184 const unsigned Size = Ty.getSizeInBits();
1185 assert((Size == 32 || Size == 64) &&
1186 "Expected 32 bit or 64 bit select only?");
1187 const bool Is32Bit = Size == 32;
1188 if (RBI.getRegBank(Reg: True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1189 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1190 auto FCSel = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
1191 constrainSelectedInstRegOperands(I&: *FCSel, TII, TRI, RBI);
1192 return &*FCSel;
1193 }
1194
1195 // By default, we'll try and emit a CSEL.
1196 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1197 bool Optimized = false;
1198 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1199 &Optimized](Register &Reg, Register &OtherReg,
1200 bool Invert) {
1201 if (Optimized)
1202 return false;
1203
1204 // Attempt to fold:
1205 //
1206 // %sub = G_SUB 0, %x
1207 // %select = G_SELECT cc, %reg, %sub
1208 //
1209 // Into:
1210 // %select = CSNEG %reg, %x, cc
1211 Register MatchReg;
1212 if (mi_match(R: Reg, MRI, P: m_Neg(Src: m_Reg(R&: MatchReg)))) {
1213 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1214 Reg = MatchReg;
1215 if (Invert) {
1216 CC = AArch64CC::getInvertedCondCode(Code: CC);
1217 std::swap(a&: Reg, b&: OtherReg);
1218 }
1219 return true;
1220 }
1221
1222 // Attempt to fold:
1223 //
1224 // %xor = G_XOR %x, -1
1225 // %select = G_SELECT cc, %reg, %xor
1226 //
1227 // Into:
1228 // %select = CSINV %reg, %x, cc
1229 if (mi_match(R: Reg, MRI, P: m_Not(Src: m_Reg(R&: MatchReg)))) {
1230 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1231 Reg = MatchReg;
1232 if (Invert) {
1233 CC = AArch64CC::getInvertedCondCode(Code: CC);
1234 std::swap(a&: Reg, b&: OtherReg);
1235 }
1236 return true;
1237 }
1238
1239 // Attempt to fold:
1240 //
1241 // %add = G_ADD %x, 1
1242 // %select = G_SELECT cc, %reg, %add
1243 //
1244 // Into:
1245 // %select = CSINC %reg, %x, cc
1246 if (mi_match(R: Reg, MRI,
1247 P: m_any_of(preds: m_GAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1)),
1248 preds: m_GPtrAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1))))) {
1249 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1250 Reg = MatchReg;
1251 if (Invert) {
1252 CC = AArch64CC::getInvertedCondCode(Code: CC);
1253 std::swap(a&: Reg, b&: OtherReg);
1254 }
1255 return true;
1256 }
1257
1258 return false;
1259 };
1260
1261 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1262 // true/false values are constants.
1263 // FIXME: All of these patterns already exist in tablegen. We should be
1264 // able to import these.
1265 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1266 &Optimized]() {
1267 if (Optimized)
1268 return false;
1269 auto TrueCst = getIConstantVRegValWithLookThrough(VReg: True, MRI);
1270 auto FalseCst = getIConstantVRegValWithLookThrough(VReg: False, MRI);
1271 if (!TrueCst && !FalseCst)
1272 return false;
1273
1274 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1275 if (TrueCst && FalseCst) {
1276 int64_t T = TrueCst->Value.getSExtValue();
1277 int64_t F = FalseCst->Value.getSExtValue();
1278
1279 if (T == 0 && F == 1) {
1280 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1281 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1282 True = ZReg;
1283 False = ZReg;
1284 return true;
1285 }
1286
1287 if (T == 0 && F == -1) {
1288 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1289 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1290 True = ZReg;
1291 False = ZReg;
1292 return true;
1293 }
1294 }
1295
1296 if (TrueCst) {
1297 int64_t T = TrueCst->Value.getSExtValue();
1298 if (T == 1) {
1299 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1300 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1301 True = False;
1302 False = ZReg;
1303 CC = AArch64CC::getInvertedCondCode(Code: CC);
1304 return true;
1305 }
1306
1307 if (T == -1) {
1308 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1309 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1310 True = False;
1311 False = ZReg;
1312 CC = AArch64CC::getInvertedCondCode(Code: CC);
1313 return true;
1314 }
1315 }
1316
1317 if (FalseCst) {
1318 int64_t F = FalseCst->Value.getSExtValue();
1319 if (F == 1) {
1320 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1321 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1322 False = ZReg;
1323 return true;
1324 }
1325
1326 if (F == -1) {
1327 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1328 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1329 False = ZReg;
1330 return true;
1331 }
1332 }
1333 return false;
1334 };
1335
1336 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1337 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1338 Optimized |= TryOptSelectCst();
1339 auto SelectInst = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
1340 constrainSelectedInstRegOperands(I&: *SelectInst, TII, TRI, RBI);
1341 return &*SelectInst;
1342}
1343
1344static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1345 switch (P) {
1346 default:
1347 llvm_unreachable("Unknown condition code!");
1348 case CmpInst::ICMP_NE:
1349 return AArch64CC::NE;
1350 case CmpInst::ICMP_EQ:
1351 return AArch64CC::EQ;
1352 case CmpInst::ICMP_SGT:
1353 return AArch64CC::GT;
1354 case CmpInst::ICMP_SGE:
1355 return AArch64CC::GE;
1356 case CmpInst::ICMP_SLT:
1357 return AArch64CC::LT;
1358 case CmpInst::ICMP_SLE:
1359 return AArch64CC::LE;
1360 case CmpInst::ICMP_UGT:
1361 return AArch64CC::HI;
1362 case CmpInst::ICMP_UGE:
1363 return AArch64CC::HS;
1364 case CmpInst::ICMP_ULT:
1365 return AArch64CC::LO;
1366 case CmpInst::ICMP_ULE:
1367 return AArch64CC::LS;
1368 }
1369}
1370
1371/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1372static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1373 AArch64CC::CondCode &CondCode,
1374 AArch64CC::CondCode &CondCode2) {
1375 CondCode2 = AArch64CC::AL;
1376 switch (CC) {
1377 default:
1378 llvm_unreachable("Unknown FP condition!");
1379 case CmpInst::FCMP_OEQ:
1380 CondCode = AArch64CC::EQ;
1381 break;
1382 case CmpInst::FCMP_OGT:
1383 CondCode = AArch64CC::GT;
1384 break;
1385 case CmpInst::FCMP_OGE:
1386 CondCode = AArch64CC::GE;
1387 break;
1388 case CmpInst::FCMP_OLT:
1389 CondCode = AArch64CC::MI;
1390 break;
1391 case CmpInst::FCMP_OLE:
1392 CondCode = AArch64CC::LS;
1393 break;
1394 case CmpInst::FCMP_ONE:
1395 CondCode = AArch64CC::MI;
1396 CondCode2 = AArch64CC::GT;
1397 break;
1398 case CmpInst::FCMP_ORD:
1399 CondCode = AArch64CC::VC;
1400 break;
1401 case CmpInst::FCMP_UNO:
1402 CondCode = AArch64CC::VS;
1403 break;
1404 case CmpInst::FCMP_UEQ:
1405 CondCode = AArch64CC::EQ;
1406 CondCode2 = AArch64CC::VS;
1407 break;
1408 case CmpInst::FCMP_UGT:
1409 CondCode = AArch64CC::HI;
1410 break;
1411 case CmpInst::FCMP_UGE:
1412 CondCode = AArch64CC::PL;
1413 break;
1414 case CmpInst::FCMP_ULT:
1415 CondCode = AArch64CC::LT;
1416 break;
1417 case CmpInst::FCMP_ULE:
1418 CondCode = AArch64CC::LE;
1419 break;
1420 case CmpInst::FCMP_UNE:
1421 CondCode = AArch64CC::NE;
1422 break;
1423 }
1424}
1425
1426/// Convert an IR fp condition code to an AArch64 CC.
1427/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1428/// should be AND'ed instead of OR'ed.
1429static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1430 AArch64CC::CondCode &CondCode,
1431 AArch64CC::CondCode &CondCode2) {
1432 CondCode2 = AArch64CC::AL;
1433 switch (CC) {
1434 default:
1435 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1436 assert(CondCode2 == AArch64CC::AL);
1437 break;
1438 case CmpInst::FCMP_ONE:
1439 // (a one b)
1440 // == ((a olt b) || (a ogt b))
1441 // == ((a ord b) && (a une b))
1442 CondCode = AArch64CC::VC;
1443 CondCode2 = AArch64CC::NE;
1444 break;
1445 case CmpInst::FCMP_UEQ:
1446 // (a ueq b)
1447 // == ((a uno b) || (a oeq b))
1448 // == ((a ule b) && (a uge b))
1449 CondCode = AArch64CC::PL;
1450 CondCode2 = AArch64CC::LE;
1451 break;
1452 }
1453}
1454
1455/// Return a register which can be used as a bit to test in a TB(N)Z.
1456static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1457 MachineRegisterInfo &MRI) {
1458 assert(Reg.isValid() && "Expected valid register!");
1459 bool HasZext = false;
1460 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1461 unsigned Opc = MI->getOpcode();
1462
1463 if (!MI->getOperand(i: 0).isReg() ||
1464 !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
1465 break;
1466
1467 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1468 //
1469 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1470 // on the truncated x is the same as the bit number on x.
1471 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1472 Opc == TargetOpcode::G_TRUNC) {
1473 if (Opc == TargetOpcode::G_ZEXT)
1474 HasZext = true;
1475
1476 Register NextReg = MI->getOperand(i: 1).getReg();
1477 // Did we find something worth folding?
1478 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(RegNo: NextReg))
1479 break;
1480
1481 // NextReg is worth folding. Keep looking.
1482 Reg = NextReg;
1483 continue;
1484 }
1485
1486 // Attempt to find a suitable operation with a constant on one side.
1487 std::optional<uint64_t> C;
1488 Register TestReg;
1489 switch (Opc) {
1490 default:
1491 break;
1492 case TargetOpcode::G_AND:
1493 case TargetOpcode::G_XOR: {
1494 TestReg = MI->getOperand(i: 1).getReg();
1495 Register ConstantReg = MI->getOperand(i: 2).getReg();
1496 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
1497 if (!VRegAndVal) {
1498 // AND commutes, check the other side for a constant.
1499 // FIXME: Can we canonicalize the constant so that it's always on the
1500 // same side at some point earlier?
1501 std::swap(a&: ConstantReg, b&: TestReg);
1502 VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
1503 }
1504 if (VRegAndVal) {
1505 if (HasZext)
1506 C = VRegAndVal->Value.getZExtValue();
1507 else
1508 C = VRegAndVal->Value.getSExtValue();
1509 }
1510 break;
1511 }
1512 case TargetOpcode::G_ASHR:
1513 case TargetOpcode::G_LSHR:
1514 case TargetOpcode::G_SHL: {
1515 TestReg = MI->getOperand(i: 1).getReg();
1516 auto VRegAndVal =
1517 getIConstantVRegValWithLookThrough(VReg: MI->getOperand(i: 2).getReg(), MRI);
1518 if (VRegAndVal)
1519 C = VRegAndVal->Value.getSExtValue();
1520 break;
1521 }
1522 }
1523
1524 // Didn't find a constant or viable register. Bail out of the loop.
1525 if (!C || !TestReg.isValid())
1526 break;
1527
1528 // We found a suitable instruction with a constant. Check to see if we can
1529 // walk through the instruction.
1530 Register NextReg;
1531 unsigned TestRegSize = MRI.getType(Reg: TestReg).getSizeInBits();
1532 switch (Opc) {
1533 default:
1534 break;
1535 case TargetOpcode::G_AND:
1536 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1537 if ((*C >> Bit) & 1)
1538 NextReg = TestReg;
1539 break;
1540 case TargetOpcode::G_SHL:
1541 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and fits in
1542 // the type of the register.
1543 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1544 NextReg = TestReg;
1545 Bit = Bit - *C;
1546 }
1547 break;
1548 case TargetOpcode::G_ASHR:
1549 // (tbz (ashr x, c), b) -> (tbz x, b+c), or (tbz x, msb) if b+c is >= the
1550 // number of bits in x
1551 NextReg = TestReg;
1552 Bit = Bit + *C;
1553 if (Bit >= TestRegSize)
1554 Bit = TestRegSize - 1;
1555 break;
1556 case TargetOpcode::G_LSHR:
1557 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1558 if ((Bit + *C) < TestRegSize) {
1559 NextReg = TestReg;
1560 Bit = Bit + *C;
1561 }
1562 break;
1563 case TargetOpcode::G_XOR:
1564 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1565 // appropriate.
1566 //
1567 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1568 //
1569 // tbz x', b -> tbnz x, b
1570 //
1571 // Because x' only has the b-th bit set if x does not.
1572 if ((*C >> Bit) & 1)
1573 Invert = !Invert;
1574 NextReg = TestReg;
1575 break;
1576 }
1577
1578 // Check if we found anything worth folding.
1579 if (!NextReg.isValid())
1580 return Reg;
1581 Reg = NextReg;
1582 }
1583
1584 return Reg;
1585}
1586
1587MachineInstr *AArch64InstructionSelector::emitTestBit(
1588 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1589 MachineIRBuilder &MIB) const {
1590 assert(TestReg.isValid());
1591 assert(ProduceNonFlagSettingCondBr &&
1592 "Cannot emit TB(N)Z with speculation tracking!");
1593 MachineRegisterInfo &MRI = *MIB.getMRI();
1594
1595 // Attempt to optimize the test bit by walking over instructions.
1596 TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI);
1597 LLT Ty = MRI.getType(Reg: TestReg);
1598 unsigned Size = Ty.getSizeInBits();
1599 assert(!Ty.isVector() && "Expected a scalar!");
1600 assert(Bit < 64 && "Bit is too large!");
1601
1602 // TB(N)ZW can only test bits 0-31, so use a W register for those bits and
1603 // an X register otherwise; move the value to the matching class if needed.
1604 bool UseWReg = Bit < 32;
1605 unsigned NecessarySize = UseWReg ? 32 : 64;
1606 if (Size != NecessarySize)
1607 TestReg = moveScalarRegClass(
1608 Reg: TestReg, RC: UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1609 MIB);
1610
1611 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1612 {AArch64::TBZW, AArch64::TBNZW}};
1613 unsigned Opc = OpcTable[UseWReg][IsNegative];
1614 auto TestBitMI =
1615 MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB);
1616 constrainSelectedInstRegOperands(I&: *TestBitMI, TII, TRI, RBI);
1617 return &*TestBitMI;
1618}
1619
1620bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1621 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1622 MachineIRBuilder &MIB) const {
1623 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1624 // Given something like this:
1625 //
1626 // %x = ...Something...
1627 // %one = G_CONSTANT i64 1
1628 // %zero = G_CONSTANT i64 0
1629 // %and = G_AND %x, %one
1630 // %cmp = G_ICMP intpred(ne), %and, %zero
1631 // %cmp_trunc = G_TRUNC %cmp
1632 // G_BRCOND %cmp_trunc, %bb.3
1633 //
1634 // We want to try and fold the AND into the G_BRCOND and produce either a
1635 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1636 //
1637 // In this case, we'd get
1638 //
1639 // TBNZ %x %bb.3
1640 //
1641
1642 // Check if the AND has a constant on its RHS which we can use as a mask.
1643 // If it's a power of 2, then it's the same as checking a specific bit.
1644 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1645 auto MaybeBit = getIConstantVRegValWithLookThrough(
1646 VReg: AndInst.getOperand(i: 2).getReg(), MRI: *MIB.getMRI());
1647 if (!MaybeBit)
1648 return false;
1649
1650 int32_t Bit = MaybeBit->Value.exactLogBase2();
1651 if (Bit < 0)
1652 return false;
1653
1654 Register TestReg = AndInst.getOperand(i: 1).getReg();
1655
1656 // Emit a TB(N)Z.
1657 emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB);
1658 return true;
1659}
1660
1661MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1662 bool IsNegative,
1663 MachineBasicBlock *DestMBB,
1664 MachineIRBuilder &MIB) const {
1665 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1666 MachineRegisterInfo &MRI = *MIB.getMRI();
1667 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1668 AArch64::GPRRegBankID &&
1669 "Expected GPRs only?");
1670 auto Ty = MRI.getType(Reg: CompareReg);
1671 unsigned Width = Ty.getSizeInBits();
1672 assert(!Ty.isVector() && "Expected scalar only?");
1673 assert(Width <= 64 && "Expected width to be at most 64?");
1674 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1675 {AArch64::CBNZW, AArch64::CBNZX}};
1676 unsigned Opc = OpcTable[IsNegative][Width == 64];
1677 auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB);
1678 constrainSelectedInstRegOperands(I&: *BranchMI, TII, TRI, RBI);
1679 return &*BranchMI;
1680}
1681
1682bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1683 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1684 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1685 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1686 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1687 // totally clean. Some of them require two branches to implement.
1688 auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: 1).getPredicate();
1689 emitFPCompare(LHS: FCmp.getOperand(i: 2).getReg(), RHS: FCmp.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
1690 Pred);
1691 AArch64CC::CondCode CC1, CC2;
1692 changeFCMPPredToAArch64CC(P: static_cast<CmpInst::Predicate>(Pred), CondCode&: CC1, CondCode2&: CC2);
1693 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1694 MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC1).addMBB(MBB: DestMBB);
1695 if (CC2 != AArch64CC::AL)
1696 MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC2).addMBB(MBB: DestMBB);
1697 I.eraseFromParent();
1698 return true;
1699}
1700
1701bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1702 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1703 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1704 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1705 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1706 //
1707 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1708 // instructions will not be produced, as they are conditional branch
1709 // instructions that do not set flags.
1710 if (!ProduceNonFlagSettingCondBr)
1711 return false;
1712
1713 MachineRegisterInfo &MRI = *MIB.getMRI();
1714 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1715 auto Pred =
1716 static_cast<CmpInst::Predicate>(ICmp.getOperand(i: 1).getPredicate());
1717 Register LHS = ICmp.getOperand(i: 2).getReg();
1718 Register RHS = ICmp.getOperand(i: 3).getReg();
1719
1720 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1721 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1722 MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1723
1724 // When we can emit a TB(N)Z, prefer that.
1725 //
1726 // Handle non-commutative condition codes first.
1727 // Note that we don't want to do this when we have a G_AND because it can
1728 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1729 if (VRegAndVal && !AndInst) {
1730 int64_t C = VRegAndVal->Value.getSExtValue();
1731
1732 // For a signed greater-than comparison against -1 (i.e. x >= 0), we can
1733 // just test if the msb is zero.
1734 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1735 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1736 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
1737 I.eraseFromParent();
1738 return true;
1739 }
1740
1741 // For a signed less-than comparison against 0 (i.e. x < 0), we can just
1742 // test if the msb is set.
1743 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1744 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1745 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ true, DstMBB: DestMBB, MIB);
1746 I.eraseFromParent();
1747 return true;
1748 }
1749
1750 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1751 // we can test if the msb is zero.
1752 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1753 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1754 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
1755 I.eraseFromParent();
1756 return true;
1757 }
1758 }
1759
1760 // Attempt to handle commutative condition codes. Right now, that's only
1761 // eq/ne.
1762 if (ICmpInst::isEquality(P: Pred)) {
1763 if (!VRegAndVal) {
1764 std::swap(a&: RHS, b&: LHS);
1765 VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1766 AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1767 }
1768
1769 if (VRegAndVal && VRegAndVal->Value == 0) {
1770 // If there's a G_AND feeding into this branch, try to fold it away by
1771 // emitting a TB(N)Z instead.
1772 //
1773 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1774 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1775 // would be redundant.
1776 if (AndInst &&
1777 tryOptAndIntoCompareBranch(
1778 AndInst&: *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) {
1779 I.eraseFromParent();
1780 return true;
1781 }
1782
1783 // Otherwise, try to emit a CB(N)Z instead.
1784 auto LHSTy = MRI.getType(Reg: LHS);
1785 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1786 emitCBZ(CompareReg: LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1787 I.eraseFromParent();
1788 return true;
1789 }
1790 }
1791 }
1792
1793 return false;
1794}
1795
1796bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1797 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1798 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1799 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1800 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1801 return true;
1802
1803 // Couldn't optimize. Emit a compare + a Bcc.
1804 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1805 auto PredOp = ICmp.getOperand(i: 1);
1806 emitIntegerCompare(LHS&: ICmp.getOperand(i: 2), RHS&: ICmp.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
1807 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1808 P: static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1809 MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC).addMBB(MBB: DestMBB);
1810 I.eraseFromParent();
1811 return true;
1812}
1813
1814bool AArch64InstructionSelector::selectCompareBranch(
1815 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1816 Register CondReg = I.getOperand(i: 0).getReg();
1817 MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg);
1818 // Try to select the G_BRCOND using whatever is feeding the condition if
1819 // possible.
1820 unsigned CCMIOpc = CCMI->getOpcode();
1821 if (CCMIOpc == TargetOpcode::G_FCMP)
1822 return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB);
1823 if (CCMIOpc == TargetOpcode::G_ICMP)
1824 return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB);
1825
1826 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1827 // instructions will not be produced, as they are conditional branch
1828 // instructions that do not set flags.
1829 if (ProduceNonFlagSettingCondBr) {
1830 emitTestBit(TestReg: CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1831 DstMBB: I.getOperand(i: 1).getMBB(), MIB);
1832 I.eraseFromParent();
1833 return true;
1834 }
1835
1836 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1837 auto TstMI =
1838 MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {LLT::scalar(SizeInBits: 32)}, SrcOps: {CondReg}).addImm(Val: 1);
1839 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
1840 auto Bcc = MIB.buildInstr(Opcode: AArch64::Bcc)
1841 .addImm(Val: AArch64CC::NE)
1842 .addMBB(MBB: I.getOperand(i: 1).getMBB());
1843 I.eraseFromParent();
1844 return constrainSelectedInstRegOperands(I&: *Bcc, TII, TRI, RBI);
1845}
1846
1847/// Returns the element immediate value of a vector shift operand if found.
1848/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1849static std::optional<int64_t> getVectorShiftImm(Register Reg,
1850 MachineRegisterInfo &MRI) {
1851 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1852 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1853 return getAArch64VectorSplatScalar(MI: *OpMI, MRI);
1854}
1855
1856/// Matches and returns the shift immediate value for a SHL instruction given
1857/// a shift operand.
1858static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1859 MachineRegisterInfo &MRI) {
1860 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1861 if (!ShiftImm)
1862 return std::nullopt;
1863 // Check the immediate is in range for a SHL.
1864 int64_t Imm = *ShiftImm;
1865 if (Imm < 0)
1866 return std::nullopt;
1867 switch (SrcTy.getElementType().getSizeInBits()) {
1868 default:
1869 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1870 return std::nullopt;
1871 case 8:
1872 if (Imm > 7)
1873 return std::nullopt;
1874 break;
1875 case 16:
1876 if (Imm > 15)
1877 return std::nullopt;
1878 break;
1879 case 32:
1880 if (Imm > 31)
1881 return std::nullopt;
1882 break;
1883 case 64:
1884 if (Imm > 63)
1885 return std::nullopt;
1886 break;
1887 }
1888 return Imm;
1889}
1890
1891bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1892 MachineRegisterInfo &MRI) {
1893 assert(I.getOpcode() == TargetOpcode::G_SHL);
1894 Register DstReg = I.getOperand(i: 0).getReg();
1895 const LLT Ty = MRI.getType(Reg: DstReg);
1896 Register Src1Reg = I.getOperand(i: 1).getReg();
1897 Register Src2Reg = I.getOperand(i: 2).getReg();
1898
1899 if (!Ty.isVector())
1900 return false;
1901
1902 // Check if we have a vector of constants on RHS that we can select as the
1903 // immediate form.
1904 std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI);
1905
1906 unsigned Opc = 0;
1907 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1908 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1909 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1910 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1911 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1912 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1913 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1914 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1915 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1916 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1917 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1918 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1919 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1920 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1921 } else {
1922 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1923 return false;
1924 }
1925
1926 auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg});
1927 if (ImmVal)
1928 Shl.addImm(Val: *ImmVal);
1929 else
1930 Shl.addUse(RegNo: Src2Reg);
1931 constrainSelectedInstRegOperands(I&: *Shl, TII, TRI, RBI);
1932 I.eraseFromParent();
1933 return true;
1934}
1935
1936bool AArch64InstructionSelector::selectVectorAshrLshr(
1937 MachineInstr &I, MachineRegisterInfo &MRI) {
1938 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1939 I.getOpcode() == TargetOpcode::G_LSHR);
1940 Register DstReg = I.getOperand(i: 0).getReg();
1941 const LLT Ty = MRI.getType(Reg: DstReg);
1942 Register Src1Reg = I.getOperand(i: 1).getReg();
1943 Register Src2Reg = I.getOperand(i: 2).getReg();
1944
1945 if (!Ty.isVector())
1946 return false;
1947
1948 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1949
1950 // We expect the immediate case to be lowered in the PostLegalizerCombiner
1951 // to the equivalents of AArch64ISD::VASHR or AArch64ISD::VLSHR.
1952
1953 // There is no shift-right-by-register instruction, but the shift-left-by-
1954 // register instructions take a signed shift amount, where negative values
1955 // specify a right shift.
1956
1957 unsigned Opc = 0;
1958 unsigned NegOpc = 0;
1959 const TargetRegisterClass *RC =
1960 getRegClassForTypeOnBank(Ty, RB: RBI.getRegBank(ID: AArch64::FPRRegBankID));
1961 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1962 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1963 NegOpc = AArch64::NEGv2i64;
1964 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1965 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1966 NegOpc = AArch64::NEGv4i32;
1967 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1968 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1969 NegOpc = AArch64::NEGv2i32;
1970 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1971 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1972 NegOpc = AArch64::NEGv4i16;
1973 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1974 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1975 NegOpc = AArch64::NEGv8i16;
1976 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1977 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1978 NegOpc = AArch64::NEGv16i8;
1979 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1980 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1981 NegOpc = AArch64::NEGv8i8;
1982 } else {
1983 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1984 return false;
1985 }
1986
1987 auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg});
1988 constrainSelectedInstRegOperands(I&: *Neg, TII, TRI, RBI);
1989 auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg});
1990 constrainSelectedInstRegOperands(I&: *SShl, TII, TRI, RBI);
1991 I.eraseFromParent();
1992 return true;
1993}
1994
1995bool AArch64InstructionSelector::selectVaStartAAPCS(
1996 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1997 return false;
1998}
1999
2000bool AArch64InstructionSelector::selectVaStartDarwin(
2001 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2002 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2003 Register ListReg = I.getOperand(i: 0).getReg();
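// On Darwin, va_list is a single pointer; compute the address where the
// variadic arguments start and store it through the va_list pointer.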
2004
2005 Register ArgsAddrReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2006
2007 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2008 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2009 CC: MF.getFunction().getCallingConv(), IsVarArg: MF.getFunction().isVarArg())) {
2010 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2011 ? FuncInfo->getVarArgsGPRIndex()
2012 : FuncInfo->getVarArgsStackIndex();
2013 }
2014
2015 auto MIB =
2016 BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri))
2017 .addDef(RegNo: ArgsAddrReg)
2018 .addFrameIndex(Idx: FrameIdx)
2019 .addImm(Val: 0)
2020 .addImm(Val: 0);
2021
2022 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2023
2024 MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRXui))
2025 .addUse(RegNo: ArgsAddrReg)
2026 .addUse(RegNo: ListReg)
2027 .addImm(Val: 0)
2028 .addMemOperand(MMO: *I.memoperands_begin());
2029
2030 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2031 I.eraseFromParent();
2032 return true;
2033}
2034
2035void AArch64InstructionSelector::materializeLargeCMVal(
2036 MachineInstr &I, const Value *V, unsigned OpFlags) {
2037 MachineBasicBlock &MBB = *I.getParent();
2038 MachineFunction &MF = *MBB.getParent();
2039 MachineRegisterInfo &MRI = MF.getRegInfo();
2040
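// Build the full 64-bit address 16 bits at a time: MOVZ materializes bits
// [0,16) and three MOVKs fill in bits [16,32), [32,48) and [48,64).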
2041 auto MovZ = MIB.buildInstr(Opc: AArch64::MOVZXi, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {});
2042 MovZ->addOperand(MF, Op: I.getOperand(i: 1));
2043 MovZ->getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2044 AArch64II::MO_NC);
2045 MovZ->addOperand(MF, Op: MachineOperand::CreateImm(Val: 0));
2046 constrainSelectedInstRegOperands(I&: *MovZ, TII, TRI, RBI);
2047
2048 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2049 Register ForceDstReg) {
2050 Register DstReg = ForceDstReg
2051 ? ForceDstReg
2052 : MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2053 auto MovI = MIB.buildInstr(Opcode: AArch64::MOVKXi).addDef(RegNo: DstReg).addUse(RegNo: SrcReg);
2054 if (auto *GV = dyn_cast<GlobalValue>(Val: V)) {
2055 MovI->addOperand(MF, Op: MachineOperand::CreateGA(
2056 GV, Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags));
2057 } else {
2058 MovI->addOperand(
2059 MF, Op: MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V),
2060 Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags));
2061 }
2062 MovI->addOperand(MF, Op: MachineOperand::CreateImm(Val: Offset));
2063 constrainSelectedInstRegOperands(I&: *MovI, TII, TRI, RBI);
2064 return DstReg;
2065 };
2066 Register DstReg = BuildMovK(MovZ.getReg(Idx: 0),
2067 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2068 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2069 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(i: 0).getReg());
2070}
2071
2072bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2073 MachineBasicBlock &MBB = *I.getParent();
2074 MachineFunction &MF = *MBB.getParent();
2075 MachineRegisterInfo &MRI = MF.getRegInfo();
2076
2077 switch (I.getOpcode()) {
2078 case TargetOpcode::G_STORE: {
2079 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2080 MachineOperand &SrcOp = I.getOperand(i: 0);
2081 if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) {
2082 // Allow matching with imported patterns for stores of pointers. Unlike
2083 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2084 // and constrain.
2085 auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: SrcOp);
2086 Register NewSrc = Copy.getReg(Idx: 0);
2087 SrcOp.setReg(NewSrc);
2088 RBI.constrainGenericRegister(Reg: NewSrc, RC: AArch64::GPR64RegClass, MRI);
2089 Changed = true;
2090 }
2091 return Changed;
2092 }
2093 case TargetOpcode::G_PTR_ADD:
2094 return convertPtrAddToAdd(I, MRI);
2095 case TargetOpcode::G_LOAD: {
2096 // For scalar loads of pointers, we try to convert the dest type from p0
2097 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2098 // conversion, this should be ok because all users should have been
2099 // selected already, so the type doesn't matter for them.
2100 Register DstReg = I.getOperand(i: 0).getReg();
2101 const LLT DstTy = MRI.getType(Reg: DstReg);
2102 if (!DstTy.isPointer())
2103 return false;
2104 MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: 64));
2105 return true;
2106 }
2107 case AArch64::G_DUP: {
2108 // Convert the type from p0 to s64 to help selection.
2109 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2110 if (!DstTy.isPointerVector())
2111 return false;
2112 auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 1).getReg());
2113 MRI.setType(VReg: I.getOperand(i: 0).getReg(),
2114 Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
2115 MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
2116 I.getOperand(i: 1).setReg(NewSrc.getReg(Idx: 0));
2117 return true;
2118 }
2119 case TargetOpcode::G_UITOFP:
2120 case TargetOpcode::G_SITOFP: {
2121 // If both source and destination regbanks are FPR, then convert the opcode
2122 // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant.
2123 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2124 // copy.
2125 Register SrcReg = I.getOperand(i: 1).getReg();
2126 LLT SrcTy = MRI.getType(Reg: SrcReg);
2127 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2128 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2129 return false;
2130
2131 if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2132 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2133 I.setDesc(TII.get(Opcode: AArch64::G_SITOF));
2134 else
2135 I.setDesc(TII.get(Opcode: AArch64::G_UITOF));
2136 return true;
2137 }
2138 return false;
2139 }
2140 default:
2141 return false;
2142 }
2143}
2144
2145/// This lowering tries to look for G_PTR_ADD instructions and then converts
2146/// them to a standard G_ADD with a COPY on the source.
2147///
2148/// The motivation behind this is to expose the add semantics to the imported
2149/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2150/// because the selector works bottom up, uses before defs. By the time we
2151/// end up trying to select a G_PTR_ADD, we should have already attempted to
2152/// fold this into addressing modes and were therefore unsuccessful.
2153bool AArch64InstructionSelector::convertPtrAddToAdd(
2154 MachineInstr &I, MachineRegisterInfo &MRI) {
2155 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2156 Register DstReg = I.getOperand(i: 0).getReg();
2157 Register AddOp1Reg = I.getOperand(i: 1).getReg();
2158 const LLT PtrTy = MRI.getType(Reg: DstReg);
2159 if (PtrTy.getAddressSpace() != 0)
2160 return false;
2161
2162 const LLT CastPtrTy =
2163 PtrTy.isVector() ? LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) : LLT::scalar(SizeInBits: 64);
2164 auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg);
2165 // Set regbanks on the registers.
2166 if (PtrTy.isVector())
2167 MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::FPRRegBankID));
2168 else
2169 MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
2170
2171 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2172 // %dst(intty) = G_ADD %intbase, off
2173 I.setDesc(TII.get(Opcode: TargetOpcode::G_ADD));
2174 MRI.setType(VReg: DstReg, Ty: CastPtrTy);
2175 I.getOperand(i: 1).setReg(PtrToInt.getReg(Idx: 0));
2176 if (!select(I&: *PtrToInt)) {
2177 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2178 return false;
2179 }
2180
2181 // Also take the opportunity here to try to do some optimization.
2182 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2183 Register NegatedReg;
2184 if (!mi_match(R: I.getOperand(i: 2).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg))))
2185 return true;
2186 I.getOperand(i: 2).setReg(NegatedReg);
2187 I.setDesc(TII.get(Opcode: TargetOpcode::G_SUB));
2188 return true;
2189}
2190
2191bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2192 MachineRegisterInfo &MRI) {
2193 // We try to match the immediate variant of LSL, which is actually an alias
2194 // for a special case of UBFM. Otherwise, we fall back to the imported
2195 // selector which will match the register variant.
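// e.g. for 64 bits: lsl Rd, Rn, #imm == ubfm Rd, Rn, #((64 - imm) % 64), #(63 - imm).
// The two immediate renderers below produce exactly those encodings.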
2196 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2197 const auto &MO = I.getOperand(i: 2);
2198 auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI);
2199 if (!VRegAndVal)
2200 return false;
2201
2202 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2203 if (DstTy.isVector())
2204 return false;
2205 bool Is64Bit = DstTy.getSizeInBits() == 64;
2206 auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO);
2207 auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO);
2208
2209 if (!Imm1Fn || !Imm2Fn)
2210 return false;
2211
2212 auto NewI =
2213 MIB.buildInstr(Opc: Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2214 DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {I.getOperand(i: 1).getReg()});
2215
2216 for (auto &RenderFn : *Imm1Fn)
2217 RenderFn(NewI);
2218 for (auto &RenderFn : *Imm2Fn)
2219 RenderFn(NewI);
2220
2221 I.eraseFromParent();
2222 return constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
2223}
2224
2225bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2226 MachineInstr &I, MachineRegisterInfo &MRI) {
2227 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2228 // If we're storing a scalar, it doesn't matter what register bank that
2229 // scalar is on. All that matters is the size.
2230 //
2231 // So, if we see something like this (with a 32-bit scalar as an example):
2232 //
2233 // %x:gpr(s32) = ... something ...
2234 // %y:fpr(s32) = COPY %x:gpr(s32)
2235 // G_STORE %y:fpr(s32)
2236 //
2237 // We can fix this up into something like this:
2238 //
2239 // G_STORE %x:gpr(s32)
2240 //
2241 // And then continue the selection process normally.
2242 Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: 0).getReg(), MRI);
2243 if (!DefDstReg.isValid())
2244 return false;
2245 LLT DefDstTy = MRI.getType(Reg: DefDstReg);
2246 Register StoreSrcReg = I.getOperand(i: 0).getReg();
2247 LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg);
2248
2249 // If we get something strange like a physical register, then we shouldn't
2250 // go any further.
2251 if (!DefDstTy.isValid())
2252 return false;
2253
2254 // Are the source and dst types the same size?
2255 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2256 return false;
2257
2258 if (RBI.getRegBank(Reg: StoreSrcReg, MRI, TRI) ==
2259 RBI.getRegBank(Reg: DefDstReg, MRI, TRI))
2260 return false;
2261
2262 // We have a cross-bank copy, which is entering a store. Let's fold it.
2263 I.getOperand(i: 0).setReg(DefDstReg);
2264 return true;
2265}
2266
2267bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2268 assert(I.getParent() && "Instruction should be in a basic block!");
2269 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2270
2271 MachineBasicBlock &MBB = *I.getParent();
2272 MachineFunction &MF = *MBB.getParent();
2273 MachineRegisterInfo &MRI = MF.getRegInfo();
2274
2275 switch (I.getOpcode()) {
2276 case AArch64::G_DUP: {
2277 // Before selecting a DUP instruction, check if it is better selected as a
2278 // MOV or load from a constant pool.
2279 Register Src = I.getOperand(i: 1).getReg();
2280 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(VReg: Src, MRI);
2281 if (!ValAndVReg)
2282 return false;
2283 LLVMContext &Ctx = MF.getFunction().getContext();
2284 Register Dst = I.getOperand(i: 0).getReg();
2285 auto *CV = ConstantDataVector::getSplat(
2286 NumElts: MRI.getType(Reg: Dst).getNumElements(),
2287 Elt: ConstantInt::get(Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Src).getSizeInBits()),
2288 V: ValAndVReg->Value));
2289 if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI))
2290 return false;
2291 I.eraseFromParent();
2292 return true;
2293 }
2294 case TargetOpcode::G_SEXT:
2295 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2296 // over a normal extend.
2297 if (selectUSMovFromExtend(I, MRI))
2298 return true;
2299 return false;
2300 case TargetOpcode::G_BR:
2301 return false;
2302 case TargetOpcode::G_SHL:
2303 return earlySelectSHL(I, MRI);
2304 case TargetOpcode::G_CONSTANT: {
2305 bool IsZero = false;
2306 if (I.getOperand(i: 1).isCImm())
2307 IsZero = I.getOperand(i: 1).getCImm()->isZero();
2308 else if (I.getOperand(i: 1).isImm())
2309 IsZero = I.getOperand(i: 1).getImm() == 0;
2310
2311 if (!IsZero)
2312 return false;
2313
2314 Register DefReg = I.getOperand(i: 0).getReg();
2315 LLT Ty = MRI.getType(Reg: DefReg);
2316 if (Ty.getSizeInBits() == 64) {
2317 I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::XZR, isDef: false);
2318 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
2319 } else if (Ty.getSizeInBits() == 32) {
2320 I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::WZR, isDef: false);
2321 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR32RegClass, MRI);
2322 } else
2323 return false;
2324
2325 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
2326 return true;
2327 }
2328
2329 case TargetOpcode::G_ADD: {
2330 // Check if this is being fed by a G_ICMP on either side.
2331 //
2332 // (cmp pred, x, y) + z
2333 //
2334 // In the above case, when the cmp is true, we increment z by 1. So, we can
2335 // fold the add into the cset for the cmp by using cinc.
2336 //
2337 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2338 Register AddDst = I.getOperand(i: 0).getReg();
2339 Register AddLHS = I.getOperand(i: 1).getReg();
2340 Register AddRHS = I.getOperand(i: 2).getReg();
2341 // Only handle scalars.
2342 LLT Ty = MRI.getType(Reg: AddLHS);
2343 if (Ty.isVector())
2344 return false;
2345 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2346 // bits.
2347 unsigned Size = Ty.getSizeInBits();
2348 if (Size != 32 && Size != 64)
2349 return false;
2350 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2351 if (!MRI.hasOneNonDBGUse(RegNo: Reg))
2352 return nullptr;
2353 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2354 // compare.
2355 if (Size == 32)
2356 return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI);
2357 // We model scalar compares using 32-bit destinations right now.
2358 // If it's a 64-bit compare, it'll have 64-bit sources.
2359 Register ZExt;
2360 if (!mi_match(R: Reg, MRI,
2361 P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt))))))
2362 return nullptr;
2363 auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI);
2364 if (!Cmp ||
2365 MRI.getType(Reg: Cmp->getOperand(i: 2).getReg()).getSizeInBits() != 64)
2366 return nullptr;
2367 return Cmp;
2368 };
2369 // Try to match
2370 // z + (cmp pred, x, y)
2371 MachineInstr *Cmp = MatchCmp(AddRHS);
2372 if (!Cmp) {
2373 // (cmp pred, x, y) + z
2374 std::swap(a&: AddLHS, b&: AddRHS);
2375 Cmp = MatchCmp(AddRHS);
2376 if (!Cmp)
2377 return false;
2378 }
2379 auto &PredOp = Cmp->getOperand(i: 1);
2380 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2381 const AArch64CC::CondCode InvCC =
2382 changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred));
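// CSINC Dst, AddLHS, AddLHS, InvCC yields AddLHS + 1 when the original
// predicate holds (InvCC is false) and AddLHS otherwise, i.e. AddLHS + cmp.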
2383 MIB.setInstrAndDebugLoc(I);
2384 emitIntegerCompare(/*LHS=*/Cmp->getOperand(i: 2),
2385 /*RHS=*/Cmp->getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
2386 emitCSINC(/*Dst=*/AddDst, /*Src =*/Src1: AddLHS, /*Src2=*/AddLHS, Pred: InvCC, MIRBuilder&: MIB);
2387 I.eraseFromParent();
2388 return true;
2389 }
2390 case TargetOpcode::G_OR: {
2391 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2392 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2393 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2394 Register Dst = I.getOperand(i: 0).getReg();
2395 LLT Ty = MRI.getType(Reg: Dst);
2396
2397 if (!Ty.isScalar())
2398 return false;
2399
2400 unsigned Size = Ty.getSizeInBits();
2401 if (Size != 32 && Size != 64)
2402 return false;
2403
2404 Register ShiftSrc;
2405 int64_t ShiftImm;
2406 Register MaskSrc;
2407 int64_t MaskImm;
2408 if (!mi_match(
2409 R: Dst, MRI,
2410 P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))),
2411 R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm))))))
2412 return false;
2413
2414 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2415 return false;
2416
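// This matches a BFI with lsb = ShiftImm and width = Size - ShiftImm, which
// encodes as a BFM with immr = (Size - lsb) % Size and imms = width - 1.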
2417 int64_t Immr = Size - ShiftImm;
2418 int64_t Imms = Size - ShiftImm - 1;
2419 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2420 emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB);
2421 I.eraseFromParent();
2422 return true;
2423 }
2424 case TargetOpcode::G_FENCE: {
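// Operand 0 is the AtomicOrdering and operand 1 is the SyncScope ID. A
// single-thread fence (scope 0) only needs a compiler barrier; an acquire
// fence (ordering 4) only needs a load barrier (DMB ISHLD, CRm = 0x9), and
// everything else gets a full DMB ISH (CRm = 0xb).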
2425 if (I.getOperand(i: 1).getImm() == 0)
2426 BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: TargetOpcode::MEMBARRIER));
2427 else
2428 BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: AArch64::DMB))
2429 .addImm(Val: I.getOperand(i: 0).getImm() == 4 ? 0x9 : 0xb);
2430 I.eraseFromParent();
2431 return true;
2432 }
2433 default:
2434 return false;
2435 }
2436}
2437
2438bool AArch64InstructionSelector::select(MachineInstr &I) {
2439 assert(I.getParent() && "Instruction should be in a basic block!");
2440 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2441
2442 MachineBasicBlock &MBB = *I.getParent();
2443 MachineFunction &MF = *MBB.getParent();
2444 MachineRegisterInfo &MRI = MF.getRegInfo();
2445
2446 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2447 if (Subtarget->requiresStrictAlign()) {
2448 // We don't support this feature yet.
2449 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2450 return false;
2451 }
2452
2453 MIB.setInstrAndDebugLoc(I);
2454
2455 unsigned Opcode = I.getOpcode();
2456 // G_PHI requires the same handling as PHI.
2457 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2458 // Certain non-generic instructions also need some special handling.
2459
2460 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2461 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2462
2463 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2464 const Register DefReg = I.getOperand(i: 0).getReg();
2465 const LLT DefTy = MRI.getType(Reg: DefReg);
2466
2467 const RegClassOrRegBank &RegClassOrBank =
2468 MRI.getRegClassOrRegBank(Reg: DefReg);
2469
2470 const TargetRegisterClass *DefRC
2471 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2472 if (!DefRC) {
2473 if (!DefTy.isValid()) {
2474 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2475 return false;
2476 }
2477 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2478 DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB);
2479 if (!DefRC) {
2480 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2481 return false;
2482 }
2483 }
2484
2485 I.setDesc(TII.get(Opcode: TargetOpcode::PHI));
2486
2487 return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI);
2488 }
2489
2490 if (I.isCopy())
2491 return selectCopy(I, TII, MRI, TRI, RBI);
2492
2493 if (I.isDebugInstr())
2494 return selectDebugInstr(I, MRI, RBI);
2495
2496 return true;
2497 }
2498
2499
2500 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2501 LLVM_DEBUG(
2502 dbgs() << "Generic instruction has unexpected implicit operands\n");
2503 return false;
2504 }
2505
2506 // Try to do some lowering before we start instruction selecting. These
2507 // lowerings are purely transformations on the input G_MIR and so selection
2508 // must continue after any modification of the instruction.
2509 if (preISelLower(I)) {
2510 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2511 }
2512
2513 // Some patterns can only be selected by the importer into a suboptimal
2514 // sequence, and once it has done so our custom C++ selection code later
2515 // never gets a chance to work on them. Therefore, we have an early
2516 // selection attempt here to give priority to certain selection routines
2517 // over the imported ones.
2518 if (earlySelect(I))
2519 return true;
2520
2521 if (selectImpl(I, CoverageInfo&: *CoverageInfo))
2522 return true;
2523
2524 LLT Ty =
2525 I.getOperand(i: 0).isReg() ? MRI.getType(Reg: I.getOperand(i: 0).getReg()) : LLT{};
2526
2527 switch (Opcode) {
2528 case TargetOpcode::G_SBFX:
2529 case TargetOpcode::G_UBFX: {
2530 static const unsigned OpcTable[2][2] = {
2531 {AArch64::UBFMWri, AArch64::UBFMXri},
2532 {AArch64::SBFMWri, AArch64::SBFMXri}};
2533 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2534 unsigned Size = Ty.getSizeInBits();
2535 unsigned Opc = OpcTable[IsSigned][Size == 64];
2536 auto Cst1 =
2537 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 2).getReg(), MRI);
2538 assert(Cst1 && "Should have gotten a constant for src 1?");
2539 auto Cst2 =
2540 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 3).getReg(), MRI);
2541 assert(Cst2 && "Should have gotten a constant for src 2?");
2542 auto LSB = Cst1->Value.getZExtValue();
2543 auto Width = Cst2->Value.getZExtValue();
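// G_SBFX/G_UBFX take (src, lsb, width); the SBFX/UBFX aliases encode this as
// [SU]BFM src, #lsb, #(lsb + width - 1), which is what we build here.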
2544 auto BitfieldInst =
2545 MIB.buildInstr(Opc, DstOps: {I.getOperand(i: 0)}, SrcOps: {I.getOperand(i: 1)})
2546 .addImm(Val: LSB)
2547 .addImm(Val: LSB + Width - 1);
2548 I.eraseFromParent();
2549 return constrainSelectedInstRegOperands(I&: *BitfieldInst, TII, TRI, RBI);
2550 }
2551 case TargetOpcode::G_BRCOND:
2552 return selectCompareBranch(I, MF, MRI);
2553
2554 case TargetOpcode::G_BRINDIRECT: {
2555 const Function &Fn = MF.getFunction();
2556 if (std::optional<uint16_t> BADisc =
2557 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: Fn)) {
2558 auto MI = MIB.buildInstr(Opc: AArch64::BRA, DstOps: {}, SrcOps: {I.getOperand(i: 0).getReg()});
2559 MI.addImm(Val: AArch64PACKey::IA);
2560 MI.addImm(Val: *BADisc);
2561 MI.addReg(/*AddrDisc=*/RegNo: AArch64::XZR);
2562 I.eraseFromParent();
2563 return constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
2564 }
2565 I.setDesc(TII.get(Opcode: AArch64::BR));
2566 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2567 }
2568
2569 case TargetOpcode::G_BRJT:
2570 return selectBrJT(I, MRI);
2571
2572 case AArch64::G_ADD_LOW: {
2573 // This op may have been separated from its ADRP companion by the localizer
2574 // or some other code motion pass. Given that many CPUs will try to
2575 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2576 // which will later be expanded into an ADRP+ADD pair after scheduling.
2577 MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());
2578 if (BaseMI->getOpcode() != AArch64::ADRP) {
2579 I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2580 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2581 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2582 }
2583 assert(TM.getCodeModel() == CodeModel::Small &&
2584 "Expected small code model");
2585 auto Op1 = BaseMI->getOperand(i: 1);
2586 auto Op2 = I.getOperand(i: 2);
2587 auto MovAddr = MIB.buildInstr(Opc: AArch64::MOVaddr, DstOps: {I.getOperand(i: 0)}, SrcOps: {})
2588 .addGlobalAddress(GV: Op1.getGlobal(), Offset: Op1.getOffset(),
2589 TargetFlags: Op1.getTargetFlags())
2590 .addGlobalAddress(GV: Op2.getGlobal(), Offset: Op2.getOffset(),
2591 TargetFlags: Op2.getTargetFlags());
2592 I.eraseFromParent();
2593 return constrainSelectedInstRegOperands(I&: *MovAddr, TII, TRI, RBI);
2594 }
2595
2596 case TargetOpcode::G_FCONSTANT:
2597 case TargetOpcode::G_CONSTANT: {
2598 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2599
2600 const LLT s8 = LLT::scalar(SizeInBits: 8);
2601 const LLT s16 = LLT::scalar(SizeInBits: 16);
2602 const LLT s32 = LLT::scalar(SizeInBits: 32);
2603 const LLT s64 = LLT::scalar(SizeInBits: 64);
2604 const LLT s128 = LLT::scalar(SizeInBits: 128);
2605 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
2606
2607 const Register DefReg = I.getOperand(i: 0).getReg();
2608 const LLT DefTy = MRI.getType(Reg: DefReg);
2609 const unsigned DefSize = DefTy.getSizeInBits();
2610 const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
2611
2612 // FIXME: Redundant check, but even less readable when factored out.
2613 if (isFP) {
2614 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2615 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2616 << " constant, expected: " << s16 << " or " << s32
2617 << " or " << s64 << " or " << s128 << '\n');
2618 return false;
2619 }
2620
2621 if (RB.getID() != AArch64::FPRRegBankID) {
2622 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2623 << " constant on bank: " << RB
2624 << ", expected: FPR\n");
2625 return false;
2626 }
2627
2628 // The 0.0 case is covered by tablegen for everything except fp128. Reject
2629 // it here so we can be sure tablegen works correctly and isn't rescued by
2630 // this code. 0.0 is not covered by tablegen for FP128, so we will handle
2631 // that scenario in the code here.
2632 if (DefSize != 128 && I.getOperand(i: 1).getFPImm()->isExactlyValue(V: 0.0))
2633 return false;
2634 } else {
2635 // s32 and s64 are covered by tablegen.
2636 if (Ty != p0 && Ty != s8 && Ty != s16) {
2637 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2638 << " constant, expected: " << s32 << ", " << s64
2639 << ", or " << p0 << '\n');
2640 return false;
2641 }
2642
2643 if (RB.getID() != AArch64::GPRRegBankID) {
2644 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2645 << " constant on bank: " << RB
2646 << ", expected: GPR\n");
2647 return false;
2648 }
2649 }
2650
2651 if (isFP) {
2652 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB);
2653 // For 16b and 128b values, and 32/64b values that aren't legal FP immediates, emit a constant pool load.
2654 switch (DefSize) {
2655 default:
2656 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2657 case 32:
2658 case 64: {
2659 bool OptForSize = shouldOptForSize(MF: &MF);
2660 const auto &TLI = MF.getSubtarget().getTargetLowering();
2661 // If TLI says that this fpimm is illegal, then we'll expand to a
2662 // constant pool load.
2663 if (TLI->isFPImmLegal(I.getOperand(i: 1).getFPImm()->getValueAPF(),
2664 EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize))
2665 break;
2666 [[fallthrough]];
2667 }
2668 case 16:
2669 case 128: {
2670 auto *FPImm = I.getOperand(i: 1).getFPImm();
2671 auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB);
2672 if (!LoadMI) {
2673 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2674 return false;
2675 }
2676 MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: 0).getReg()});
2677 I.eraseFromParent();
2678 return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI);
2679 }
2680 }
2681
2682 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2683 // Materialize the constant into a GPR with a normal mov, then copy it to the FPR bank (which may become an FMOV).
2684 const Register DefGPRReg = MRI.createVirtualRegister(
2685 RegClass: DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2686 MachineOperand &RegOp = I.getOperand(i: 0);
2687 RegOp.setReg(DefGPRReg);
2688 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2689 MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg});
2690
2691 if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) {
2692 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2693 return false;
2694 }
2695
2696 MachineOperand &ImmOp = I.getOperand(i: 1);
2697 // FIXME: Is going through int64_t always correct?
2698 ImmOp.ChangeToImmediate(
2699 ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2700 } else if (I.getOperand(i: 1).isCImm()) {
2701 uint64_t Val = I.getOperand(i: 1).getCImm()->getZExtValue();
2702 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2703 } else if (I.getOperand(i: 1).isImm()) {
2704 uint64_t Val = I.getOperand(i: 1).getImm();
2705 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2706 }
2707
2708 const unsigned MovOpc =
2709 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2710 I.setDesc(TII.get(Opcode: MovOpc));
2711 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2712 return true;
2713 }
2714 case TargetOpcode::G_EXTRACT: {
2715 Register DstReg = I.getOperand(i: 0).getReg();
2716 Register SrcReg = I.getOperand(i: 1).getReg();
2717 LLT SrcTy = MRI.getType(Reg: SrcReg);
2718 LLT DstTy = MRI.getType(Reg: DstReg);
2719 (void)DstTy;
2720 unsigned SrcSize = SrcTy.getSizeInBits();
2721
2722 if (SrcTy.getSizeInBits() > 64) {
2723 // This should be an extract of an s128, which is like a vector extract.
2724 if (SrcTy.getSizeInBits() != 128)
2725 return false;
2726 // Only support extracting 64 bits from an s128 at the moment.
2727 if (DstTy.getSizeInBits() != 64)
2728 return false;
2729
2730 unsigned Offset = I.getOperand(i: 2).getImm();
2731 if (Offset % 64 != 0)
2732 return false;
2733
2734 // Check we have the right regbank always.
2735 const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
2736 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
2737 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2738
2739 if (SrcRB.getID() == AArch64::GPRRegBankID) {
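// On the GPR bank an s128 lives in a 64-bit register pair; sube64/subo64
// name the low and high halves respectively.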
2740 auto NewI =
2741 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
2742 .addUse(RegNo: SrcReg, Flags: 0,
2743 SubReg: Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2744 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt&: *NewI,
2745 RegClass: AArch64::GPR64RegClass, RegMO&: NewI->getOperand(i: 0));
2746 I.eraseFromParent();
2747 return true;
2748 }
2749
2750 // Emit the same code as a vector extract.
2751 // Offset must be a multiple of 64.
2752 unsigned LaneIdx = Offset / 64;
2753 MachineInstr *Extract = emitExtractVectorElt(
2754 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: 64), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB);
2755 if (!Extract)
2756 return false;
2757 I.eraseFromParent();
2758 return true;
2759 }
2760
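// Smaller extracts become a UBFX: UBFM dst, src, #off, #(off + dstwidth - 1).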
2761 I.setDesc(TII.get(Opcode: SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2762 MachineInstrBuilder(MF, I).addImm(Val: I.getOperand(i: 2).getImm() +
2763 Ty.getSizeInBits() - 1);
2764
2765 if (SrcSize < 64) {
2766 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2767 "unexpected G_EXTRACT types");
2768 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2769 }
2770
2771 DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2772 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2773 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {})
2774 .addReg(RegNo: DstReg, flags: 0, SubReg: AArch64::sub_32);
2775 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
2776 RC: AArch64::GPR32RegClass, MRI);
2777 I.getOperand(i: 0).setReg(DstReg);
2778
2779 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2780 }
2781
2782 case TargetOpcode::G_INSERT: {
2783 LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 2).getReg());
2784 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2785 unsigned DstSize = DstTy.getSizeInBits();
2786 // Larger inserts are vectors; same-size ones should be something else by
2787 // now (split up or turned into COPYs).
2788 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2789 return false;
2790
2791 I.setDesc(TII.get(Opcode: DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2792 unsigned LSB = I.getOperand(i: 3).getImm();
2793 unsigned Width = MRI.getType(Reg: I.getOperand(i: 2).getReg()).getSizeInBits();
2794 I.getOperand(i: 3).setImm((DstSize - LSB) % DstSize);
2795 MachineInstrBuilder(MF, I).addImm(Val: Width - 1);
2796
2797 if (DstSize < 64) {
2798 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2799 "unexpected G_INSERT types");
2800 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2801 }
2802
2803 Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2804 BuildMI(BB&: MBB, I: I.getIterator(), MIMD: I.getDebugLoc(),
2805 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
2806 .addDef(RegNo: SrcReg)
2807 .addImm(Val: 0)
2808 .addUse(RegNo: I.getOperand(i: 2).getReg())
2809 .addImm(Val: AArch64::sub_32);
2810 RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(),
2811 RC: AArch64::GPR32RegClass, MRI);
2812 I.getOperand(i: 2).setReg(SrcReg);
2813
2814 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2815 }
2816 case TargetOpcode::G_FRAME_INDEX: {
2817 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2818 if (Ty != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
2819 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2820 << ", expected: " << LLT::pointer(0, 64) << '\n');
2821 return false;
2822 }
2823 I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2824
2825 // MOs for a #0 shifted immediate.
2826 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2827 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2828
2829 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2830 }
2831
2832 case TargetOpcode::G_GLOBAL_VALUE: {
2833 const GlobalValue *GV = nullptr;
2834 unsigned OpFlags;
2835 if (I.getOperand(i: 1).isSymbol()) {
2836 OpFlags = I.getOperand(i: 1).getTargetFlags();
2837 // Currently only used by "RtLibUseGOT".
2838 assert(OpFlags == AArch64II::MO_GOT);
2839 } else {
2840 GV = I.getOperand(i: 1).getGlobal();
2841 if (GV->isThreadLocal())
2842 return selectTLSGlobalValue(I, MRI);
2843 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2844 }
2845
2846 if (OpFlags & AArch64II::MO_GOT) {
2847 I.setDesc(TII.get(Opcode: AArch64::LOADgot));
2848 I.getOperand(i: 1).setTargetFlags(OpFlags);
2849 } else if (TM.getCodeModel() == CodeModel::Large &&
2850 !TM.isPositionIndependent()) {
2851 // Materialize the global using movz/movk instructions.
2852 materializeLargeCMVal(I, V: GV, OpFlags);
2853 I.eraseFromParent();
2854 return true;
2855 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2856 I.setDesc(TII.get(Opcode: AArch64::ADR));
2857 I.getOperand(i: 1).setTargetFlags(OpFlags);
2858 } else {
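// MOVaddr is a pseudo that is later expanded into an ADRP + ADD (page +
// page-offset) pair.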
2859 I.setDesc(TII.get(Opcode: AArch64::MOVaddr));
2860 I.getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2861 MachineInstrBuilder MIB(MF, I);
2862 MIB.addGlobalAddress(GV, Offset: I.getOperand(i: 1).getOffset(),
2863 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2864 }
2865 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2866 }
2867
2868 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2869 return selectPtrAuthGlobalValue(I, MRI);
2870
2871 case TargetOpcode::G_ZEXTLOAD:
2872 case TargetOpcode::G_LOAD:
2873 case TargetOpcode::G_STORE: {
2874 GLoadStore &LdSt = cast<GLoadStore>(Val&: I);
2875 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2876 LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg());
2877
2878 if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
2879 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2880 << ", expected: " << LLT::pointer(0, 64) << '\n');
2881 return false;
2882 }
2883
2884 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2885 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2886 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2887
2888 // Need special instructions for atomics that affect ordering.
2889 if (Order != AtomicOrdering::NotAtomic &&
2890 Order != AtomicOrdering::Unordered &&
2891 Order != AtomicOrdering::Monotonic) {
2892 assert(!isa<GZExtLoad>(LdSt));
2893 assert(MemSizeInBytes <= 8 &&
2894 "128-bit atomics should already be custom-legalized");
2895
2896 if (isa<GLoad>(Val: LdSt)) {
2897 static constexpr unsigned LDAPROpcodes[] = {
2898 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2899 static constexpr unsigned LDAROpcodes[] = {
2900 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
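// Both tables are indexed by log2 of the access size in bytes (1/2/4/8 -> B/H/W/X).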
2901 ArrayRef<unsigned> Opcodes =
2902 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2903 ? LDAPROpcodes
2904 : LDAROpcodes;
2905 I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)]));
2906 } else {
2907 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2908 AArch64::STLRW, AArch64::STLRX};
2909 Register ValReg = LdSt.getReg(Idx: 0);
2910 if (MRI.getType(Reg: ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2911 // Emit a subreg copy of 32 bits.
2912 Register NewVal = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
2913 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {NewVal}, SrcOps: {})
2914 .addReg(RegNo: I.getOperand(i: 0).getReg(), flags: 0, SubReg: AArch64::sub_32);
2915 I.getOperand(i: 0).setReg(NewVal);
2916 }
2917 I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)]));
2918 }
2919 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2920 return true;
2921 }
2922
2923#ifndef NDEBUG
2924 const Register PtrReg = LdSt.getPointerReg();
2925 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2926 // Check that the pointer register is valid.
2927 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2928 "Load/Store pointer operand isn't a GPR");
2929 assert(MRI.getType(PtrReg).isPointer() &&
2930 "Load/Store pointer operand isn't a pointer");
2931#endif
2932
2933 const Register ValReg = LdSt.getReg(Idx: 0);
2934 const LLT ValTy = MRI.getType(Reg: ValReg);
2935 const RegisterBank &RB = *RBI.getRegBank(Reg: ValReg, MRI, TRI);
2936
2937 // The code below doesn't support truncating stores, so we need to narrow
2938 // the stored value to the memory type first.
2939 if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2940 unsigned SubReg;
2941 LLT MemTy = LdSt.getMMO().getMemoryType();
2942 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
2943 if (!getSubRegForClass(RC, TRI, SubReg))
2944 return false;
2945
2946 // Generate a subreg copy.
2947 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {})
2948 .addReg(RegNo: ValReg, flags: 0, SubReg)
2949 .getReg(Idx: 0);
2950 RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI);
2951 LdSt.getOperand(i: 0).setReg(Copy);
2952 } else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2953 // If this is an any-extending load from the FPR bank, split it into a regular
2954 // load + extend.
2955 if (RB.getID() == AArch64::FPRRegBankID) {
2956 unsigned SubReg;
2957 LLT MemTy = LdSt.getMMO().getMemoryType();
2958 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
2959 if (!getSubRegForClass(RC, TRI, SubReg))
2960 return false;
2961 Register OldDst = LdSt.getReg(Idx: 0);
2962 Register NewDst =
2963 MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType());
2964 LdSt.getOperand(i: 0).setReg(NewDst);
2965 MRI.setRegBank(Reg: NewDst, RegBank: RB);
2966 // Generate a SUBREG_TO_REG to extend it.
2967 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator()));
2968 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {OldDst}, SrcOps: {})
2969 .addImm(Val: 0)
2970 .addUse(RegNo: NewDst)
2971 .addImm(Val: SubReg);
2972 auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB);
2973 RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI);
2974 MIB.setInstr(LdSt);
2975 }
2976 }
2977
2978 // Helper lambda for partially selecting I. Either returns the original
2979 // instruction with an updated opcode, or a new instruction.
2980 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2981 bool IsStore = isa<GStore>(Val: I);
2982 const unsigned NewOpc =
2983 selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits);
2984 if (NewOpc == I.getOpcode())
2985 return nullptr;
2986 // Check if we can fold anything into the addressing mode.
2987 auto AddrModeFns =
2988 selectAddrModeIndexed(Root&: I.getOperand(i: 1), Size: MemSizeInBytes);
2989 if (!AddrModeFns) {
2990 // Can't fold anything. Use the original instruction.
2991 I.setDesc(TII.get(Opcode: NewOpc));
2992 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2993 return &I;
2994 }
2995
2996 // Folded something. Create a new instruction and return it.
2997 auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags());
2998 Register CurValReg = I.getOperand(i: 0).getReg();
2999 IsStore ? NewInst.addUse(RegNo: CurValReg) : NewInst.addDef(RegNo: CurValReg);
3000 NewInst.cloneMemRefs(OtherMI: I);
3001 for (auto &Fn : *AddrModeFns)
3002 Fn(NewInst);
3003 I.eraseFromParent();
3004 return &*NewInst;
3005 };
3006
3007 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3008 if (!LoadStore)
3009 return false;
3010
3011 // If we're storing a 0, use WZR/XZR.
3012 if (Opcode == TargetOpcode::G_STORE) {
3013 auto CVal = getIConstantVRegValWithLookThrough(
3014 VReg: LoadStore->getOperand(i: 0).getReg(), MRI);
3015 if (CVal && CVal->Value == 0) {
3016 switch (LoadStore->getOpcode()) {
3017 case AArch64::STRWui:
3018 case AArch64::STRHHui:
3019 case AArch64::STRBBui:
3020 LoadStore->getOperand(i: 0).setReg(AArch64::WZR);
3021 break;
3022 case AArch64::STRXui:
3023 LoadStore->getOperand(i: 0).setReg(AArch64::XZR);
3024 break;
3025 }
3026 }
3027 }
3028
3029 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3030 ValTy == LLT::scalar(SizeInBits: 64) && MemSizeInBits == 32)) {
3031 // The any/zextload from a smaller type to i32 should be handled by the
3032 // importer.
3033 if (MRI.getType(Reg: LoadStore->getOperand(i: 0).getReg()).getSizeInBits() != 64)
3034 return false;
3035 // If we have an extending load then change the load's type to be a
3036 // narrower reg and zero_extend with SUBREG_TO_REG.
3037 Register LdReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3038 Register DstReg = LoadStore->getOperand(i: 0).getReg();
3039 LoadStore->getOperand(i: 0).setReg(LdReg);
3040
3041 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator()));
3042 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DstReg}, SrcOps: {})
3043 .addImm(Val: 0)
3044 .addUse(RegNo: LdReg)
3045 .addImm(Val: AArch64::sub_32);
3046 constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3047 return RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64allRegClass,
3048 MRI);
3049 }
3050 return constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3051 }
3052
3053 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3054 case TargetOpcode::G_INDEXED_SEXTLOAD:
3055 return selectIndexedExtLoad(I, MRI);
3056 case TargetOpcode::G_INDEXED_LOAD:
3057 return selectIndexedLoad(I, MRI);
3058 case TargetOpcode::G_INDEXED_STORE:
3059 return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI);
3060
3061 case TargetOpcode::G_LSHR:
3062 case TargetOpcode::G_ASHR:
3063 if (MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3064 return selectVectorAshrLshr(I, MRI);
3065 [[fallthrough]];
3066 case TargetOpcode::G_SHL:
3067 if (Opcode == TargetOpcode::G_SHL &&
3068 MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3069 return selectVectorSHL(I, MRI);
3070
3071 // These shifts were legalized to have 64 bit shift amounts because we
3072 // want to take advantage of the selection patterns that assume the
3073 // immediates are s64s. However, selectBinaryOp will assume both operands
3074 // have the same bit size.
3075 {
3076 Register SrcReg = I.getOperand(i: 1).getReg();
3077 Register ShiftReg = I.getOperand(i: 2).getReg();
3078 const LLT ShiftTy = MRI.getType(Reg: ShiftReg);
3079 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3080 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3081 ShiftTy.getSizeInBits() == 64) {
3082 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3083 // Insert a subregister copy to implement a 64->32 trunc
3084 auto Trunc = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {SrcTy}, SrcOps: {})
3085 .addReg(RegNo: ShiftReg, flags: 0, SubReg: AArch64::sub_32);
3086 MRI.setRegBank(Reg: Trunc.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
3087 I.getOperand(i: 2).setReg(Trunc.getReg(Idx: 0));
3088 }
3089 }
3090 [[fallthrough]];
3091 case TargetOpcode::G_OR: {
3092 // Reject the various things we don't support yet.
3093 if (unsupportedBinOp(I, RBI, MRI, TRI))
3094 return false;
3095
3096 const unsigned OpSize = Ty.getSizeInBits();
3097
3098 const Register DefReg = I.getOperand(i: 0).getReg();
3099 const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
3100
3101 const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize);
3102 if (NewOpc == I.getOpcode())
3103 return false;
3104
3105 I.setDesc(TII.get(Opcode: NewOpc));
3106 // FIXME: Should the type always be reset in setDesc?
3107
3108 // Now that we selected an opcode, we need to constrain the register
3109 // operands to use appropriate classes.
3110 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3111 }
3112
3113 case TargetOpcode::G_PTR_ADD: {
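// Pointer arithmetic is plain integer addition; emitADD folds an immediate,
// shifted-register, or extended-register RHS when one of those forms matches.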
3114 emitADD(DefReg: I.getOperand(i: 0).getReg(), LHS&: I.getOperand(i: 1), RHS&: I.getOperand(i: 2), MIRBuilder&: MIB);
3115 I.eraseFromParent();
3116 return true;
3117 }
3118
3119 case TargetOpcode::G_SADDE:
3120 case TargetOpcode::G_UADDE:
3121 case TargetOpcode::G_SSUBE:
3122 case TargetOpcode::G_USUBE:
3123 case TargetOpcode::G_SADDO:
3124 case TargetOpcode::G_UADDO:
3125 case TargetOpcode::G_SSUBO:
3126 case TargetOpcode::G_USUBO:
3127 return selectOverflowOp(I, MRI);
3128
3129 case TargetOpcode::G_PTRMASK: {
3130 Register MaskReg = I.getOperand(i: 2).getReg();
3131 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI);
3132 // TODO: Implement arbitrary cases
3133 if (!MaskVal || !isShiftedMask_64(Value: *MaskVal))
3134 return false;
3135
3136 uint64_t Mask = *MaskVal;
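// A shifted run of ones is representable as an AArch64 logical immediate, so
// the pointer mask can be selected as a single 64-bit ANDXri.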
3137 I.setDesc(TII.get(Opcode: AArch64::ANDXri));
3138 I.getOperand(i: 2).ChangeToImmediate(
3139 ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: 64));
3140
3141 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3142 }
3143 case TargetOpcode::G_PTRTOINT:
3144 case TargetOpcode::G_TRUNC: {
3145 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3146 const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3147
3148 const Register DstReg = I.getOperand(i: 0).getReg();
3149 const Register SrcReg = I.getOperand(i: 1).getReg();
3150
3151 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3152 const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3153
3154 if (DstRB.getID() != SrcRB.getID()) {
3155 LLVM_DEBUG(
3156 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3157 return false;
3158 }
3159
3160 if (DstRB.getID() == AArch64::GPRRegBankID) {
3161 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3162 if (!DstRC)
3163 return false;
3164
3165 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB);
3166 if (!SrcRC)
3167 return false;
3168
3169 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) ||
3170 !RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
3171 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3172 return false;
3173 }
3174
3175 if (DstRC == SrcRC) {
3176 // Nothing to be done
3177 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: 32) &&
3178 SrcTy == LLT::scalar(SizeInBits: 64)) {
3179 llvm_unreachable("TableGen can import this case");
3180 return false;
3181 } else if (DstRC == &AArch64::GPR32RegClass &&
3182 SrcRC == &AArch64::GPR64RegClass) {
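// A 64-to-32-bit GPR truncate is just a COPY from the source's sub_32
// subregister.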
3183 I.getOperand(i: 1).setSubReg(AArch64::sub_32);
3184 } else {
3185 LLVM_DEBUG(
3186 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3187 return false;
3188 }
3189
3190 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3191 return true;
3192 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3193 if (DstTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) &&
3194 SrcTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
3195 I.setDesc(TII.get(Opcode: AArch64::XTNv4i16));
3196 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3197 return true;
3198 }
3199
3200 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3201 MachineInstr *Extract = emitExtractVectorElt(
3202 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: 0, MIRBuilder&: MIB);
3203 if (!Extract)
3204 return false;
3205 I.eraseFromParent();
3206 return true;
3207 }
3208
3209 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3210 if (Opcode == TargetOpcode::G_PTRTOINT) {
3211 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3212 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3213 return selectCopy(I, TII, MRI, TRI, RBI);
3214 }
3215 }
3216
3217 return false;
3218 }
3219
3220 case TargetOpcode::G_ANYEXT: {
3221 if (selectUSMovFromExtend(I, MRI))
3222 return true;
3223
3224 const Register DstReg = I.getOperand(i: 0).getReg();
3225 const Register SrcReg = I.getOperand(i: 1).getReg();
3226
3227 const RegisterBank &RBDst = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3228 if (RBDst.getID() != AArch64::GPRRegBankID) {
3229 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3230 << ", expected: GPR\n");
3231 return false;
3232 }
3233
3234 const RegisterBank &RBSrc = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3235 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3236 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3237 << ", expected: GPR\n");
3238 return false;
3239 }
3240
3241 const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits();
3242
3243 if (DstSize == 0) {
3244 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3245 return false;
3246 }
3247
3248 if (DstSize != 64 && DstSize > 32) {
3249 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3250 << ", expected: 32 or 64\n");
3251 return false;
3252 }
3253 // At this point G_ANYEXT is just like a plain COPY, but we need to
3254 // explicitly form the 64-bit value when extending to 64 bits.
3255 if (DstSize > 32) {
3256 Register ExtSrc = MRI.createVirtualRegister(RegClass: &AArch64::GPR64allRegClass);
3257 BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
3258 .addDef(RegNo: ExtSrc)
3259 .addImm(Val: 0)
3260 .addUse(RegNo: SrcReg)
3261 .addImm(Val: AArch64::sub_32);
3262 I.getOperand(i: 1).setReg(ExtSrc);
3263 }
3264 return selectCopy(I, TII, MRI, TRI, RBI);
3265 }
3266
3267 case TargetOpcode::G_ZEXT:
3268 case TargetOpcode::G_SEXT_INREG:
3269 case TargetOpcode::G_SEXT: {
3270 if (selectUSMovFromExtend(I, MRI))
3271 return true;
3272
3273 unsigned Opcode = I.getOpcode();
3274 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3275 const Register DefReg = I.getOperand(i: 0).getReg();
3276 Register SrcReg = I.getOperand(i: 1).getReg();
3277 const LLT DstTy = MRI.getType(Reg: DefReg);
3278 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3279 unsigned DstSize = DstTy.getSizeInBits();
3280 unsigned SrcSize = SrcTy.getSizeInBits();
3281
3282 // SEXT_INREG has the same source register size as the destination; the
3283 // size of the value to be extended is encoded in the immediate.
3284 if (Opcode == TargetOpcode::G_SEXT_INREG)
3285 SrcSize = I.getOperand(i: 2).getImm();
3286
3287 if (DstTy.isVector())
3288 return false; // Should be handled by imported patterns.
3289
3290 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3291 AArch64::GPRRegBankID &&
3292 "Unexpected ext regbank");
3293
3294 MachineInstr *ExtI;
3295
3296 // First check if we're extending the result of a load with a destination
3297 // type smaller than 32 bits; in that case this zext is redundant. GPR32 is
3298 // the smallest GPR register class on AArch64, and all narrower loads
3299 // automatically zero-extend the upper bits. E.g.
3300 // %v(s8) = G_LOAD %p, :: (load 1)
3301 // %v2(s32) = G_ZEXT %v(s8)
3302 if (!IsSigned) {
3303 auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI);
3304 bool IsGPR =
3305 RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3306 if (LoadMI && IsGPR) {
3307 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3308 unsigned BytesLoaded = MemOp->getSize().getValue();
3309 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3310 return selectCopy(I, TII, MRI, TRI, RBI);
3311 }
3312
3313 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3314 // + SUBREG_TO_REG.
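// Writing a W register implicitly zeroes the upper 32 bits of the X register,
// so the ORRWrs mov followed by SUBREG_TO_REG acts as the zero-extend.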
3315 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3316 Register SubregToRegSrc =
3317 MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3318 const Register ZReg = AArch64::WZR;
3319 MIB.buildInstr(Opc: AArch64::ORRWrs, DstOps: {SubregToRegSrc}, SrcOps: {ZReg, SrcReg})
3320 .addImm(Val: 0);
3321
3322 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
3323 .addImm(Val: 0)
3324 .addUse(RegNo: SubregToRegSrc)
3325 .addImm(Val: AArch64::sub_32);
3326
3327 if (!RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass,
3328 MRI)) {
3329 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3330 return false;
3331 }
3332
3333 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3334 MRI)) {
3335 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3336 return false;
3337 }
3338
3339 I.eraseFromParent();
3340 return true;
3341 }
3342 }
3343
3344 if (DstSize == 64) {
3345 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3346 // FIXME: Can we avoid manually doing this?
3347 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3348 MRI)) {
3349 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3350 << " operand\n");
3351 return false;
3352 }
3353 SrcReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG,
3354 DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
3355 .addImm(Val: 0)
3356 .addUse(RegNo: SrcReg)
3357 .addImm(Val: AArch64::sub_32)
3358 .getReg(Idx: 0);
3359 }
3360
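// (S|U)BFM with immr=0 and imms=SrcSize-1 is an SBFX/UBFX of the low SrcSize
// bits, i.e. a sign-/zero-extend to the full register width.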
3361 ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3362 DstOps: {DefReg}, SrcOps: {SrcReg})
3363 .addImm(Val: 0)
3364 .addImm(Val: SrcSize - 1);
3365 } else if (DstSize <= 32) {
3366 ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3367 DstOps: {DefReg}, SrcOps: {SrcReg})
3368 .addImm(Val: 0)
3369 .addImm(Val: SrcSize - 1);
3370 } else {
3371 return false;
3372 }
3373
3374 constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
3375 I.eraseFromParent();
3376 return true;
3377 }
3378
3379 case TargetOpcode::G_SITOFP:
3380 case TargetOpcode::G_UITOFP:
3381 case TargetOpcode::G_FPTOSI:
3382 case TargetOpcode::G_FPTOUI: {
3383 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()),
3384 SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3385 const unsigned NewOpc = selectFPConvOpc(GenericOpc: Opcode, DstTy, SrcTy);
3386 if (NewOpc == Opcode)
3387 return false;
3388
3389 I.setDesc(TII.get(Opcode: NewOpc));
3390 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3391 I.setFlags(MachineInstr::NoFPExcept);
3392
3393 return true;
3394 }
3395
3396 case TargetOpcode::G_FREEZE:
3397 return selectCopy(I, TII, MRI, TRI, RBI);
3398
3399 case TargetOpcode::G_INTTOPTR:
3400 // The importer is currently unable to import pointer types since they
3401 // didn't exist in SelectionDAG.
3402 return selectCopy(I, TII, MRI, TRI, RBI);
3403
3404 case TargetOpcode::G_BITCAST:
3405 // Imported SelectionDAG rules can handle every bitcast except those that
3406 // bitcast from a type to the same type. Ideally, these shouldn't occur
3407 // but we might not run an optimizer that deletes them. The other exception
3408 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3409 // of them.
3410 return selectCopy(I, TII, MRI, TRI, RBI);
3411
3412 case TargetOpcode::G_SELECT: {
3413 auto &Sel = cast<GSelect>(Val&: I);
3414 const Register CondReg = Sel.getCondReg();
3415 const Register TReg = Sel.getTrueReg();
3416 const Register FReg = Sel.getFalseReg();
3417
3418 if (tryOptSelect(Sel))
3419 return true;
3420
3421 // Make sure to use an unused vreg instead of wzr, so that the peephole
3422 // optimizations will be able to optimize these.
3423 Register DeadVReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
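// ANDS with the logical immediate 1 tests bit 0 of the condition and sets
// NZCV; the select below then keys off the NE condition.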
3424 auto TstMI = MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {DeadVReg}, SrcOps: {CondReg})
3425 .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 1, regSize: 32));
3426 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
3427 if (!emitSelect(Dst: Sel.getReg(Idx: 0), True: TReg, False: FReg, CC: AArch64CC::NE, MIB))
3428 return false;
3429 Sel.eraseFromParent();
3430 return true;
3431 }
3432 case TargetOpcode::G_ICMP: {
3433 if (Ty.isVector())
3434 return false;
3435
3436 if (Ty != LLT::scalar(SizeInBits: 32)) {
3437 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3438 << ", expected: " << LLT::scalar(32) << '\n');
3439 return false;
3440 }
3441
3442 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate());
3443 const AArch64CC::CondCode InvCC =
3444 changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred));
3445 emitIntegerCompare(LHS&: I.getOperand(i: 2), RHS&: I.getOperand(i: 3), Predicate&: I.getOperand(i: 1), MIRBuilder&: MIB);
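// CSINC Dst, WZR, WZR, InvCC produces 1 when the original predicate holds
// (i.e. when InvCC is false) and 0 otherwise.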
3446 emitCSINC(/*Dst=*/I.getOperand(i: 0).getReg(), /*Src1=*/AArch64::WZR,
3447 /*Src2=*/AArch64::WZR, Pred: InvCC, MIRBuilder&: MIB);
3448 I.eraseFromParent();
3449 return true;
3450 }
3451
3452 case TargetOpcode::G_FCMP: {
3453 CmpInst::Predicate Pred =
3454 static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate());
3455 if (!emitFPCompare(LHS: I.getOperand(i: 2).getReg(), RHS: I.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
3456 Pred) ||
3457 !emitCSetForFCmp(Dst: I.getOperand(i: 0).getReg(), Pred, MIRBuilder&: MIB))
3458 return false;
3459 I.eraseFromParent();
3460 return true;
3461 }
3462 case TargetOpcode::G_VASTART:
3463 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3464 : selectVaStartAAPCS(I, MF, MRI);
3465 case TargetOpcode::G_INTRINSIC:
3466 return selectIntrinsic(I, MRI);
3467 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3468 return selectIntrinsicWithSideEffects(I, MRI);
3469 case TargetOpcode::G_IMPLICIT_DEF: {
3470 I.setDesc(TII.get(Opcode: TargetOpcode::IMPLICIT_DEF));
3471 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3472 const Register DstReg = I.getOperand(i: 0).getReg();
3473 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3474 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3475 RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI);
3476 return true;
3477 }
3478 case TargetOpcode::G_BLOCK_ADDR: {
3479 Function *BAFn = I.getOperand(i: 1).getBlockAddress()->getFunction();
3480 if (std::optional<uint16_t> BADisc =
3481 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: *BAFn)) {
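// Materialize the signed block address with the MOVaddrPAC pseudo; it expands
// using X16/X17 as scratch and leaves the signed pointer in X16.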
3482 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
3483 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
3484 MIB.buildInstr(Opcode: AArch64::MOVaddrPAC)
3485 .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress())
3486 .addImm(Val: AArch64PACKey::IA)
3487 .addReg(/*AddrDisc=*/RegNo: AArch64::XZR)
3488 .addImm(Val: *BADisc)
3489 .constrainAllUses(TII, TRI, RBI);
3490 MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X16));
3491 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
3492 RC: AArch64::GPR64RegClass, MRI);
3493 I.eraseFromParent();
3494 return true;
3495 }
3496 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3497 materializeLargeCMVal(I, V: I.getOperand(i: 1).getBlockAddress(), OpFlags: 0);
3498 I.eraseFromParent();
3499 return true;
3500 } else {
3501 I.setDesc(TII.get(Opcode: AArch64::MOVaddrBA));
3502 auto MovMI = BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVaddrBA),
3503 DestReg: I.getOperand(i: 0).getReg())
3504 .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress(),
3505 /* Offset */ 0, TargetFlags: AArch64II::MO_PAGE)
3506 .addBlockAddress(
3507 BA: I.getOperand(i: 1).getBlockAddress(), /* Offset */ 0,
3508 TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3509 I.eraseFromParent();
3510 return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3511 }
3512 }
3513 case AArch64::G_DUP: {
3514 // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the
3515 // imported patterns, so do it manually here. Avoiding the s16 GPR in the
3516 // first place is difficult: at register bank selection we may end up
3517 // pessimizing the FPR case if we add an anyextend to fix this. Manual
3518 // selection is the most robust solution for now.
3519 if (RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() !=
3520 AArch64::GPRRegBankID)
3521 return false; // We expect the fpr regbank case to be imported.
3522 LLT VecTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3523 if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8))
3524 I.setDesc(TII.get(Opcode: AArch64::DUPv8i8gpr));
3525 else if (VecTy == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8))
3526 I.setDesc(TII.get(Opcode: AArch64::DUPv16i8gpr));
3527 else if (VecTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16))
3528 I.setDesc(TII.get(Opcode: AArch64::DUPv4i16gpr));
3529 else if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16))
3530 I.setDesc(TII.get(Opcode: AArch64::DUPv8i16gpr));
3531 else
3532 return false;
3533 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3534 }
3535 case TargetOpcode::G_BUILD_VECTOR:
3536 return selectBuildVector(I, MRI);
3537 case TargetOpcode::G_MERGE_VALUES:
3538 return selectMergeValues(I, MRI);
3539 case TargetOpcode::G_UNMERGE_VALUES:
3540 return selectUnmergeValues(I, MRI);
3541 case TargetOpcode::G_SHUFFLE_VECTOR:
3542 return selectShuffleVector(I, MRI);
3543 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3544 return selectExtractElt(I, MRI);
3545 case TargetOpcode::G_CONCAT_VECTORS:
3546 return selectConcatVectors(I, MRI);
3547 case TargetOpcode::G_JUMP_TABLE:
3548 return selectJumpTable(I, MRI);
3549 case TargetOpcode::G_MEMCPY:
3550 case TargetOpcode::G_MEMCPY_INLINE:
3551 case TargetOpcode::G_MEMMOVE:
3552 case TargetOpcode::G_MEMSET:
3553 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3554 return selectMOPS(I, MRI);
3555 }
3556
3557 return false;
3558}
3559
3560bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3561 MachineIRBuilderState OldMIBState = MIB.getState();
3562 bool Success = select(I);
3563 MIB.setState(OldMIBState);
3564 return Success;
3565}
3566
3567bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3568 MachineRegisterInfo &MRI) {
3569 unsigned Mopcode;
3570 switch (GI.getOpcode()) {
3571 case TargetOpcode::G_MEMCPY:
3572 case TargetOpcode::G_MEMCPY_INLINE:
3573 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3574 break;
3575 case TargetOpcode::G_MEMMOVE:
3576 Mopcode = AArch64::MOPSMemoryMovePseudo;
3577 break;
3578 case TargetOpcode::G_MEMSET:
3579 // For tagged memset see llvm.aarch64.mops.memset.tag
3580 Mopcode = AArch64::MOPSMemorySetPseudo;
3581 break;
3582 }
3583
3584 auto &DstPtr = GI.getOperand(i: 0);
3585 auto &SrcOrVal = GI.getOperand(i: 1);
3586 auto &Size = GI.getOperand(i: 2);
3587
3588 // Create copies of the registers that can be clobbered.
3589 const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg());
3590 const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg());
3591 const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg());
3592
3593 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3594 const auto &SrcValRegClass =
3595 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3596
3597 // Constrain to specific registers
3598 RBI.constrainGenericRegister(Reg: DstPtrCopy, RC: AArch64::GPR64commonRegClass, MRI);
3599 RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI);
3600 RBI.constrainGenericRegister(Reg: SizeCopy, RC: AArch64::GPR64RegClass, MRI);
3601
3602 MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr);
3603 MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal);
3604 MIB.buildCopy(Res: SizeCopy, Op: Size);
3605
3606 // New instruction uses the copied registers because it must update them.
3607 // The defs are not used since they don't exist in G_MEM*. They are still
3608 // tied.
3609 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3610 Register DefDstPtr = MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass);
3611 Register DefSize = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3612 if (IsSet) {
3613 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize},
3614 SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy});
3615 } else {
3616 Register DefSrcPtr = MRI.createVirtualRegister(RegClass: &SrcValRegClass);
3617 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize},
3618 SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy});
3619 }
3620
3621 GI.eraseFromParent();
3622 return true;
3623}
3624
3625bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3626 MachineRegisterInfo &MRI) {
3627 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3628 Register JTAddr = I.getOperand(i: 0).getReg();
3629 unsigned JTI = I.getOperand(i: 1).getIndex();
3630 Register Index = I.getOperand(i: 2).getReg();
3631
3632 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: 4, PCRelSym: nullptr);
3633
3634 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3635 // sequence later, to guarantee the integrity of the intermediate values.
3636 if (MF->getFunction().hasFnAttribute(Kind: "aarch64-jump-table-hardening")) {
3637 CodeModel::Model CM = TM.getCodeModel();
3638 if (STI.isTargetMachO()) {
3639 if (CM != CodeModel::Small && CM != CodeModel::Large)
3640 report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
3641 } else {
3642 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3643 assert(STI.isTargetELF() &&
3644 "jump table hardening only supported on MachO/ELF");
3645 if (CM != CodeModel::Small)
3646 report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
3647 }
3648
3649 MIB.buildCopy(Res: {AArch64::X16}, Op: I.getOperand(i: 2).getReg());
3650 MIB.buildInstr(Opcode: AArch64::BR_JumpTable)
3651 .addJumpTableIndex(Idx: I.getOperand(i: 1).getIndex());
3652 I.eraseFromParent();
3653 return true;
3654 }
3655
3656 Register TargetReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3657 Register ScratchReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
3658
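// JumpTableDest32 computes the absolute branch target from the table address
// and index (4-byte entries, matching setJumpTableEntryInfo above), using
// ScratchReg as a temporary.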
3659 auto JumpTableInst = MIB.buildInstr(Opc: AArch64::JumpTableDest32,
3660 DstOps: {TargetReg, ScratchReg}, SrcOps: {JTAddr, Index})
3661 .addJumpTableIndex(Idx: JTI);
3662 // Save the jump table info.
3663 MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {},
3664 SrcOps: {static_cast<int64_t>(JTI)});
3665 // Build the indirect branch.
3666 MIB.buildInstr(Opc: AArch64::BR, DstOps: {}, SrcOps: {TargetReg});
3667 I.eraseFromParent();
3668 return constrainSelectedInstRegOperands(I&: *JumpTableInst, TII, TRI, RBI);
3669}
3670
3671bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3672 MachineRegisterInfo &MRI) {
3673 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3674 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3675
3676 Register DstReg = I.getOperand(i: 0).getReg();
3677 unsigned JTI = I.getOperand(i: 1).getIndex();
3678 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3679 auto MovMI =
3680 MIB.buildInstr(Opc: AArch64::MOVaddrJT, DstOps: {DstReg}, SrcOps: {})
3681 .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_PAGE)
3682 .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3683 I.eraseFromParent();
3684 return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3685}
3686
3687bool AArch64InstructionSelector::selectTLSGlobalValue(
3688 MachineInstr &I, MachineRegisterInfo &MRI) {
3689 if (!STI.isTargetMachO())
3690 return false;
3691 MachineFunction &MF = *I.getParent()->getParent();
3692 MF.getFrameInfo().setAdjustsStack(true);
3693
3694 const auto &GlobalOp = I.getOperand(i: 1);
3695 assert(GlobalOp.getOffset() == 0 &&
3696 "Shouldn't have an offset on TLS globals!");
3697 const GlobalValue &GV = *GlobalOp.getGlobal();
3698
3699 auto LoadGOT =
3700 MIB.buildInstr(Opc: AArch64::LOADgot, DstOps: {&AArch64::GPR64commonRegClass}, SrcOps: {})
3701 .addGlobalAddress(GV: &GV, Offset: 0, TargetFlags: AArch64II::MO_TLS);
3702
3703 auto Load = MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {&AArch64::GPR64commonRegClass},
3704 SrcOps: {LoadGOT.getReg(Idx: 0)})
3705 .addImm(Val: 0);
3706
3707 MIB.buildCopy(Res: Register(AArch64::X0), Op: LoadGOT.getReg(Idx: 0));
3708 // TLS calls preserve all registers except those that absolutely must be
3709 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3710 // silly).
3711 unsigned Opcode = getBLRCallOpcode(MF);
3712
3713 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3714 if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-calls")) {
3715 assert(Opcode == AArch64::BLR);
3716 Opcode = AArch64::BLRAAZ;
3717 }
3718
3719 MIB.buildInstr(Opc: Opcode, DstOps: {}, SrcOps: {Load})
3720 .addUse(RegNo: AArch64::X0, Flags: RegState::Implicit)
3721 .addDef(RegNo: AArch64::X0, Flags: RegState::Implicit)
3722 .addRegMask(Mask: TRI.getTLSCallPreservedMask());
3723
3724 MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X0));
3725 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: AArch64::GPR64RegClass,
3726 MRI);
3727 I.eraseFromParent();
3728 return true;
3729}
3730
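// Place a scalar into lane 0 of a vector register: build an IMPLICIT_DEF of
// the destination class and INSERT_SUBREG the scalar into the sub-register
// matching its element size.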
3731MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3732 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3733 MachineIRBuilder &MIRBuilder) const {
3734 auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {});
3735
3736 auto BuildFn = [&](unsigned SubregIndex) {
3737 auto Ins =
3738 MIRBuilder
3739 .buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar})
3740 .addImm(Val: SubregIndex);
3741 constrainSelectedInstRegOperands(I&: *Undef, TII, TRI, RBI);
3742 constrainSelectedInstRegOperands(I&: *Ins, TII, TRI, RBI);
3743 return &*Ins;
3744 };
3745
3746 switch (EltSize) {
3747 case 8:
3748 return BuildFn(AArch64::bsub);
3749 case 16:
3750 return BuildFn(AArch64::hsub);
3751 case 32:
3752 return BuildFn(AArch64::ssub);
3753 case 64:
3754 return BuildFn(AArch64::dsub);
3755 default:
3756 return nullptr;
3757 }
3758}
3759
3760MachineInstr *
3761AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3762 MachineIRBuilder &MIB,
3763 MachineRegisterInfo &MRI) const {
3764 LLT DstTy = MRI.getType(Reg: DstReg);
3765 const TargetRegisterClass *RC =
3766 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
3767 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3768 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3769 return nullptr;
3770 }
3771 unsigned SubReg = 0;
3772 if (!getSubRegForClass(RC, TRI, SubReg))
3773 return nullptr;
3774 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3775 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3776 << DstTy.getSizeInBits() << ")\n");
3777 return nullptr;
3778 }
3779 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
3780 .addReg(RegNo: SrcReg, flags: 0, SubReg);
3781 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
3782 return Copy;
3783}
3784
3785bool AArch64InstructionSelector::selectMergeValues(
3786 MachineInstr &I, MachineRegisterInfo &MRI) {
3787 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3788 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3789 const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3790 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3791 const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI);
3792
3793 if (I.getNumOperands() != 3)
3794 return false;
3795
3796 // Merging 2 s64s into an s128.
3797 if (DstTy == LLT::scalar(SizeInBits: 128)) {
3798 if (SrcTy.getSizeInBits() != 64)
3799 return false;
3800 Register DstReg = I.getOperand(i: 0).getReg();
3801 Register Src1Reg = I.getOperand(i: 1).getReg();
3802 Register Src2Reg = I.getOperand(i: 2).getReg();
3803 auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {});
3804 MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: 0), EltReg: Src1Reg,
3805 /* LaneIdx */ 0, RB, MIRBuilder&: MIB);
3806 if (!InsMI)
3807 return false;
3808 MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(),
3809 EltReg: Src2Reg, /* LaneIdx */ 1, RB, MIRBuilder&: MIB);
3810 if (!Ins2MI)
3811 return false;
3812 constrainSelectedInstRegOperands(I&: *InsMI, TII, TRI, RBI);
3813 constrainSelectedInstRegOperands(I&: *Ins2MI, TII, TRI, RBI);
3814 I.eraseFromParent();
3815 return true;
3816 }
3817
3818 if (RB.getID() != AArch64::GPRRegBankID)
3819 return false;
3820
3821 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3822 return false;
3823
3824 auto *DstRC = &AArch64::GPR64RegClass;
3825 Register SubToRegDef = MRI.createVirtualRegister(RegClass: DstRC);
3826 MachineInstr &SubRegMI = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
3827 MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
3828 .addDef(RegNo: SubToRegDef)
3829 .addImm(Val: 0)
3830 .addUse(RegNo: I.getOperand(i: 1).getReg())
3831 .addImm(Val: AArch64::sub_32);
3832 Register SubToRegDef2 = MRI.createVirtualRegister(RegClass: DstRC);
3833 // Need to anyext the second scalar before we can use bfm
3834 MachineInstr &SubRegMI2 = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
3835 MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
3836 .addDef(RegNo: SubToRegDef2)
3837 .addImm(Val: 0)
3838 .addUse(RegNo: I.getOperand(i: 2).getReg())
3839 .addImm(Val: AArch64::sub_32);
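// BFMXri with immr=32, imms=31 (a bitfield insert of width 32 at bit 32)
// copies the low 32 bits of the second value into the high half of the result.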
3840 MachineInstr &BFM =
3841 *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::BFMXri))
3842 .addDef(RegNo: I.getOperand(i: 0).getReg())
3843 .addUse(RegNo: SubToRegDef)
3844 .addUse(RegNo: SubToRegDef2)
3845 .addImm(Val: 32)
3846 .addImm(Val: 31);
3847 constrainSelectedInstRegOperands(I&: SubRegMI, TII, TRI, RBI);
3848 constrainSelectedInstRegOperands(I&: SubRegMI2, TII, TRI, RBI);
3849 constrainSelectedInstRegOperands(I&: BFM, TII, TRI, RBI);
3850 I.eraseFromParent();
3851 return true;
3852}
3853
3854static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3855 const unsigned EltSize) {
3856 // Choose a lane copy opcode and subregister based off of the size of the
3857 // vector's elements.
3858 switch (EltSize) {
3859 case 8:
3860 CopyOpc = AArch64::DUPi8;
3861 ExtractSubReg = AArch64::bsub;
3862 break;
3863 case 16:
3864 CopyOpc = AArch64::DUPi16;
3865 ExtractSubReg = AArch64::hsub;
3866 break;
3867 case 32:
3868 CopyOpc = AArch64::DUPi32;
3869 ExtractSubReg = AArch64::ssub;
3870 break;
3871 case 64:
3872 CopyOpc = AArch64::DUPi64;
3873 ExtractSubReg = AArch64::dsub;
3874 break;
3875 default:
3876 // Unknown size, bail out.
3877 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3878 return false;
3879 }
3880 return true;
3881}
3882
3883MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3884 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3885 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3886 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3887 unsigned CopyOpc = 0;
3888 unsigned ExtractSubReg = 0;
3889 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) {
3890 LLVM_DEBUG(
3891 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3892 return nullptr;
3893 }
3894
3895 const TargetRegisterClass *DstRC =
3896 getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true);
3897 if (!DstRC) {
3898 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3899 return nullptr;
3900 }
3901
3902 const RegisterBank &VecRB = *RBI.getRegBank(Reg: VecReg, MRI, TRI);
3903 const LLT &VecTy = MRI.getType(Reg: VecReg);
3904 const TargetRegisterClass *VecRC =
3905 getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true);
3906 if (!VecRC) {
3907 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3908 return nullptr;
3909 }
3910
3911 // The register that we're going to copy into.
3912 Register InsertReg = VecReg;
3913 if (!DstReg)
3914 DstReg = MRI.createVirtualRegister(RegClass: DstRC);
3915 // If the lane index is 0, we just use a subregister COPY.
3916 if (LaneIdx == 0) {
3917 auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {})
3918 .addReg(RegNo: VecReg, flags: 0, SubReg: ExtractSubReg);
3919 RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
3920 return &*Copy;
3921 }
3922
3923 // Lane copies require 128-bit wide registers. If we're dealing with an
3924 // unpacked vector, then we need to move up to that width. Insert an implicit
3925 // def and a subregister insert to get us there.
3926 if (VecTy.getSizeInBits() != 128) {
3927 MachineInstr *ScalarToVector = emitScalarToVector(
3928 EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: VecReg, MIRBuilder);
3929 if (!ScalarToVector)
3930 return nullptr;
3931 InsertReg = ScalarToVector->getOperand(i: 0).getReg();
3932 }
3933
3934 MachineInstr *LaneCopyMI =
3935 MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx);
3936 constrainSelectedInstRegOperands(I&: *LaneCopyMI, TII, TRI, RBI);
3937
3938 // Make sure that we actually constrain the initial copy.
3939 RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
3940 return LaneCopyMI;
3941}
3942
3943bool AArch64InstructionSelector::selectExtractElt(
3944 MachineInstr &I, MachineRegisterInfo &MRI) {
3945 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3946 "unexpected opcode!");
3947 Register DstReg = I.getOperand(i: 0).getReg();
3948 const LLT NarrowTy = MRI.getType(Reg: DstReg);
3949 const Register SrcReg = I.getOperand(i: 1).getReg();
3950 const LLT WideTy = MRI.getType(Reg: SrcReg);
3951 (void)WideTy;
3952 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3953 "source register size too small!");
3954 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3955
3956 // Need the lane index to determine the correct copy opcode.
3957 MachineOperand &LaneIdxOp = I.getOperand(i: 2);
3958 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3959
3960 if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3961 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3962 return false;
3963 }
3964
3965 // Find the index to extract from.
3966 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI);
3967 if (!VRegAndVal)
3968 return false;
3969 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3970
3971
3972 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3973 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg,
3974 LaneIdx, MIRBuilder&: MIB);
3975 if (!Extract)
3976 return false;
3977
3978 I.eraseFromParent();
3979 return true;
3980}
3981
3982bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3983 MachineInstr &I, MachineRegisterInfo &MRI) {
3984 unsigned NumElts = I.getNumOperands() - 1;
3985 Register SrcReg = I.getOperand(i: NumElts).getReg();
3986 const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3987 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3988
3989 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3990 if (SrcTy.getSizeInBits() > 128) {
3991 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge\n");
3992 return false;
3993 }
3994
3995 // We implement a split vector operation by treating the sub-vectors as
3996 // scalars and extracting them.
3997 const RegisterBank &DstRB =
3998 *RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI);
3999 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4000 Register Dst = I.getOperand(i: OpIdx).getReg();
4001 MachineInstr *Extract =
4002 emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB);
4003 if (!Extract)
4004 return false;
4005 }
4006 I.eraseFromParent();
4007 return true;
4008}
4009
4010bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4011 MachineRegisterInfo &MRI) {
4012 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4013 "unexpected opcode");
4014
4015 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4016 if (RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI)->getID() !=
4017 AArch64::FPRRegBankID ||
4018 RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() !=
4019 AArch64::FPRRegBankID) {
4020 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4021 "currently unsupported.\n");
4022 return false;
4023 }
4024
4025 // The last operand is the vector source register, and every other operand is
4026 // a register to unpack into.
4027 unsigned NumElts = I.getNumOperands() - 1;
4028 Register SrcReg = I.getOperand(i: NumElts).getReg();
4029 const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
4030 const LLT WideTy = MRI.getType(Reg: SrcReg);
4031 (void)WideTy;
4032 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4033 "can only unmerge from vector or s128 types!");
4034 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4035 "source register size too small!");
4036
4037 if (!NarrowTy.isScalar())
4038 return selectSplitVectorUnmerge(I, MRI);
4039
4040 // Choose a lane copy opcode and subregister based off of the size of the
4041 // vector's elements.
4042 unsigned CopyOpc = 0;
4043 unsigned ExtractSubReg = 0;
4044 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits()))
4045 return false;
4046
4047 // Set up for the lane copies.
4048 MachineBasicBlock &MBB = *I.getParent();
4049
4050 // Stores the registers we'll be copying from.
4051 SmallVector<Register, 4> InsertRegs;
4052
4053 // We'll use the first register twice, so we only need NumElts-1 registers.
4054 unsigned NumInsertRegs = NumElts - 1;
4055
4056 // If our elements fit into exactly 128 bits, then we can copy from the source
4057 // directly. Otherwise, we need to do a bit of setup with some subregister
4058 // inserts.
4059 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4060 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4061 } else {
4062 // No. We have to perform subregister inserts. For each insert, create an
4063 // implicit def and a subregister insert, and save the register we create.
4064 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4065 Ty: LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: WideTy.getScalarSizeInBits()),
4066 RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
4067 unsigned SubReg = 0;
4068 bool Found = getSubRegForClass(RC, TRI, SubReg);
4069 (void)Found;
4070 assert(Found && "expected to find last operand's subreg idx");
4071 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4072 Register ImpDefReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
4073 MachineInstr &ImpDefMI =
4074 *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: TargetOpcode::IMPLICIT_DEF),
4075 DestReg: ImpDefReg);
4076
4077 // Now, create the subregister insert from SrcReg.
4078 Register InsertReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
4079 MachineInstr &InsMI =
4080 *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(),
4081 MCID: TII.get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: InsertReg)
4082 .addUse(RegNo: ImpDefReg)
4083 .addUse(RegNo: SrcReg)
4084 .addImm(Val: SubReg);
4085
4086 constrainSelectedInstRegOperands(I&: ImpDefMI, TII, TRI, RBI);
4087 constrainSelectedInstRegOperands(I&: InsMI, TII, TRI, RBI);
4088
4089 // Save the register so that we can copy from it after.
4090 InsertRegs.push_back(Elt: InsertReg);
4091 }
4092 }
4093
4094 // Now that we've created any necessary subregister inserts, we can
4095 // create the copies.
4096 //
4097 // Perform the first copy separately as a subregister copy.
4098 Register CopyTo = I.getOperand(i: 0).getReg();
4099 auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {})
4100 .addReg(RegNo: InsertRegs[0], flags: 0, SubReg: ExtractSubReg);
4101 constrainSelectedInstRegOperands(I&: *FirstCopy, TII, TRI, RBI);
4102
4103 // Now, perform the remaining copies as vector lane copies.
4104 unsigned LaneIdx = 1;
4105 for (Register InsReg : InsertRegs) {
4106 Register CopyTo = I.getOperand(i: LaneIdx).getReg();
4107 MachineInstr &CopyInst =
4108 *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: CopyOpc), DestReg: CopyTo)
4109 .addUse(RegNo: InsReg)
4110 .addImm(Val: LaneIdx);
4111 constrainSelectedInstRegOperands(I&: CopyInst, TII, TRI, RBI);
4112 ++LaneIdx;
4113 }
4114
4115 // Separately constrain the first copy's destination. Because of the
4116 // limitation in constrainOperandRegClass, we can't guarantee that this will
4117 // actually be constrained. So, do it ourselves using the second operand.
4118 const TargetRegisterClass *RC =
4119 MRI.getRegClassOrNull(Reg: I.getOperand(i: 1).getReg());
4120 if (!RC) {
4121 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4122 return false;
4123 }
4124
4125 RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI);
4126 I.eraseFromParent();
4127 return true;
4128}
4129
4130bool AArch64InstructionSelector::selectConcatVectors(
4131 MachineInstr &I, MachineRegisterInfo &MRI) {
4132 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4133 "Unexpected opcode");
4134 Register Dst = I.getOperand(i: 0).getReg();
4135 Register Op1 = I.getOperand(i: 1).getReg();
4136 Register Op2 = I.getOperand(i: 2).getReg();
4137 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB);
4138 if (!ConcatMI)
4139 return false;
4140 I.eraseFromParent();
4141 return true;
4142}
4143
4144unsigned
4145AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4146 MachineFunction &MF) const {
4147 Type *CPTy = CPVal->getType();
4148 Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy);
4149
4150 MachineConstantPool *MCP = MF.getConstantPool();
4151 return MCP->getConstantPoolIndex(C: CPVal, Alignment);
4152}
4153
4154MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4155 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4156 const TargetRegisterClass *RC;
4157 unsigned Opc;
4158 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4159 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType());
4160 switch (Size) {
4161 case 16:
4162 RC = &AArch64::FPR128RegClass;
4163 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4164 break;
4165 case 8:
4166 RC = &AArch64::FPR64RegClass;
4167 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4168 break;
4169 case 4:
4170 RC = &AArch64::FPR32RegClass;
4171 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4172 break;
4173 case 2:
4174 RC = &AArch64::FPR16RegClass;
4175 Opc = AArch64::LDRHui;
4176 break;
4177 default:
4178 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4179 << *CPVal->getType());
4180 return nullptr;
4181 }
4182
4183 MachineInstr *LoadMI = nullptr;
4184 auto &MF = MIRBuilder.getMF();
4185 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4186 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4187 // Use load(literal) for tiny code model.
4188 LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx);
4189 } else {
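// Materialize the constant pool address with ADRP and fold the low 12 bits
// into the load as a page offset.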
4190 auto Adrp =
4191 MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
4192 .addConstantPoolIndex(Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGE);
4193
4194 LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {Adrp})
4195 .addConstantPoolIndex(
4196 Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4197
4198 constrainSelectedInstRegOperands(I&: *Adrp, TII, TRI, RBI);
4199 }
4200
4201 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4202 LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo,
4203 F: MachineMemOperand::MOLoad,
4204 Size, BaseAlignment: Align(Size)));
4205 constrainSelectedInstRegOperands(I&: *LoadMI, TII, TRI, RBI);
4206 return LoadMI;
4207}
4208
4209/// Return an <Opcode, SubregIndex> pair to do a vector element insert of a
4210/// given size and RB.
4211static std::pair<unsigned, unsigned>
4212getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4213 unsigned Opc, SubregIdx;
4214 if (RB.getID() == AArch64::GPRRegBankID) {
4215 if (EltSize == 8) {
4216 Opc = AArch64::INSvi8gpr;
4217 SubregIdx = AArch64::bsub;
4218 } else if (EltSize == 16) {
4219 Opc = AArch64::INSvi16gpr;
4220 SubregIdx = AArch64::ssub;
4221 } else if (EltSize == 32) {
4222 Opc = AArch64::INSvi32gpr;
4223 SubregIdx = AArch64::ssub;
4224 } else if (EltSize == 64) {
4225 Opc = AArch64::INSvi64gpr;
4226 SubregIdx = AArch64::dsub;
4227 } else {
4228 llvm_unreachable("invalid elt size!");
4229 }
4230 } else {
4231 if (EltSize == 8) {
4232 Opc = AArch64::INSvi8lane;
4233 SubregIdx = AArch64::bsub;
4234 } else if (EltSize == 16) {
4235 Opc = AArch64::INSvi16lane;
4236 SubregIdx = AArch64::hsub;
4237 } else if (EltSize == 32) {
4238 Opc = AArch64::INSvi32lane;
4239 SubregIdx = AArch64::ssub;
4240 } else if (EltSize == 64) {
4241 Opc = AArch64::INSvi64lane;
4242 SubregIdx = AArch64::dsub;
4243 } else {
4244 llvm_unreachable("invalid elt size!");
4245 }
4246 }
4247 return std::make_pair(x&: Opc, y&: SubregIdx);
4248}
4249
4250MachineInstr *AArch64InstructionSelector::emitInstr(
4251 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4252 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4253 const ComplexRendererFns &RenderFns) const {
4254 assert(Opcode && "Expected an opcode?");
4255 assert(!isPreISelGenericOpcode(Opcode) &&
4256 "Function should only be used to produce selected instructions!");
4257 auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps);
4258 if (RenderFns)
4259 for (auto &Fn : *RenderFns)
4260 Fn(MI);
4261 constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
4262 return &*MI;
4263}
4264
4265MachineInstr *AArch64InstructionSelector::emitAddSub(
4266 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4267 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4268 MachineIRBuilder &MIRBuilder) const {
4269 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4270 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4271 auto Ty = MRI.getType(Reg: LHS.getReg());
4272 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4273 unsigned Size = Ty.getSizeInBits();
4274 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4275 bool Is32Bit = Size == 32;
4276
4277 // INSTRri form with positive arithmetic immediate.
4278 if (auto Fns = selectArithImmed(Root&: RHS))
4279 return emitInstr(Opcode: AddrModeAndSizeToOpcode[0][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4280 MIRBuilder, RenderFns: Fns);
4281
4282 // INSTRri form with negative arithmetic immediate.
4283 if (auto Fns = selectNegArithImmed(Root&: RHS))
4284 return emitInstr(Opcode: AddrModeAndSizeToOpcode[3][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4285 MIRBuilder, RenderFns: Fns);
4286
4287 // INSTRrx form.
4288 if (auto Fns = selectArithExtendedRegister(Root&: RHS))
4289 return emitInstr(Opcode: AddrModeAndSizeToOpcode[4][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4290 MIRBuilder, RenderFns: Fns);
4291
4292 // INSTRrs form.
4293 if (auto Fns = selectShiftedRegister(Root&: RHS))
4294 return emitInstr(Opcode: AddrModeAndSizeToOpcode[1][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4295 MIRBuilder, RenderFns: Fns);
4296 return emitInstr(Opcode: AddrModeAndSizeToOpcode[2][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS},
4297 MIRBuilder);
4298}
4299
4300MachineInstr *
4301AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4302 MachineOperand &RHS,
4303 MachineIRBuilder &MIRBuilder) const {
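// Rows follow emitAddSub's addressing-mode order (ri, rs, rr, negated ri,
// rx); each row holds the {64-bit, 32-bit} opcodes.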
4304 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4305 ._M_elems: {{AArch64::ADDXri, AArch64::ADDWri},
4306 {AArch64::ADDXrs, AArch64::ADDWrs},
4307 {AArch64::ADDXrr, AArch64::ADDWrr},
4308 {AArch64::SUBXri, AArch64::SUBWri},
4309 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4310 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder);
4311}
4312
4313MachineInstr *
4314AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4315 MachineOperand &RHS,
4316 MachineIRBuilder &MIRBuilder) const {
4317 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4318 ._M_elems: {{AArch64::ADDSXri, AArch64::ADDSWri},
4319 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4320 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4321 {AArch64::SUBSXri, AArch64::SUBSWri},
4322 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4323 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4324}
4325
4326MachineInstr *
4327AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4328 MachineOperand &RHS,
4329 MachineIRBuilder &MIRBuilder) const {
4330 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4331 ._M_elems: {{AArch64::SUBSXri, AArch64::SUBSWri},
4332 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4333 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4334 {AArch64::ADDSXri, AArch64::ADDSWri},
4335 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4336 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4337}
4338
4339MachineInstr *
4340AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4341 MachineOperand &RHS,
4342 MachineIRBuilder &MIRBuilder) const {
4343 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4344 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4345 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4346 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4347 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4348}
4349
4350MachineInstr *
4351AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4352 MachineOperand &RHS,
4353 MachineIRBuilder &MIRBuilder) const {
4354 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4355 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4356 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4357 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4358 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4359}
4360
4361MachineInstr *
4362AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4363 MachineIRBuilder &MIRBuilder) const {
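  // CMN is an alias of ADDS with a discarded result (e.g. cmn w0, w1 is
  // adds wzr, w0, w1), so emit an ADDS into a scratch vreg of the right
  // width; only NZCV is consumed.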
4364 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4365 bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4366 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4367 return emitADDS(Dst: MRI.createVirtualRegister(RegClass: RC), LHS, RHS, MIRBuilder);
4368}
4369
4370MachineInstr *
4371AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4372 MachineIRBuilder &MIRBuilder) const {
4373 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4374 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4375 LLT Ty = MRI.getType(Reg: LHS.getReg());
4376 unsigned RegSize = Ty.getSizeInBits();
4377 bool Is32Bit = (RegSize == 32);
4378 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4379 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4380 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4381 // ANDS needs a logical immediate for its immediate form. Check if we can
4382 // fold one in.
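  // e.g. a test against a mask like 0xff can be selected as ANDSWri with the
  // encoded logical immediate; otherwise we try the shifted-register and
  // plain register-register forms below.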
4383 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) {
4384 int64_t Imm = ValAndVReg->Value.getSExtValue();
4385
4386 if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) {
4387 auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[0][Is32Bit], DstOps: {Ty}, SrcOps: {LHS});
4388 TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
4389 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
4390 return &*TstMI;
4391 }
4392 }
4393
4394 if (auto Fns = selectLogicalShiftedRegister(Root&: RHS))
4395 return emitInstr(Opcode: OpcTable[1][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns);
4396 return emitInstr(Opcode: OpcTable[2][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder);
4397}
4398
4399MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4400 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4401 MachineIRBuilder &MIRBuilder) const {
4402 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4403 assert(Predicate.isPredicate() && "Expected predicate?");
4404 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4405 LLT CmpTy = MRI.getType(Reg: LHS.getReg());
4406 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4407 unsigned Size = CmpTy.getSizeInBits();
4408 (void)Size;
4409 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4410 // Fold the compare into a cmn or tst if possible.
4411 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4412 return FoldCmp;
4413 auto Dst = MRI.cloneVirtualRegister(VReg: LHS.getReg());
4414 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4415}
4416
4417MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4418 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4419 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4420#ifndef NDEBUG
4421 LLT Ty = MRI.getType(Dst);
4422 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4423 "Expected a 32-bit scalar register?");
4424#endif
4425 const Register ZReg = AArch64::WZR;
4426 AArch64CC::CondCode CC1, CC2;
4427 changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
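  // A floating-point predicate may need one or two AArch64 condition codes,
  // e.g. FCMP_UEQ maps to EQ or VS. With a single code one CSINC (cset)
  // suffices; with two we materialize both csets and OR them together below.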
4428 auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1);
4429 if (CC2 == AArch64CC::AL)
4430 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1,
4431 MIRBuilder);
4432 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4433 Register Def1Reg = MRI.createVirtualRegister(RegClass: RC);
4434 Register Def2Reg = MRI.createVirtualRegister(RegClass: RC);
4435 auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2);
4436 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, MIRBuilder);
4437 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC2, MIRBuilder);
4438 auto OrMI = MIRBuilder.buildInstr(Opc: AArch64::ORRWrr, DstOps: {Dst}, SrcOps: {Def1Reg, Def2Reg});
4439 constrainSelectedInstRegOperands(I&: *OrMI, TII, TRI, RBI);
4440 return &*OrMI;
4441}
4442
4443MachineInstr *AArch64InstructionSelector::emitFPCompare(
4444 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4445 std::optional<CmpInst::Predicate> Pred) const {
4446 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4447 LLT Ty = MRI.getType(Reg: LHS);
4448 if (Ty.isVector())
4449 return nullptr;
4450 unsigned OpSize = Ty.getSizeInBits();
4451 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4452
4453 // If this is a compare against +0.0, then we don't have
4454 // to explicitly materialize a constant.
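  // e.g. G_FCMP %x, 0.0 can be selected to the immediate form (fcmp s0, #0.0)
  // rather than first materializing 0.0 into a register.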
4455 const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI);
4456 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4457
4458 auto IsEqualityPred = [](CmpInst::Predicate P) {
4459 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4460 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4461 };
4462 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4463 // Try commutating the operands.
4464 const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI);
4465 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4466 ShouldUseImm = true;
4467 std::swap(a&: LHS, b&: RHS);
4468 }
4469 }
4470 unsigned CmpOpcTbl[2][3] = {
4471 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4472 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4473 unsigned CmpOpc =
4474 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4475
4476 // Partially build the compare. Decide if we need to add a use for the
4477 // third operand based on whether we're comparing against 0.0.
4478 auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS);
4479 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4480 if (!ShouldUseImm)
4481 CmpMI.addUse(RegNo: RHS);
4482 constrainSelectedInstRegOperands(I&: *CmpMI, TII, TRI, RBI);
4483 return &*CmpMI;
4484}
4485
4486MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4487 std::optional<Register> Dst, Register Op1, Register Op2,
4488 MachineIRBuilder &MIRBuilder) const {
4489 // We implement a vector concat by:
4490 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4491 // 2. Insert the upper vector into the destination's upper element
4492 // TODO: some of this code is common with G_BUILD_VECTOR handling.
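  // e.g. concatenating two <2 x s32> values: widen each 64-bit source into a
  // 128-bit register, then insert the second source into the upper 64-bit
  // lane of the widened first source to form the <4 x s32> result.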
4493 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4494
4495 const LLT Op1Ty = MRI.getType(Reg: Op1);
4496 const LLT Op2Ty = MRI.getType(Reg: Op2);
4497
4498 if (Op1Ty != Op2Ty) {
4499 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4500 return nullptr;
4501 }
4502 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4503
4504 if (Op1Ty.getSizeInBits() >= 128) {
4505 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4506 return nullptr;
4507 }
4508
4509 // At the moment we just support 64 bit vector concats.
4510 if (Op1Ty.getSizeInBits() != 64) {
4511 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4512 return nullptr;
4513 }
4514
4515 const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits());
4516 const RegisterBank &FPRBank = *RBI.getRegBank(Reg: Op1, MRI, TRI);
4517 const TargetRegisterClass *DstRC =
4518 getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: 2), RB: FPRBank);
4519
4520 MachineInstr *WidenedOp1 =
4521 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder);
4522 MachineInstr *WidenedOp2 =
4523 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder);
4524 if (!WidenedOp1 || !WidenedOp2) {
4525 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4526 return nullptr;
4527 }
4528
4529 // Now do the insert of the upper element.
4530 unsigned InsertOpc, InsSubRegIdx;
4531 std::tie(args&: InsertOpc, args&: InsSubRegIdx) =
4532 getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits());
4533
4534 if (!Dst)
4535 Dst = MRI.createVirtualRegister(RegClass: DstRC);
4536 auto InsElt =
4537 MIRBuilder
4538 .buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: 0).getReg()})
4539 .addImm(Val: 1) /* Lane index */
4540 .addUse(RegNo: WidenedOp2->getOperand(i: 0).getReg())
4541 .addImm(Val: 0);
4542 constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
4543 return &*InsElt;
4544}
4545
4546MachineInstr *
4547AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4548 Register Src2, AArch64CC::CondCode Pred,
4549 MachineIRBuilder &MIRBuilder) const {
4550 auto &MRI = *MIRBuilder.getMRI();
4551 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst);
4552 // If we used a register class, then this won't necessarily have an LLT.
4553 // Compute the size based on whether we have a class or a bank.
4554 unsigned Size;
4555 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4556 Size = TRI.getRegSizeInBits(RC: *RC);
4557 else
4558 Size = MRI.getType(Reg: Dst).getSizeInBits();
4559 // Some destinations are s1 (e.g. carry-out regs), so just require <= 64 bits.
4560 assert(Size <= 64 && "Expected 64 bits or less only!");
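  // CSINC Dst, Src1, Src2, CC yields Src1 when CC holds and Src2 + 1
  // otherwise; with both sources tied to the zero register this is the
  // familiar cset idiom.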
4561 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4562 unsigned Opc = OpcTable[Size == 64];
4563 auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred);
4564 constrainSelectedInstRegOperands(I&: *CSINC, TII, TRI, RBI);
4565 return &*CSINC;
4566}
4567
4568MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4569 Register CarryReg) {
4570 MachineRegisterInfo *MRI = MIB.getMRI();
4571 unsigned Opcode = I.getOpcode();
4572
4573 // If the instruction is a SUB, we need to negate the carry,
4574 // because borrowing is indicated by carry-flag == 0.
4575 bool NeedsNegatedCarry =
4576 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4577
4578 // If the previous instruction will already produce the correct carry, do not
4579 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4580 // generated during legalization of wide add/sub. This optimization depends on
4581 // these sequences not being interrupted by other instructions.
4582 // We have to select the previous instruction before the carry-using
4583 // instruction is deleted by the calling function, otherwise the previous
4584 // instruction might become dead and would get deleted.
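  // e.g. a wide add legalized as
  //   %lo, %c = G_UADDO %a_lo, %b_lo
  //   %hi, %d = G_UADDE %a_hi, %b_hi, %c
  // selects to ADDS + ADCS without an extra carry-materializing instruction.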
4585 MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg);
4586 if (SrcMI == I.getPrevNode()) {
4587 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) {
4588 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4589 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4590 CarrySrcMI->isUnsigned() &&
4591 CarrySrcMI->getCarryOutReg() == CarryReg &&
4592 selectAndRestoreState(I&: *SrcMI))
4593 return nullptr;
4594 }
4595 }
4596
4597 Register DeadReg = MRI->createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
4598
4599 if (NeedsNegatedCarry) {
4600 // (0 - Carry) sets !C in NZCV when Carry == 1
4601 Register ZReg = AArch64::WZR;
4602 return emitInstr(Opcode: AArch64::SUBSWrr, DstOps: {DeadReg}, SrcOps: {ZReg, CarryReg}, MIRBuilder&: MIB);
4603 }
4604
4605 // (Carry - 1) sets !C in NZCV when Carry == 0
4606 auto Fns = select12BitValueWithLeftShift(Immed: 1);
4607 return emitInstr(Opcode: AArch64::SUBSWri, DstOps: {DeadReg}, SrcOps: {CarryReg}, MIRBuilder&: MIB, RenderFns: Fns);
4608}
4609
4610bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4611 MachineRegisterInfo &MRI) {
4612 auto &CarryMI = cast<GAddSubCarryOut>(Val&: I);
4613
4614 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) {
4615 // Set NZCV carry according to carry-in VReg
4616 emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg());
4617 }
4618
4619 // Emit the operation and get the correct condition code.
4620 auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(),
4621 LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB);
4622
4623 Register CarryOutReg = CarryMI.getCarryOutReg();
4624
4625 // Don't convert carry-out to VReg if it is never used
4626 if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) {
4627 // Now, put the overflow result in the register given by the first operand
4628 // to the overflow op. CSINC increments the result when the predicate is
4629 // false, so to get the increment when it's true, we need to use the
4630 // inverse. In this case, we want to increment when carry is set.
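    // e.g. for G_UADDO the overflow condition is HS (carry set), so we emit
    // csinc wCarryOut, wzr, wzr, lo, which is cset wCarryOut, hs.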
4631 Register ZReg = AArch64::WZR;
4632 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4633 Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB);
4634 }
4635
4636 I.eraseFromParent();
4637 return true;
4638}
4639
4640std::pair<MachineInstr *, AArch64CC::CondCode>
4641AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4642 MachineOperand &LHS,
4643 MachineOperand &RHS,
4644 MachineIRBuilder &MIRBuilder) const {
4645 switch (Opcode) {
4646 default:
4647 llvm_unreachable("Unexpected opcode!");
4648 case TargetOpcode::G_SADDO:
4649 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4650 case TargetOpcode::G_UADDO:
4651 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4652 case TargetOpcode::G_SSUBO:
4653 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4654 case TargetOpcode::G_USUBO:
4655 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4656 case TargetOpcode::G_SADDE:
4657 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4658 case TargetOpcode::G_UADDE:
4659 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4660 case TargetOpcode::G_SSUBE:
4661 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4662 case TargetOpcode::G_USUBE:
4663 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4664 }
4665}
4666
4667/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4668/// expressed as a conjunction.
4669/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4670/// changing the conditions on the CMP tests.
4671/// (this means we can call emitConjunctionRec() with
4672/// Negate==true on this sub-tree)
4673/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4674/// cannot do the negation naturally. We are required to
4675/// emit the subtree first in this case.
4676/// \param WillNegate Is true if we are called when the result of this
4677/// subexpression must be negated. This happens when the
4678/// outer expression is an OR. We can use this fact to know
4679/// that we have a double negation (or (or ...) ...) that
4680/// can be implemented for free.
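/// For example, for (icmp a, b) || (icmp c, d) both leaves are compares and
/// can be negated by inverting their predicates, so the whole disjunction can
/// be lowered to a CMP followed by a CCMP chain.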
4681static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4682 bool WillNegate, MachineRegisterInfo &MRI,
4683 unsigned Depth = 0) {
4684 if (!MRI.hasOneNonDBGUse(RegNo: Val))
4685 return false;
4686 MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4687 unsigned Opcode = ValDef->getOpcode();
4688 if (isa<GAnyCmp>(Val: ValDef)) {
4689 CanNegate = true;
4690 MustBeFirst = false;
4691 return true;
4692 }
4693 // Protect against exponential runtime and stack overflow.
4694 if (Depth > 6)
4695 return false;
4696 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4697 bool IsOR = Opcode == TargetOpcode::G_OR;
4698 Register O0 = ValDef->getOperand(i: 1).getReg();
4699 Register O1 = ValDef->getOperand(i: 2).getReg();
4700 bool CanNegateL;
4701 bool MustBeFirstL;
4702 if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + 1))
4703 return false;
4704 bool CanNegateR;
4705 bool MustBeFirstR;
4706 if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + 1))
4707 return false;
4708
4709 if (MustBeFirstL && MustBeFirstR)
4710 return false;
4711
4712 if (IsOR) {
4713 // For an OR expression we need to be able to naturally negate at least
4714 // one side or we cannot do the transformation at all.
4715 if (!CanNegateL && !CanNegateR)
4716 return false;
4717 // If the result of the OR will be negated and we can naturally negate
4718 // the leaves, then this sub-tree as a whole negates naturally.
4719 CanNegate = WillNegate && CanNegateL && CanNegateR;
4720 // If we cannot naturally negate the whole sub-tree, then this must be
4721 // emitted first.
4722 MustBeFirst = !CanNegate;
4723 } else {
4724 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4725 // We cannot naturally negate an AND operation.
4726 CanNegate = false;
4727 MustBeFirst = MustBeFirstL || MustBeFirstR;
4728 }
4729 return true;
4730 }
4731 return false;
4732}
4733
4734MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4735 Register LHS, Register RHS, CmpInst::Predicate CC,
4736 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4737 MachineIRBuilder &MIB) const {
4738 auto &MRI = *MIB.getMRI();
4739 LLT OpTy = MRI.getType(Reg: LHS);
4740 unsigned CCmpOpc;
4741 std::optional<ValueAndVReg> C;
4742 if (CmpInst::isIntPredicate(P: CC)) {
4743 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4744 C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
4745 if (!C || C->Value.sgt(RHS: 31) || C->Value.slt(RHS: -31))
4746 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4747 else if (C->Value.ule(RHS: 31))
4748 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4749 else
4750 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4751 } else {
4752 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4753 OpTy.getSizeInBits() == 64);
4754 switch (OpTy.getSizeInBits()) {
4755 case 16:
4756 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4757 CCmpOpc = AArch64::FCCMPHrr;
4758 break;
4759 case 32:
4760 CCmpOpc = AArch64::FCCMPSrr;
4761 break;
4762 case 64:
4763 CCmpOpc = AArch64::FCCMPDrr;
4764 break;
4765 default:
4766 return nullptr;
4767 }
4768 }
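  // CCMP performs the compare and sets NZCV normally only while Predicate
  // holds; otherwise it sets NZCV to the immediate below. We choose the NZCV
  // value satisfying the inverse of OutCC so the chain fails when Predicate
  // fails.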
4769 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
4770 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC);
4771 auto CCmp =
4772 MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS});
4773 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4774 CCmp.addImm(Val: C->Value.getZExtValue());
4775 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4776 CCmp.addImm(Val: C->Value.abs().getZExtValue());
4777 else
4778 CCmp.addReg(RegNo: RHS);
4779 CCmp.addImm(Val: NZCV).addImm(Val: Predicate);
4780 constrainSelectedInstRegOperands(I&: *CCmp, TII, TRI, RBI);
4781 return &*CCmp;
4782}
4783
4784MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4785 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4786 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4787 // We're at a tree leaf, produce a conditional comparison operation.
4788 auto &MRI = *MIB.getMRI();
4789 MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4790 unsigned Opcode = ValDef->getOpcode();
4791 if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) {
4792 Register LHS = Cmp->getLHSReg();
4793 Register RHS = Cmp->getRHSReg();
4794 CmpInst::Predicate CC = Cmp->getCond();
4795 if (Negate)
4796 CC = CmpInst::getInversePredicate(pred: CC);
4797 if (isa<GICmp>(Val: Cmp)) {
4798 OutCC = changeICMPPredToAArch64CC(P: CC);
4799 } else {
4800 // Handle special FP cases.
4801 AArch64CC::CondCode ExtraCC;
4802 changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC);
4803 // Some floating point conditions can't be tested with a single condition
4804 // code. Construct an additional comparison in this case.
4805 if (ExtraCC != AArch64CC::AL) {
4806 MachineInstr *ExtraCmp;
4807 if (!CCOp)
4808 ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC);
4809 else
4810 ExtraCmp =
4811 emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB);
4812 CCOp = ExtraCmp->getOperand(i: 0).getReg();
4813 Predicate = ExtraCC;
4814 }
4815 }
4816
4817 // Produce a normal comparison if we are first in the chain
4818 if (!CCOp) {
4819 auto Dst = MRI.cloneVirtualRegister(VReg: LHS);
4820 if (isa<GICmp>(Val: Cmp))
4821 return emitSUBS(Dst, LHS&: Cmp->getOperand(i: 2), RHS&: Cmp->getOperand(i: 3), MIRBuilder&: MIB);
4822 return emitFPCompare(LHS: Cmp->getOperand(i: 2).getReg(),
4823 RHS: Cmp->getOperand(i: 3).getReg(), MIRBuilder&: MIB);
4824 }
4825 // Otherwise produce a ccmp.
4826 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4827 }
4828 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4829
4830 bool IsOR = Opcode == TargetOpcode::G_OR;
4831
4832 Register LHS = ValDef->getOperand(i: 1).getReg();
4833 bool CanNegateL;
4834 bool MustBeFirstL;
4835 bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI);
4836 assert(ValidL && "Valid conjunction/disjunction tree");
4837 (void)ValidL;
4838
4839 Register RHS = ValDef->getOperand(i: 2).getReg();
4840 bool CanNegateR;
4841 bool MustBeFirstR;
4842 bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI);
4843 assert(ValidR && "Valid conjunction/disjunction tree");
4844 (void)ValidR;
4845
4846 // Swap sub-tree that must come first to the right side.
4847 if (MustBeFirstL) {
4848 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4849 std::swap(a&: LHS, b&: RHS);
4850 std::swap(a&: CanNegateL, b&: CanNegateR);
4851 std::swap(a&: MustBeFirstL, b&: MustBeFirstR);
4852 }
4853
4854 bool NegateR;
4855 bool NegateAfterR;
4856 bool NegateL;
4857 bool NegateAfterAll;
4858 if (Opcode == TargetOpcode::G_OR) {
4859 // Swap the sub-tree that we can negate naturally to the left.
4860 if (!CanNegateL) {
4861 assert(CanNegateR && "at least one side must be negatable");
4862 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4863 assert(!Negate);
4864 std::swap(a&: LHS, b&: RHS);
4865 NegateR = false;
4866 NegateAfterR = true;
4867 } else {
4868 // Negate the left sub-tree if possible, otherwise negate the result.
4869 NegateR = CanNegateR;
4870 NegateAfterR = !CanNegateR;
4871 }
4872 NegateL = true;
4873 NegateAfterAll = !Negate;
4874 } else {
4875 assert(Opcode == TargetOpcode::G_AND &&
4876 "Valid conjunction/disjunction tree");
4877 assert(!Negate && "Valid conjunction/disjunction tree");
4878
4879 NegateL = false;
4880 NegateR = false;
4881 NegateAfterR = false;
4882 NegateAfterAll = false;
4883 }
4884
4885 // Emit sub-trees.
4886 AArch64CC::CondCode RHSCC;
4887 MachineInstr *CmpR =
4888 emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB);
4889 if (NegateAfterR)
4890 RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC);
4891 MachineInstr *CmpL = emitConjunctionRec(
4892 Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: 0).getReg(), Predicate: RHSCC, MIB);
4893 if (NegateAfterAll)
4894 OutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
4895 return CmpL;
4896}
4897
4898MachineInstr *AArch64InstructionSelector::emitConjunction(
4899 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4900 bool DummyCanNegate;
4901 bool DummyMustBeFirst;
4902 if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false,
4903 MRI&: *MIB.getMRI()))
4904 return nullptr;
4905 return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register(), Predicate: AArch64CC::AL, MIB);
4906}
4907
4908bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4909 MachineInstr &CondMI) {
4910 AArch64CC::CondCode AArch64CC;
4911 MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB);
4912 if (!ConjMI)
4913 return false;
4914
4915 emitSelect(Dst: SelI.getReg(Idx: 0), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB);
4916 SelI.eraseFromParent();
4917 return true;
4918}
4919
4920bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4921 MachineRegisterInfo &MRI = *MIB.getMRI();
4922 // We want to recognize this pattern:
4923 //
4924 // $z = G_FCMP pred, $x, $y
4925 // ...
4926 // $w = G_SELECT $z, $a, $b
4927 //
4928 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4929 // some copies/truncs in between).
4930 //
4931 // If we see this, then we can emit something like this:
4932 //
4933 // fcmp $x, $y
4934 // fcsel $w, $a, $b, pred
4935 //
4936 // Rather than emitting both of the rather long sequences in the standard
4937 // G_FCMP/G_SELECT select methods.
4938
4939 // First, check if the condition is defined by a compare.
4940 MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());
4941
4942 // We can only fold if all of the defs have one use.
4943 Register CondDefReg = CondDef->getOperand(i: 0).getReg();
4944 if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) {
4945 // Unless it's another select.
4946 for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) {
4947 if (CondDef == &UI)
4948 continue;
4949 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4950 return false;
4951 }
4952 }
4953
4954 // Is the condition defined by a compare?
4955 unsigned CondOpc = CondDef->getOpcode();
4956 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
4957 if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef))
4958 return true;
4959 return false;
4960 }
4961
4962 AArch64CC::CondCode CondCode;
4963 if (CondOpc == TargetOpcode::G_ICMP) {
4964 auto Pred =
4965 static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
4966 CondCode = changeICMPPredToAArch64CC(P: Pred);
4967 emitIntegerCompare(LHS&: CondDef->getOperand(i: 2), RHS&: CondDef->getOperand(i: 3),
4968 Predicate&: CondDef->getOperand(i: 1), MIRBuilder&: MIB);
4969 } else {
4970 // Get the condition code for the select.
4971 auto Pred =
4972 static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
4973 AArch64CC::CondCode CondCode2;
4974 changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2);
4975
4976 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4977 // instructions to emit the comparison.
4978 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4979 // unnecessary.
4980 if (CondCode2 != AArch64CC::AL)
4981 return false;
4982
4983 if (!emitFPCompare(LHS: CondDef->getOperand(i: 2).getReg(),
4984 RHS: CondDef->getOperand(i: 3).getReg(), MIRBuilder&: MIB)) {
4985 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4986 return false;
4987 }
4988 }
4989
4990 // Emit the select.
4991 emitSelect(Dst: I.getOperand(i: 0).getReg(), True: I.getOperand(i: 2).getReg(),
4992 False: I.getOperand(i: 3).getReg(), CC: CondCode, MIB);
4993 I.eraseFromParent();
4994 return true;
4995}
4996
4997MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4998 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4999 MachineIRBuilder &MIRBuilder) const {
5000 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5001 "Unexpected MachineOperand");
5002 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5003 // We want to find this sort of thing:
5004 // x = G_SUB 0, y
5005 // G_ICMP z, x
5006 //
5007 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5008 // e.g:
5009 //
5010 // cmn z, y
5011
5012 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5013 MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
5014 MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
5015 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5016 // Given this:
5017 //
5018 // x = G_SUB 0, y
5019 // G_ICMP x, z
5020 //
5021 // Produce this:
5022 //
5023 // cmn y, z
5024 if (isCMN(MaybeSub: LHSDef, Pred: P, MRI))
5025 return emitCMN(LHS&: LHSDef->getOperand(i: 2), RHS, MIRBuilder);
5026
5027 // Same idea here, but with the RHS of the compare instead:
5028 //
5029 // Given this:
5030 //
5031 // x = G_SUB 0, y
5032 // G_ICMP z, x
5033 //
5034 // Produce this:
5035 //
5036 // cmn z, y
5037 if (isCMN(MaybeSub: RHSDef, Pred: P, MRI))
5038 return emitCMN(LHS, RHS&: RHSDef->getOperand(i: 2), MIRBuilder);
5039
5040 // Given this:
5041 //
5042 // z = G_AND x, y
5043 // G_ICMP z, 0
5044 //
5045 // Produce this if the compare is signed or an equality compare:
5046 //
5047 // tst x, y
5048 if (!CmpInst::isUnsigned(predicate: P) && LHSDef &&
5049 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5050 // Make sure that the RHS is 0.
5051 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI);
5052 if (!ValAndVReg || ValAndVReg->Value != 0)
5053 return nullptr;
5054
5055 return emitTST(LHS&: LHSDef->getOperand(i: 1),
5056 RHS&: LHSDef->getOperand(i: 2), MIRBuilder);
5057 }
5058
5059 return nullptr;
5060}
5061
5062bool AArch64InstructionSelector::selectShuffleVector(
5063 MachineInstr &I, MachineRegisterInfo &MRI) {
5064 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5065 Register Src1Reg = I.getOperand(i: 1).getReg();
5066 const LLT Src1Ty = MRI.getType(Reg: Src1Reg);
5067 Register Src2Reg = I.getOperand(i: 2).getReg();
5068 const LLT Src2Ty = MRI.getType(Reg: Src2Reg);
5069 ArrayRef<int> Mask = I.getOperand(i: 3).getShuffleMask();
5070
5071 MachineBasicBlock &MBB = *I.getParent();
5072 MachineFunction &MF = *MBB.getParent();
5073 LLVMContext &Ctx = MF.getFunction().getContext();
5074
5075 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5076 // it's originated from a <1 x T> type. Those should have been lowered into
5077 // G_BUILD_VECTOR earlier.
5078 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5079 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5080 return false;
5081 }
5082
5083 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5084
5085 SmallVector<Constant *, 64> CstIdxs;
5086 for (int Val : Mask) {
5087 // For now, we'll just assume any undef indexes are 0. This should be
5088 // optimized in the future, e.g. to select DUP etc.
5089 Val = Val < 0 ? 0 : Val;
5090 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5091 unsigned Offset = Byte + Val * BytesPerElt;
5092 CstIdxs.emplace_back(Args: ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: Offset));
5093 }
5094 }
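  // e.g. a <4 x s32> shuffle with mask <1,0,3,2> expands each mask element
  // into four byte indices, giving the 16-byte TBL index vector
  // <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11>.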
5095
5096 // Use a constant pool to load the index vector for TBL.
5097 Constant *CPVal = ConstantVector::get(V: CstIdxs);
5098 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB);
5099 if (!IndexLoad) {
5100 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5101 return false;
5102 }
5103
5104 if (DstTy.getSizeInBits() != 128) {
5105 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5106 // This case can be done with TBL1.
5107 MachineInstr *Concat =
5108 emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB);
5109 if (!Concat) {
5110 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5111 return false;
5112 }
5113
5114 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
5115 IndexLoad = emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass,
5116 Scalar: IndexLoad->getOperand(i: 0).getReg(), MIRBuilder&: MIB);
5117
5118 auto TBL1 = MIB.buildInstr(
5119 Opc: AArch64::TBLv16i8One, DstOps: {&AArch64::FPR128RegClass},
5120 SrcOps: {Concat->getOperand(i: 0).getReg(), IndexLoad->getOperand(i: 0).getReg()});
5121 constrainSelectedInstRegOperands(I&: *TBL1, TII, TRI, RBI);
5122
5123 auto Copy =
5124 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {})
5125 .addReg(RegNo: TBL1.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub);
5126 RBI.constrainGenericRegister(Reg: Copy.getReg(Idx: 0), RC: AArch64::FPR64RegClass, MRI);
5127 I.eraseFromParent();
5128 return true;
5129 }
5130
5131 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5132 // Q registers for regalloc.
5133 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5134 auto RegSeq = createQTuple(Regs, MIB);
5135 auto TBL2 = MIB.buildInstr(Opc: AArch64::TBLv16i8Two, DstOps: {I.getOperand(i: 0)},
5136 SrcOps: {RegSeq, IndexLoad->getOperand(i: 0)});
5137 constrainSelectedInstRegOperands(I&: *TBL2, TII, TRI, RBI);
5138 I.eraseFromParent();
5139 return true;
5140}
5141
5142MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5143 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5144 unsigned LaneIdx, const RegisterBank &RB,
5145 MachineIRBuilder &MIRBuilder) const {
5146 MachineInstr *InsElt = nullptr;
5147 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5148 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5149
5150 // Create a register to define with the insert if one wasn't passed in.
5151 if (!DstReg)
5152 DstReg = MRI.createVirtualRegister(RegClass: DstRC);
5153
5154 unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits();
5155 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5156
5157 if (RB.getID() == AArch64::FPRRegBankID) {
5158 auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder);
5159 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5160 .addImm(Val: LaneIdx)
5161 .addUse(RegNo: InsSub->getOperand(i: 0).getReg())
5162 .addImm(Val: 0);
5163 } else {
5164 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5165 .addImm(Val: LaneIdx)
5166 .addUse(RegNo: EltReg);
5167 }
5168
5169 constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
5170 return InsElt;
5171}
5172
5173bool AArch64InstructionSelector::selectUSMovFromExtend(
5174 MachineInstr &MI, MachineRegisterInfo &MRI) {
5175 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5176 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5177 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5178 return false;
5179 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5180 const Register DefReg = MI.getOperand(i: 0).getReg();
5181 const LLT DstTy = MRI.getType(Reg: DefReg);
5182 unsigned DstSize = DstTy.getSizeInBits();
5183
5184 if (DstSize != 32 && DstSize != 64)
5185 return false;
5186
5187 MachineInstr *Extract = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT,
5188 Reg: MI.getOperand(i: 1).getReg(), MRI);
5189 int64_t Lane;
5190 if (!Extract || !mi_match(R: Extract->getOperand(i: 2).getReg(), MRI, P: m_ICst(Cst&: Lane)))
5191 return false;
5192 Register Src0 = Extract->getOperand(i: 1).getReg();
5193
5194 const LLT &VecTy = MRI.getType(Reg: Src0);
5195
5196 if (VecTy.getSizeInBits() != 128) {
5197 const MachineInstr *ScalarToVector = emitScalarToVector(
5198 EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: Src0, MIRBuilder&: MIB);
5199 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5200 Src0 = ScalarToVector->getOperand(i: 0).getReg();
5201 }
5202
5203 unsigned Opcode;
5204 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5205 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5206 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5207 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5208 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5209 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5210 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5211 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5212 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5213 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5214 else
5215 llvm_unreachable("Unexpected type combo for S/UMov!");
5216
5217 // We may need to generate one of these, depending on the type and sign of the
5218 // input:
5219 // DstReg = SMOV Src0, Lane;
5220 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5221 MachineInstr *ExtI = nullptr;
5222 if (DstSize == 64 && !IsSigned) {
5223 Register NewReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
5224 MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane);
5225 ExtI = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
5226 .addImm(Val: 0)
5227 .addUse(RegNo: NewReg)
5228 .addImm(Val: AArch64::sub_32);
5229 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
5230 } else
5231 ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane);
5232
5233 constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
5234 MI.eraseFromParent();
5235 return true;
5236}
5237
5238MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5239 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5240 unsigned int Op;
5241 if (DstSize == 128) {
5242 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5243 return nullptr;
5244 Op = AArch64::MOVIv16b_ns;
5245 } else {
5246 Op = AArch64::MOVIv8b_ns;
5247 }
5248
5249 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5250
5251 if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) {
5252 Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val);
5253 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5254 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5255 return &*Mov;
5256 }
5257 return nullptr;
5258}
5259
5260MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5261 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5262 bool Inv) {
5263
5264 unsigned int Op;
5265 if (DstSize == 128) {
5266 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5267 return nullptr;
5268 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5269 } else {
5270 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5271 }
5272
5273 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5274 uint64_t Shift;
5275
5276 if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) {
5277 Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val);
5278 Shift = 0;
5279 } else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) {
5280 Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val);
5281 Shift = 8;
5282 } else
5283 return nullptr;
5284
5285 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5286 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5287 return &*Mov;
5288}
5289
5290MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5291 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5292 bool Inv) {
5293
5294 unsigned int Op;
5295 if (DstSize == 128) {
5296 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5297 return nullptr;
5298 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5299 } else {
5300 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5301 }
5302
5303 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5304 uint64_t Shift;
5305
5306 if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) {
5307 Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val);
5308 Shift = 0;
5309 } else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) {
5310 Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val);
5311 Shift = 8;
5312 } else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) {
5313 Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val);
5314 Shift = 16;
5315 } else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) {
5316 Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val);
5317 Shift = 24;
5318 } else
5319 return nullptr;
5320
5321 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5322 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5323 return &*Mov;
5324}
5325
5326MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5327 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5328
5329 unsigned int Op;
5330 if (DstSize == 128) {
5331 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5332 return nullptr;
5333 Op = AArch64::MOVIv2d_ns;
5334 } else {
5335 Op = AArch64::MOVID;
5336 }
5337
5338 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5339 if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) {
5340 Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val);
5341 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5342 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5343 return &*Mov;
5344 }
5345 return nullptr;
5346}
5347
5348MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5349 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5350 bool Inv) {
5351
5352 unsigned int Op;
5353 if (DstSize == 128) {
5354 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5355 return nullptr;
5356 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5357 } else {
5358 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5359 }
5360
5361 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5362 uint64_t Shift;
5363
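  // The shift operand below encodes an MSL ("shift ones") amount: 264 is
  // MSL #8 and 272 is MSL #16.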
5364 if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) {
5365 Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val);
5366 Shift = 264;
5367 } else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) {
5368 Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val);
5369 Shift = 272;
5370 } else
5371 return nullptr;
5372
5373 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5374 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5375 return &*Mov;
5376}
5377
5378MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5379 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5380
5381 unsigned int Op;
5382 bool IsWide = false;
5383 if (DstSize == 128) {
5384 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5385 return nullptr;
5386 Op = AArch64::FMOVv4f32_ns;
5387 IsWide = true;
5388 } else {
5389 Op = AArch64::FMOVv2f32_ns;
5390 }
5391
5392 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5393
5394 if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) {
5395 Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val);
5396 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) {
5397 Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val);
5398 Op = AArch64::FMOVv2f64_ns;
5399 } else
5400 return nullptr;
5401
5402 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5403 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5404 return &*Mov;
5405}
5406
5407bool AArch64InstructionSelector::selectIndexedExtLoad(
5408 MachineInstr &MI, MachineRegisterInfo &MRI) {
5409 auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI);
5410 Register Dst = ExtLd.getDstReg();
5411 Register WriteBack = ExtLd.getWritebackReg();
5412 Register Base = ExtLd.getBaseReg();
5413 Register Offset = ExtLd.getOffsetReg();
5414 LLT Ty = MRI.getType(Reg: Dst);
5415 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5416 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5417 bool IsPre = ExtLd.isPre();
5418 bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd);
5419 bool InsertIntoXReg = false;
5420 bool IsDst64 = Ty.getSizeInBits() == 64;
5421
5422 unsigned Opc = 0;
5423 LLT NewLdDstTy;
5424 LLT s32 = LLT::scalar(SizeInBits: 32);
5425 LLT s64 = LLT::scalar(SizeInBits: 64);
5426
5427 if (MemSizeBits == 8) {
5428 if (IsSExt) {
5429 if (IsDst64)
5430 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5431 else
5432 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5433 NewLdDstTy = IsDst64 ? s64 : s32;
5434 } else {
5435 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5436 InsertIntoXReg = IsDst64;
5437 NewLdDstTy = s32;
5438 }
5439 } else if (MemSizeBits == 16) {
5440 if (IsSExt) {
5441 if (IsDst64)
5442 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5443 else
5444 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5445 NewLdDstTy = IsDst64 ? s64 : s32;
5446 } else {
5447 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5448 InsertIntoXReg = IsDst64;
5449 NewLdDstTy = s32;
5450 }
5451 } else if (MemSizeBits == 32) {
5452 if (IsSExt) {
5453 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5454 NewLdDstTy = s64;
5455 } else {
5456 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5457 InsertIntoXReg = IsDst64;
5458 NewLdDstTy = s32;
5459 }
5460 } else {
5461 llvm_unreachable("Unexpected size for indexed load");
5462 }
5463
5464 if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5465 return false; // We should be on gpr.
5466
5467 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5468 if (!Cst)
5469 return false; // Shouldn't happen, but just in case.
5470
5471 auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base})
5472 .addImm(Val: Cst->getSExtValue());
5473 LdMI.cloneMemRefs(OtherMI: ExtLd);
5474 constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
5475 // Make sure to select the load with the MemTy as the dest type, and then
5476 // insert into X reg if needed.
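  // e.g. a post-indexed zero/any-extending load of s8 into a 64-bit dst uses
  // LDRBBpost (which produces a 32-bit result) followed by a SUBREG_TO_REG
  // into the 64-bit register.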
5477 if (InsertIntoXReg) {
5478 // Generate a SUBREG_TO_REG.
5479 auto SubToReg = MIB.buildInstr(Opc: TargetOpcode::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
5480 .addImm(Val: 0)
5481 .addUse(RegNo: LdMI.getReg(Idx: 1))
5482 .addImm(Val: AArch64::sub_32);
5483 RBI.constrainGenericRegister(Reg: SubToReg.getReg(Idx: 0), RC: AArch64::GPR64RegClass,
5484 MRI);
5485 } else {
5486 auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: 1));
5487 selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
5488 }
5489 MI.eraseFromParent();
5490
5491 return true;
5492}
5493
5494bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5495 MachineRegisterInfo &MRI) {
5496 auto &Ld = cast<GIndexedLoad>(Val&: MI);
5497 Register Dst = Ld.getDstReg();
5498 Register WriteBack = Ld.getWritebackReg();
5499 Register Base = Ld.getBaseReg();
5500 Register Offset = Ld.getOffsetReg();
5501 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5502 "Unexpected type for indexed load");
5503 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5504
5505 if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes())
5506 return selectIndexedExtLoad(MI, MRI);
5507
5508 unsigned Opc = 0;
5509 if (Ld.isPre()) {
5510 static constexpr unsigned GPROpcodes[] = {
5511 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5512 AArch64::LDRXpre};
5513 static constexpr unsigned FPROpcodes[] = {
5514 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5515 AArch64::LDRQpre};
5516 if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5517 Opc = FPROpcodes[Log2_32(Value: MemSize)];
5518 else
5519 Opc = GPROpcodes[Log2_32(Value: MemSize)];
5520 } else {
5521 static constexpr unsigned GPROpcodes[] = {
5522 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5523 AArch64::LDRXpost};
5524 static constexpr unsigned FPROpcodes[] = {
5525 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5526 AArch64::LDRDpost, AArch64::LDRQpost};
5527 if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5528 Opc = FPROpcodes[Log2_32(Value: MemSize)];
5529 else
5530 Opc = GPROpcodes[Log2_32(Value: MemSize)];
5531 }
5532 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5533 if (!Cst)
5534 return false; // Shouldn't happen, but just in case.
5535 auto LdMI =
5536 MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst->getSExtValue());
5537 LdMI.cloneMemRefs(OtherMI: Ld);
5538 constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
5539 MI.eraseFromParent();
5540 return true;
5541}
5542
5543bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5544 MachineRegisterInfo &MRI) {
5545 Register Dst = I.getWritebackReg();
5546 Register Val = I.getValueReg();
5547 Register Base = I.getBaseReg();
5548 Register Offset = I.getOffsetReg();
5549 LLT ValTy = MRI.getType(Reg: Val);
5550 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5551
5552 unsigned Opc = 0;
5553 if (I.isPre()) {
5554 static constexpr unsigned GPROpcodes[] = {
5555 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5556 AArch64::STRXpre};
5557 static constexpr unsigned FPROpcodes[] = {
5558 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5559 AArch64::STRQpre};
5560
5561 if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5562 Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5563 else
5564 Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5565 } else {
5566 static constexpr unsigned GPROpcodes[] = {
5567 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5568 AArch64::STRXpost};
5569 static constexpr unsigned FPROpcodes[] = {
5570 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5571 AArch64::STRDpost, AArch64::STRQpost};
5572
5573 if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5574 Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5575 else
5576 Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5577 }
5578
5579 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5580 if (!Cst)
5581 return false; // Shouldn't happen, but just in case.
5582 auto Str =
5583 MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst->getSExtValue());
5584 Str.cloneMemRefs(OtherMI: I);
5585 constrainSelectedInstRegOperands(I&: *Str, TII, TRI, RBI);
5586 I.eraseFromParent();
5587 return true;
5588}
5589
5590MachineInstr *
5591AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5592 MachineIRBuilder &MIRBuilder,
5593 MachineRegisterInfo &MRI) {
5594 LLT DstTy = MRI.getType(Reg: Dst);
5595 unsigned DstSize = DstTy.getSizeInBits();
5596 if (CV->isNullValue()) {
5597 if (DstSize == 128) {
5598 auto Mov =
5599 MIRBuilder.buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {Dst}, SrcOps: {}).addImm(Val: 0);
5600 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5601 return &*Mov;
5602 }
5603
5604 if (DstSize == 64) {
5605 auto Mov =
5606 MIRBuilder
5607 .buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {&AArch64::FPR128RegClass}, SrcOps: {})
5608 .addImm(Val: 0);
5609 auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {Dst}, SrcOps: {})
5610 .addReg(RegNo: Mov.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub);
5611 RBI.constrainGenericRegister(Reg: Dst, RC: AArch64::FPR64RegClass, MRI);
5612 return &*Copy;
5613 }
5614 }
5615
5616 if (CV->getSplatValue()) {
5617 APInt DefBits = APInt::getSplat(NewLen: DstSize, V: CV->getUniqueInteger());
5618 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5619 MachineInstr *NewOp;
5620 bool Inv = false;
5621 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
5622 (NewOp =
5623 tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5624 (NewOp =
5625 tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5626 (NewOp =
5627 tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5628 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
5629 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)))
5630 return NewOp;
5631
5632 DefBits = ~DefBits;
5633 Inv = true;
5634 if ((NewOp =
5635 tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5636 (NewOp =
5637 tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5638 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)))
5639 return NewOp;
5640 return nullptr;
5641 };
5642
5643 if (auto *NewOp = TryMOVIWithBits(DefBits))
5644 return NewOp;
5645
5646 // See if a fneg of the constant can be materialized with a MOVI, etc
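    // i.e. if flipping the per-element sign bit yields a constant that MOVI
    // can encode, emit that MOVI into a fresh FPR128 vreg and FNEG it into
    // the real destination.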
5647 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5648 unsigned NegOpc) -> MachineInstr * {
5649 // FNegate each sub-element of the constant
5650 APInt Neg = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: 1).zext(width: DstSize);
5651 APInt NegBits(DstSize, 0);
5652 unsigned NumElts = DstSize / NumBits;
5653 for (unsigned i = 0; i < NumElts; i++)
5654 NegBits |= Neg << (NumBits * i);
5655 NegBits = DefBits ^ NegBits;
5656
5657 // Try to create the new constants with MOVI, and if so generate a fneg
5658 // for it.
5659 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5660 Register NewDst = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
5661 NewOp->getOperand(i: 0).setReg(NewDst);
5662 return MIRBuilder.buildInstr(Opc: NegOpc, DstOps: {Dst}, SrcOps: {NewDst});
5663 }
5664 return nullptr;
5665 };
5666 MachineInstr *R;
5667 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5668 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5669 (STI.hasFullFP16() &&
5670 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5671 return R;
5672 }
5673
5674 auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder);
5675 if (!CPLoad) {
5676 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5677 return nullptr;
5678 }
5679
5680 auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: 0));
5681 RBI.constrainGenericRegister(
5682 Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: 0).getReg()), MRI);
5683 return &*Copy;
5684}
5685
5686bool AArch64InstructionSelector::tryOptConstantBuildVec(
5687 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5688 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5689 unsigned DstSize = DstTy.getSizeInBits();
5690 assert(DstSize <= 128 && "Unexpected build_vec type!");
5691 if (DstSize < 32)
5692 return false;
5693 // Check if we're building a constant vector, in which case we want to
5694 // generate a constant pool load instead of a vector insert sequence.
5695 SmallVector<Constant *, 16> Csts;
5696 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5697 // Try to find G_CONSTANT or G_FCONSTANT
5698 auto *OpMI =
5699 getOpcodeDef(Opcode: TargetOpcode::G_CONSTANT, Reg: I.getOperand(i: Idx).getReg(), MRI);
5700 if (OpMI)
5701 Csts.emplace_back(
5702 Args: const_cast<ConstantInt *>(OpMI->getOperand(i: 1).getCImm()));
5703 else if ((OpMI = getOpcodeDef(Opcode: TargetOpcode::G_FCONSTANT,
5704 Reg: I.getOperand(i: Idx).getReg(), MRI)))
5705 Csts.emplace_back(
5706 Args: const_cast<ConstantFP *>(OpMI->getOperand(i: 1).getFPImm()));
5707 else
5708 return false;
5709 }
5710 Constant *CV = ConstantVector::get(V: Csts);
5711 if (!emitConstantVector(Dst: I.getOperand(i: 0).getReg(), CV, MIRBuilder&: MIB, MRI))
5712 return false;
5713 I.eraseFromParent();
5714 return true;
5715}
5716
5717bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5718 MachineInstr &I, MachineRegisterInfo &MRI) {
5719 // Given:
5720 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5721 //
5722 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5723 Register Dst = I.getOperand(i: 0).getReg();
5724 Register EltReg = I.getOperand(i: 1).getReg();
5725 LLT EltTy = MRI.getType(Reg: EltReg);
5726 // If the index isn't on the same bank as its elements, then this can't be a
5727 // SUBREG_TO_REG.
5728 const RegisterBank &EltRB = *RBI.getRegBank(Reg: EltReg, MRI, TRI);
5729 const RegisterBank &DstRB = *RBI.getRegBank(Reg: Dst, MRI, TRI);
5730 if (EltRB != DstRB)
5731 return false;
5732 if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: 2), P: [&MRI](const MachineOperand &Op) {
5733 return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI);
5734 }))
5735 return false;
5736 unsigned SubReg;
5737 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB);
5738 if (!EltRC)
5739 return false;
5740 const TargetRegisterClass *DstRC =
5741 getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB);
5742 if (!DstRC)
5743 return false;
5744 if (!getSubRegForClass(RC: EltRC, TRI, SubReg))
5745 return false;
5746 auto SubregToReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
5747 .addImm(Val: 0)
5748 .addUse(RegNo: EltReg)
5749 .addImm(Val: SubReg);
5750 I.eraseFromParent();
5751 constrainSelectedInstRegOperands(I&: *SubregToReg, TII, TRI, RBI);
5752 return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI);
5753}
5754
5755bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5756 MachineRegisterInfo &MRI) {
5757 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5758 // Until we port more of the optimized selections, for now just use a vector
5759 // insert sequence.
5760 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5761 const LLT EltTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
5762 unsigned EltSize = EltTy.getSizeInBits();
5763
5764 if (tryOptConstantBuildVec(I, DstTy, MRI))
5765 return true;
5766 if (tryOptBuildVecToSubregToReg(I, MRI))
5767 return true;
5768
5769 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5770 return false; // Don't support all element types yet.
5771 const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI);
5772
5773 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5774 MachineInstr *ScalarToVec =
5775 emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC,
5776 Scalar: I.getOperand(i: 1).getReg(), MIRBuilder&: MIB);
5777 if (!ScalarToVec)
5778 return false;
5779
5780 Register DstVec = ScalarToVec->getOperand(i: 0).getReg();
5781 unsigned DstSize = DstTy.getSizeInBits();
5782
5783 // Keep track of the last MI we inserted. Later on, we might be able to save
5784 // a copy using it.
5785 MachineInstr *PrevMI = ScalarToVec;
5786 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5787 // Note that if we don't do a subregister copy, we can end up making an
5788 // extra register.
5789 Register OpReg = I.getOperand(i).getReg();
5790 // Do not emit inserts for undefs
5791 if (!getOpcodeDef<GImplicitDef>(Reg: OpReg, MRI)) {
5792 PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: OpReg, LaneIdx: i - 1, RB, MIRBuilder&: MIB);
5793 DstVec = PrevMI->getOperand(i: 0).getReg();
5794 }
5795 }
5796
5797 // If DstTy's size in bits is less than 128, then emit a subregister copy
5798 // from DstVec to the last register we've defined.
5799 if (DstSize < 128) {
5800 // Force this to be FPR using the destination vector.
5801 const TargetRegisterClass *RC =
5802 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
5803 if (!RC)
5804 return false;
5805 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5806 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5807 return false;
5808 }
5809
5810 unsigned SubReg = 0;
5811 if (!getSubRegForClass(RC, TRI, SubReg))
5812 return false;
5813 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5814 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5815                        << ")\n");
5816 return false;
5817 }
5818
5819 Register Reg = MRI.createVirtualRegister(RegClass: RC);
5820 Register DstReg = I.getOperand(i: 0).getReg();
5821
5822 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, flags: 0, SubReg);
5823 MachineOperand &RegOp = I.getOperand(i: 1);
5824 RegOp.setReg(Reg);
5825 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
5826 } else {
5827 // We either have a vector with all elements (except the first one) undef or
5828 // at least one non-undef non-first element. In the first case, we need to
5829 // constrain the output register ourselves as we may have generated an
5830 // INSERT_SUBREG operation which is a generic operation for which the
5831 // output regclass cannot be automatically chosen.
5832 //
5833 // In the second case, there is no need to do this as it may generate an
5834 // instruction like INSvi32gpr where the regclass can be automatically
5835 // chosen.
5836 //
5837 // Also, we save a copy by re-using the destination register on the final
5838 // insert.
5839 PrevMI->getOperand(i: 0).setReg(I.getOperand(i: 0).getReg());
5840 constrainSelectedInstRegOperands(I&: *PrevMI, TII, TRI, RBI);
5841
5842 Register DstReg = PrevMI->getOperand(i: 0).getReg();
5843 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5844 const TargetRegisterClass *RC =
5845 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
5846 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
5847 }
5848 }
5849
5850 I.eraseFromParent();
5851 return true;
5852}
5853
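/// Select a NEON multi-vector load intrinsic (e.g. ld1x2, ld2, ld2r): emit the
/// wide structured load, then copy each destination vector out of the
/// consecutive dsub0.../qsub0... subregisters of the tuple result.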
5854bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5855 unsigned NumVecs,
5856 MachineInstr &I) {
5857 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5858 assert(Opc && "Expected an opcode?");
5859 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5860 auto &MRI = *MIB.getMRI();
5861 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5862 unsigned Size = Ty.getSizeInBits();
5863 assert((Size == 64 || Size == 128) &&
5864 "Destination must be 64 bits or 128 bits?");
5865 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5866 auto Ptr = I.getOperand(i: I.getNumOperands() - 1).getReg();
5867 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5868 auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr});
5869 Load.cloneMemRefs(OtherMI: I);
5870 constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
5871 Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
5872 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5873 auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {})
5874 .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx);
5875 // Emit the subreg copies and immediately select them.
5876 // FIXME: We should refactor our copy code into an emitCopy helper and
5877 // clean up uses of this pattern elsewhere in the selector.
5878 selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
5879 }
5880 return true;
5881}
5882
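/// Select a NEON load-lane intrinsic (ld2lane/ld3lane/ld4lane): widen any
/// 64-bit source vectors to Q registers, form the Q-register tuple, emit the
/// lane load, and copy each result vector back out (narrowing again if
/// needed).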
5883bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5884 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5885 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5886 assert(Opc && "Expected an opcode?");
5887 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5888 auto &MRI = *MIB.getMRI();
5889 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5890 bool Narrow = Ty.getSizeInBits() == 64;
5891
5892 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5893 SmallVector<Register, 4> Regs(NumVecs);
5894 std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(),
5895 unary_op: [](auto MO) { return MO.getReg(); });
5896
5897 if (Narrow) {
5898 transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
5899 return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
5900 ->getOperand(i: 0)
5901 .getReg();
5902 });
5903 Ty = Ty.multiplyElements(Factor: 2);
5904 }
5905
5906 Register Tuple = createQTuple(Regs, MIB);
5907 auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI);
5908 if (!LaneNo)
5909 return false;
5910
5911 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
5912 auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {})
5913 .addReg(RegNo: Tuple)
5914 .addImm(Val: LaneNo->getZExtValue())
5915 .addReg(RegNo: Ptr);
5916 Load.cloneMemRefs(OtherMI: I);
5917 constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
5918 Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
5919 unsigned SubReg = AArch64::qsub0;
5920 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5921 auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY,
5922 DstOps: {Narrow ? DstOp(&AArch64::FPR128RegClass)
5923 : DstOp(I.getOperand(i: Idx).getReg())},
5924 SrcOps: {})
5925 .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx);
5926 Register WideReg = Vec.getReg(Idx: 0);
5927 // Emit the subreg copies and immediately select them.
5928 selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
5929 if (Narrow &&
5930 !emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI))
5931 return false;
5932 }
5933 return true;
5934}
5935
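/// Select a NEON multi-vector store intrinsic (e.g. st1x2, st2): gather the
/// source vectors into a D- or Q-register tuple and emit a single structured
/// store through the pointer operand.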
5936void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
5937 unsigned NumVecs,
5938 unsigned Opc) {
5939 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5940 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
5941 Register Ptr = I.getOperand(i: 1 + NumVecs).getReg();
5942
5943 SmallVector<Register, 2> Regs(NumVecs);
5944 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
5945 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
5946
5947 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5948 : createDTuple(Regs, MIB);
5949 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr});
5950 Store.cloneMemRefs(OtherMI: I);
5951 constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
5952}
5953
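/// Select a NEON store-lane intrinsic (st2lane/st3lane/st4lane): widen any
/// 64-bit source vectors to Q registers, form the Q-register tuple, and emit
/// the store with the constant lane index.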
5954bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
5955 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
5956 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
5957 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
5958 bool Narrow = Ty.getSizeInBits() == 64;
5959
5960 SmallVector<Register, 2> Regs(NumVecs);
5961 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
5962 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
5963
5964 if (Narrow)
5965 transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
5966 return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
5967 ->getOperand(i: 0)
5968 .getReg();
5969 });
5970
5971 Register Tuple = createQTuple(Regs, MIB);
5972
5973 auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: 1 + NumVecs).getReg(), MRI);
5974 if (!LaneNo)
5975 return false;
5976 Register Ptr = I.getOperand(i: 1 + NumVecs + 1).getReg();
5977 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {})
5978 .addReg(RegNo: Tuple)
5979 .addImm(Val: LaneNo->getZExtValue())
5980 .addReg(RegNo: Ptr);
5981 Store.cloneMemRefs(OtherMI: I);
5982 constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
5983 return true;
5984}
5985
5986bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5987 MachineInstr &I, MachineRegisterInfo &MRI) {
5988 // Find the intrinsic ID.
5989 unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
5990
5991 const LLT S8 = LLT::scalar(SizeInBits: 8);
5992 const LLT S16 = LLT::scalar(SizeInBits: 16);
5993 const LLT S32 = LLT::scalar(SizeInBits: 32);
5994 const LLT S64 = LLT::scalar(SizeInBits: 64);
5995 const LLT P0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
5996 // Select the instruction.
5997 switch (IntrinID) {
5998 default:
5999 return false;
6000 case Intrinsic::aarch64_ldxp:
6001 case Intrinsic::aarch64_ldaxp: {
6002 auto NewI = MIB.buildInstr(
6003 Opc: IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6004 DstOps: {I.getOperand(i: 0).getReg(), I.getOperand(i: 1).getReg()},
6005 SrcOps: {I.getOperand(i: 3)});
6006 NewI.cloneMemRefs(OtherMI: I);
6007 constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
6008 break;
6009 }
6010 case Intrinsic::aarch64_neon_ld1x2: {
6011 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6012 unsigned Opc = 0;
6013 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6014 Opc = AArch64::LD1Twov8b;
6015 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6016 Opc = AArch64::LD1Twov16b;
6017 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6018 Opc = AArch64::LD1Twov4h;
6019 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6020 Opc = AArch64::LD1Twov8h;
6021 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6022 Opc = AArch64::LD1Twov2s;
6023 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6024 Opc = AArch64::LD1Twov4s;
6025 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6026 Opc = AArch64::LD1Twov2d;
6027 else if (Ty == S64 || Ty == P0)
6028 Opc = AArch64::LD1Twov1d;
6029 else
6030 llvm_unreachable("Unexpected type for ld1x2!");
6031 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6032 break;
6033 }
6034 case Intrinsic::aarch64_neon_ld1x3: {
6035 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6036 unsigned Opc = 0;
6037 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6038 Opc = AArch64::LD1Threev8b;
6039 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6040 Opc = AArch64::LD1Threev16b;
6041 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6042 Opc = AArch64::LD1Threev4h;
6043 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6044 Opc = AArch64::LD1Threev8h;
6045 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6046 Opc = AArch64::LD1Threev2s;
6047 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6048 Opc = AArch64::LD1Threev4s;
6049 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6050 Opc = AArch64::LD1Threev2d;
6051 else if (Ty == S64 || Ty == P0)
6052 Opc = AArch64::LD1Threev1d;
6053 else
6054 llvm_unreachable("Unexpected type for ld1x3!");
6055 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6056 break;
6057 }
6058 case Intrinsic::aarch64_neon_ld1x4: {
6059 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6060 unsigned Opc = 0;
6061 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6062 Opc = AArch64::LD1Fourv8b;
6063 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6064 Opc = AArch64::LD1Fourv16b;
6065 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6066 Opc = AArch64::LD1Fourv4h;
6067 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6068 Opc = AArch64::LD1Fourv8h;
6069 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6070 Opc = AArch64::LD1Fourv2s;
6071 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6072 Opc = AArch64::LD1Fourv4s;
6073 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6074 Opc = AArch64::LD1Fourv2d;
6075 else if (Ty == S64 || Ty == P0)
6076 Opc = AArch64::LD1Fourv1d;
6077 else
6078 llvm_unreachable("Unexpected type for ld1x4!");
6079 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6080 break;
6081 }
6082 case Intrinsic::aarch64_neon_ld2: {
6083 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6084 unsigned Opc = 0;
6085 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6086 Opc = AArch64::LD2Twov8b;
6087 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6088 Opc = AArch64::LD2Twov16b;
6089 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6090 Opc = AArch64::LD2Twov4h;
6091 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6092 Opc = AArch64::LD2Twov8h;
6093 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6094 Opc = AArch64::LD2Twov2s;
6095 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6096 Opc = AArch64::LD2Twov4s;
6097 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6098 Opc = AArch64::LD2Twov2d;
6099 else if (Ty == S64 || Ty == P0)
6100 Opc = AArch64::LD1Twov1d;
6101 else
6102 llvm_unreachable("Unexpected type for ld2!");
6103 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6104 break;
6105 }
6106 case Intrinsic::aarch64_neon_ld2lane: {
6107 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6108 unsigned Opc;
6109 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6110 Opc = AArch64::LD2i8;
6111 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6112 Opc = AArch64::LD2i16;
6113 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6114 Opc = AArch64::LD2i32;
6115 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6116 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6117 Opc = AArch64::LD2i64;
6118 else
6119      llvm_unreachable("Unexpected type for ld2lane!");
6120 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 2, I))
6121 return false;
6122 break;
6123 }
6124 case Intrinsic::aarch64_neon_ld2r: {
6125 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6126 unsigned Opc = 0;
6127 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6128 Opc = AArch64::LD2Rv8b;
6129 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6130 Opc = AArch64::LD2Rv16b;
6131 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6132 Opc = AArch64::LD2Rv4h;
6133 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6134 Opc = AArch64::LD2Rv8h;
6135 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6136 Opc = AArch64::LD2Rv2s;
6137 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6138 Opc = AArch64::LD2Rv4s;
6139 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6140 Opc = AArch64::LD2Rv2d;
6141 else if (Ty == S64 || Ty == P0)
6142 Opc = AArch64::LD2Rv1d;
6143 else
6144 llvm_unreachable("Unexpected type for ld2r!");
6145 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6146 break;
6147 }
6148 case Intrinsic::aarch64_neon_ld3: {
6149 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6150 unsigned Opc = 0;
6151 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6152 Opc = AArch64::LD3Threev8b;
6153 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6154 Opc = AArch64::LD3Threev16b;
6155 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6156 Opc = AArch64::LD3Threev4h;
6157 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6158 Opc = AArch64::LD3Threev8h;
6159 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6160 Opc = AArch64::LD3Threev2s;
6161 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6162 Opc = AArch64::LD3Threev4s;
6163 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6164 Opc = AArch64::LD3Threev2d;
6165 else if (Ty == S64 || Ty == P0)
6166 Opc = AArch64::LD1Threev1d;
6167 else
6168 llvm_unreachable("Unexpected type for ld3!");
6169 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6170 break;
6171 }
6172 case Intrinsic::aarch64_neon_ld3lane: {
6173 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6174 unsigned Opc;
6175 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6176 Opc = AArch64::LD3i8;
6177 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6178 Opc = AArch64::LD3i16;
6179 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6180 Opc = AArch64::LD3i32;
6181 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6182 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6183 Opc = AArch64::LD3i64;
6184 else
6185      llvm_unreachable("Unexpected type for ld3lane!");
6186 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 3, I))
6187 return false;
6188 break;
6189 }
6190 case Intrinsic::aarch64_neon_ld3r: {
6191 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6192 unsigned Opc = 0;
6193 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6194 Opc = AArch64::LD3Rv8b;
6195 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6196 Opc = AArch64::LD3Rv16b;
6197 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6198 Opc = AArch64::LD3Rv4h;
6199 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6200 Opc = AArch64::LD3Rv8h;
6201 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6202 Opc = AArch64::LD3Rv2s;
6203 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6204 Opc = AArch64::LD3Rv4s;
6205 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6206 Opc = AArch64::LD3Rv2d;
6207 else if (Ty == S64 || Ty == P0)
6208 Opc = AArch64::LD3Rv1d;
6209 else
6210 llvm_unreachable("Unexpected type for ld3r!");
6211 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6212 break;
6213 }
6214 case Intrinsic::aarch64_neon_ld4: {
6215 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6216 unsigned Opc = 0;
6217 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6218 Opc = AArch64::LD4Fourv8b;
6219 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6220 Opc = AArch64::LD4Fourv16b;
6221 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6222 Opc = AArch64::LD4Fourv4h;
6223 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6224 Opc = AArch64::LD4Fourv8h;
6225 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6226 Opc = AArch64::LD4Fourv2s;
6227 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6228 Opc = AArch64::LD4Fourv4s;
6229 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6230 Opc = AArch64::LD4Fourv2d;
6231 else if (Ty == S64 || Ty == P0)
6232 Opc = AArch64::LD1Fourv1d;
6233 else
6234 llvm_unreachable("Unexpected type for ld4!");
6235 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6236 break;
6237 }
6238 case Intrinsic::aarch64_neon_ld4lane: {
6239 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6240 unsigned Opc;
6241 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6242 Opc = AArch64::LD4i8;
6243 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6244 Opc = AArch64::LD4i16;
6245 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6246 Opc = AArch64::LD4i32;
6247 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6248 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6249 Opc = AArch64::LD4i64;
6250 else
6251      llvm_unreachable("Unexpected type for ld4lane!");
6252 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 4, I))
6253 return false;
6254 break;
6255 }
6256 case Intrinsic::aarch64_neon_ld4r: {
6257 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6258 unsigned Opc = 0;
6259 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6260 Opc = AArch64::LD4Rv8b;
6261 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6262 Opc = AArch64::LD4Rv16b;
6263 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6264 Opc = AArch64::LD4Rv4h;
6265 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6266 Opc = AArch64::LD4Rv8h;
6267 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6268 Opc = AArch64::LD4Rv2s;
6269 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6270 Opc = AArch64::LD4Rv4s;
6271 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6272 Opc = AArch64::LD4Rv2d;
6273 else if (Ty == S64 || Ty == P0)
6274 Opc = AArch64::LD4Rv1d;
6275 else
6276 llvm_unreachable("Unexpected type for ld4r!");
6277 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6278 break;
6279 }
6280 case Intrinsic::aarch64_neon_st1x2: {
6281 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6282 unsigned Opc;
6283 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6284 Opc = AArch64::ST1Twov8b;
6285 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6286 Opc = AArch64::ST1Twov16b;
6287 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6288 Opc = AArch64::ST1Twov4h;
6289 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6290 Opc = AArch64::ST1Twov8h;
6291 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6292 Opc = AArch64::ST1Twov2s;
6293 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6294 Opc = AArch64::ST1Twov4s;
6295 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6296 Opc = AArch64::ST1Twov2d;
6297 else if (Ty == S64 || Ty == P0)
6298 Opc = AArch64::ST1Twov1d;
6299 else
6300 llvm_unreachable("Unexpected type for st1x2!");
6301 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6302 break;
6303 }
6304 case Intrinsic::aarch64_neon_st1x3: {
6305 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6306 unsigned Opc;
6307 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6308 Opc = AArch64::ST1Threev8b;
6309 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6310 Opc = AArch64::ST1Threev16b;
6311 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6312 Opc = AArch64::ST1Threev4h;
6313 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6314 Opc = AArch64::ST1Threev8h;
6315 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6316 Opc = AArch64::ST1Threev2s;
6317 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6318 Opc = AArch64::ST1Threev4s;
6319 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6320 Opc = AArch64::ST1Threev2d;
6321 else if (Ty == S64 || Ty == P0)
6322 Opc = AArch64::ST1Threev1d;
6323 else
6324 llvm_unreachable("Unexpected type for st1x3!");
6325 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6326 break;
6327 }
6328 case Intrinsic::aarch64_neon_st1x4: {
6329 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6330 unsigned Opc;
6331 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6332 Opc = AArch64::ST1Fourv8b;
6333 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6334 Opc = AArch64::ST1Fourv16b;
6335 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6336 Opc = AArch64::ST1Fourv4h;
6337 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6338 Opc = AArch64::ST1Fourv8h;
6339 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6340 Opc = AArch64::ST1Fourv2s;
6341 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6342 Opc = AArch64::ST1Fourv4s;
6343 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6344 Opc = AArch64::ST1Fourv2d;
6345 else if (Ty == S64 || Ty == P0)
6346 Opc = AArch64::ST1Fourv1d;
6347 else
6348 llvm_unreachable("Unexpected type for st1x4!");
6349 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6350 break;
6351 }
6352 case Intrinsic::aarch64_neon_st2: {
6353 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6354 unsigned Opc;
6355 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6356 Opc = AArch64::ST2Twov8b;
6357 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6358 Opc = AArch64::ST2Twov16b;
6359 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6360 Opc = AArch64::ST2Twov4h;
6361 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6362 Opc = AArch64::ST2Twov8h;
6363 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6364 Opc = AArch64::ST2Twov2s;
6365 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6366 Opc = AArch64::ST2Twov4s;
6367 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6368 Opc = AArch64::ST2Twov2d;
6369 else if (Ty == S64 || Ty == P0)
6370 Opc = AArch64::ST1Twov1d;
6371 else
6372 llvm_unreachable("Unexpected type for st2!");
6373 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6374 break;
6375 }
6376 case Intrinsic::aarch64_neon_st3: {
6377 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6378 unsigned Opc;
6379 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6380 Opc = AArch64::ST3Threev8b;
6381 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6382 Opc = AArch64::ST3Threev16b;
6383 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6384 Opc = AArch64::ST3Threev4h;
6385 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6386 Opc = AArch64::ST3Threev8h;
6387 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6388 Opc = AArch64::ST3Threev2s;
6389 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6390 Opc = AArch64::ST3Threev4s;
6391 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6392 Opc = AArch64::ST3Threev2d;
6393 else if (Ty == S64 || Ty == P0)
6394 Opc = AArch64::ST1Threev1d;
6395 else
6396 llvm_unreachable("Unexpected type for st3!");
6397 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6398 break;
6399 }
6400 case Intrinsic::aarch64_neon_st4: {
6401 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6402 unsigned Opc;
6403 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6404 Opc = AArch64::ST4Fourv8b;
6405 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6406 Opc = AArch64::ST4Fourv16b;
6407 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6408 Opc = AArch64::ST4Fourv4h;
6409 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6410 Opc = AArch64::ST4Fourv8h;
6411 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6412 Opc = AArch64::ST4Fourv2s;
6413 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6414 Opc = AArch64::ST4Fourv4s;
6415 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6416 Opc = AArch64::ST4Fourv2d;
6417 else if (Ty == S64 || Ty == P0)
6418 Opc = AArch64::ST1Fourv1d;
6419 else
6420 llvm_unreachable("Unexpected type for st4!");
6421 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6422 break;
6423 }
6424 case Intrinsic::aarch64_neon_st2lane: {
6425 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6426 unsigned Opc;
6427 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6428 Opc = AArch64::ST2i8;
6429 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6430 Opc = AArch64::ST2i16;
6431 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6432 Opc = AArch64::ST2i32;
6433 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6434 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6435 Opc = AArch64::ST2i64;
6436 else
6437 llvm_unreachable("Unexpected type for st2lane!");
6438 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 2, Opc))
6439 return false;
6440 break;
6441 }
6442 case Intrinsic::aarch64_neon_st3lane: {
6443 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6444 unsigned Opc;
6445 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6446 Opc = AArch64::ST3i8;
6447 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6448 Opc = AArch64::ST3i16;
6449 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6450 Opc = AArch64::ST3i32;
6451 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6452 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6453 Opc = AArch64::ST3i64;
6454 else
6455 llvm_unreachable("Unexpected type for st3lane!");
6456 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 3, Opc))
6457 return false;
6458 break;
6459 }
6460 case Intrinsic::aarch64_neon_st4lane: {
6461 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6462 unsigned Opc;
6463 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6464 Opc = AArch64::ST4i8;
6465 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6466 Opc = AArch64::ST4i16;
6467 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6468 Opc = AArch64::ST4i32;
6469 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6470 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6471 Opc = AArch64::ST4i64;
6472 else
6473 llvm_unreachable("Unexpected type for st4lane!");
6474 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 4, Opc))
6475 return false;
6476 break;
6477 }
6478 case Intrinsic::aarch64_mops_memset_tag: {
6479    // Transform
6480    //   %dst:gpr(p0) = G_INTRINSIC_W_SIDE_EFFECTS
6481    //       intrinsic(@llvm.aarch64.mops.memset.tag),
6482    //       %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6483    // where %dst is updated, into
6484    //   (%Rd:GPR64common, %Rn:GPR64) =
6485    //       MOPSMemorySetTaggingPseudo
6486    //       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6487    // where Rd and Rn are tied.
6488    // It is expected that %val has been extended to s64 in legalization.
6489    // Note that the order of the size/value operands is swapped.
6490
6491 Register DstDef = I.getOperand(i: 0).getReg();
6492 // I.getOperand(1) is the intrinsic function
6493 Register DstUse = I.getOperand(i: 2).getReg();
6494 Register ValUse = I.getOperand(i: 3).getReg();
6495 Register SizeUse = I.getOperand(i: 4).getReg();
6496
6497 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6498    // Therefore an additional virtual register is required for the updated size
6499 // operand. This value is not accessible via the semantics of the intrinsic.
6500 Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
6501
6502 auto Memset = MIB.buildInstr(Opc: AArch64::MOPSMemorySetTaggingPseudo,
6503 DstOps: {DstDef, SizeDef}, SrcOps: {DstUse, SizeUse, ValUse});
6504 Memset.cloneMemRefs(OtherMI: I);
6505 constrainSelectedInstRegOperands(I&: *Memset, TII, TRI, RBI);
6506 break;
6507 }
6508 }
6509
6510 I.eraseFromParent();
6511 return true;
6512}
6513
6514bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6515 MachineRegisterInfo &MRI) {
6516 unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6517
6518 switch (IntrinID) {
6519 default:
6520 break;
6521 case Intrinsic::aarch64_crypto_sha1h: {
6522 Register DstReg = I.getOperand(i: 0).getReg();
6523 Register SrcReg = I.getOperand(i: 2).getReg();
6524
6525 // FIXME: Should this be an assert?
6526 if (MRI.getType(Reg: DstReg).getSizeInBits() != 32 ||
6527 MRI.getType(Reg: SrcReg).getSizeInBits() != 32)
6528 return false;
6529
6530 // The operation has to happen on FPRs. Set up some new FPR registers for
6531 // the source and destination if they are on GPRs.
6532 if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6533 SrcReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass);
6534 MIB.buildCopy(Res: {SrcReg}, Op: {I.getOperand(i: 2)});
6535
6536 // Make sure the copy ends up getting constrained properly.
6537 RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(),
6538 RC: AArch64::GPR32RegClass, MRI);
6539 }
6540
6541 if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6542 DstReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass);
6543
6544 // Actually insert the instruction.
6545 auto SHA1Inst = MIB.buildInstr(Opc: AArch64::SHA1Hrr, DstOps: {DstReg}, SrcOps: {SrcReg});
6546 constrainSelectedInstRegOperands(I&: *SHA1Inst, TII, TRI, RBI);
6547
6548 // Did we create a new register for the destination?
6549 if (DstReg != I.getOperand(i: 0).getReg()) {
6550 // Yep. Copy the result of the instruction back into the original
6551 // destination.
6552 MIB.buildCopy(Res: {I.getOperand(i: 0)}, Op: {DstReg});
6553 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
6554 RC: AArch64::GPR32RegClass, MRI);
6555 }
6556
6557 I.eraseFromParent();
6558 return true;
6559 }
6560 case Intrinsic::ptrauth_resign: {
6561 Register DstReg = I.getOperand(i: 0).getReg();
6562 Register ValReg = I.getOperand(i: 2).getReg();
6563 uint64_t AUTKey = I.getOperand(i: 3).getImm();
6564 Register AUTDisc = I.getOperand(i: 4).getReg();
6565 uint64_t PACKey = I.getOperand(i: 5).getImm();
6566 Register PACDisc = I.getOperand(i: 6).getReg();
6567
6568 Register AUTAddrDisc = AUTDisc;
6569 uint16_t AUTConstDiscC = 0;
6570 std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6571 extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6572
6573 Register PACAddrDisc = PACDisc;
6574 uint16_t PACConstDiscC = 0;
6575 std::tie(args&: PACConstDiscC, args&: PACAddrDisc) =
6576 extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI);
6577
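    // AUTPAC operates on X16 in place (value in, resigned value out) and may
    // use X17 as scratch, so define X17 before the pseudo and copy the result
    // back out of X16 afterwards.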
6578 MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6579 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6580 MIB.buildInstr(Opcode: AArch64::AUTPAC)
6581 .addImm(Val: AUTKey)
6582 .addImm(Val: AUTConstDiscC)
6583 .addUse(RegNo: AUTAddrDisc)
6584 .addImm(Val: PACKey)
6585 .addImm(Val: PACConstDiscC)
6586 .addUse(RegNo: PACAddrDisc)
6587 .constrainAllUses(TII, TRI, RBI);
6588 MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16));
6589
6590 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6591 I.eraseFromParent();
6592 return true;
6593 }
6594 case Intrinsic::ptrauth_auth: {
6595 Register DstReg = I.getOperand(i: 0).getReg();
6596 Register ValReg = I.getOperand(i: 2).getReg();
6597 uint64_t AUTKey = I.getOperand(i: 3).getImm();
6598 Register AUTDisc = I.getOperand(i: 4).getReg();
6599
6600 Register AUTAddrDisc = AUTDisc;
6601 uint16_t AUTConstDiscC = 0;
6602 std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6603 extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6604
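    // Same register convention as the resign case above: the value goes
    // through X16, and X17 is defined for the pseudo's use.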
6605 MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6606 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6607 MIB.buildInstr(Opcode: AArch64::AUT)
6608 .addImm(Val: AUTKey)
6609 .addImm(Val: AUTConstDiscC)
6610 .addUse(RegNo: AUTAddrDisc)
6611 .constrainAllUses(TII, TRI, RBI);
6612 MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16));
6613
6614 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6615 I.eraseFromParent();
6616 return true;
6617 }
6618 case Intrinsic::frameaddress:
6619 case Intrinsic::returnaddress: {
6620 MachineFunction &MF = *I.getParent()->getParent();
6621 MachineFrameInfo &MFI = MF.getFrameInfo();
6622
6623 unsigned Depth = I.getOperand(i: 2).getImm();
6624 Register DstReg = I.getOperand(i: 0).getReg();
6625 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6626
6627 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6628 if (!MFReturnAddr) {
6629 // Insert the copy from LR/X30 into the entry block, before it can be
6630 // clobbered by anything.
6631 MFI.setReturnAddressIsTaken(true);
6632 MFReturnAddr = getFunctionLiveInPhysReg(
6633 MF, TII, PhysReg: AArch64::LR, RC: AArch64::GPR64RegClass, DL: I.getDebugLoc());
6634 }
6635
6636 if (STI.hasPAuth()) {
6637 MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {MFReturnAddr});
6638 } else {
6639 MIB.buildCopy(Res: {Register(AArch64::LR)}, Op: {MFReturnAddr});
6640 MIB.buildInstr(Opcode: AArch64::XPACLRI);
6641 MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)});
6642 }
6643
6644 I.eraseFromParent();
6645 return true;
6646 }
6647
6648 MFI.setFrameAddressIsTaken(true);
6649 Register FrameAddr(AArch64::FP);
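    // For nonzero depths, walk up the frame chain: the previous frame pointer
    // is stored at offset 0 of the current frame record.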
6650 while (Depth--) {
6651 Register NextFrame = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
6652 auto Ldr =
6653 MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {NextFrame}, SrcOps: {FrameAddr}).addImm(Val: 0);
6654 constrainSelectedInstRegOperands(I&: *Ldr, TII, TRI, RBI);
6655 FrameAddr = NextFrame;
6656 }
6657
6658 if (IntrinID == Intrinsic::frameaddress)
6659 MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr});
6660 else {
6661 MFI.setReturnAddressIsTaken(true);
6662
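      // The return address is saved at offset 8 within the frame record;
      // strip any pointer authentication code from it before handing it back.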
6663 if (STI.hasPAuth()) {
6664 Register TmpReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
6665 MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {TmpReg}, SrcOps: {FrameAddr}).addImm(Val: 1);
6666 MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {TmpReg});
6667 } else {
6668 MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {Register(AArch64::LR)}, SrcOps: {FrameAddr})
6669 .addImm(Val: 1);
6670 MIB.buildInstr(Opcode: AArch64::XPACLRI);
6671 MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)});
6672 }
6673 }
6674
6675 I.eraseFromParent();
6676 return true;
6677 }
6678 case Intrinsic::aarch64_neon_tbl2:
6679 SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBLv8i8Two, Opc2: AArch64::TBLv16i8Two, isExt: false);
6680 return true;
6681 case Intrinsic::aarch64_neon_tbl3:
6682 SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBLv8i8Three, Opc2: AArch64::TBLv16i8Three,
6683 isExt: false);
6684 return true;
6685 case Intrinsic::aarch64_neon_tbl4:
6686 SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBLv8i8Four, Opc2: AArch64::TBLv16i8Four, isExt: false);
6687 return true;
6688 case Intrinsic::aarch64_neon_tbx2:
6689 SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBXv8i8Two, Opc2: AArch64::TBXv16i8Two, isExt: true);
6690 return true;
6691 case Intrinsic::aarch64_neon_tbx3:
6692 SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBXv8i8Three, Opc2: AArch64::TBXv16i8Three, isExt: true);
6693 return true;
6694 case Intrinsic::aarch64_neon_tbx4:
6695 SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBXv8i8Four, Opc2: AArch64::TBXv16i8Four, isExt: true);
6696 return true;
6697 case Intrinsic::swift_async_context_addr:
6698 auto Sub = MIB.buildInstr(Opc: AArch64::SUBXri, DstOps: {I.getOperand(i: 0).getReg()},
6699 SrcOps: {Register(AArch64::FP)})
6700 .addImm(Val: 8)
6701 .addImm(Val: 0);
6702 constrainSelectedInstRegOperands(I&: *Sub, TII, TRI, RBI);
6703
6704 MF->getFrameInfo().setFrameAddressIsTaken(true);
6705 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6706 I.eraseFromParent();
6707 return true;
6708 }
6709 return false;
6710}
6711
6712// G_PTRAUTH_GLOBAL_VALUE lowering
6713//
6714// We have 3 lowering alternatives to choose from:
6715// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6716//   If the GV doesn't need a GOT load (i.e., it is locally defined),
6717//   materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6718//
6719// - LOADgotPAC: similar to LOADgot, with added PAC.
6720// If the GV needs a GOT load, materialize the pointer using the usual
6721//   GOT adrp+ldr, +pac. Pointers in the GOT are assumed to be unsigned, and the
6722//   GOT section is assumed to be read-only (for example, via the relro
6723//   mechanism). See LowerMOVaddrPAC.
6724//
6725// - LOADauthptrstatic: similar to LOADgot, but uses a special stub slot
6726//   instead of a GOT slot.
6727// Load a signed pointer for symbol 'sym' from a stub slot named
6728//   'sym$auth_ptr$key$disc' that is filled in by the dynamic linker during
6729//   relocation resolution. This usually lowers to adrp+ldr, but it also emits
6730//   an entry into .data with an @AUTH relocation.
6731//   See LowerLOADauthptrstatic.
6732//
6733// All 3 are pseudos that are expanded late into longer sequences: this lets us
6734// provide integrity guarantees on the to-be-signed intermediate values.
6735//
6736// LOADauthptrstatic is undesirable because it requires a large section filled
6737// with often similarly-signed pointers, making it a good harvesting target.
6738// Thus, it's only used for ptrauth references to extern_weak symbols, where it
6739// lets us avoid the null checks the other lowerings would need.
6740
6741bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6742 MachineInstr &I, MachineRegisterInfo &MRI) const {
6743 Register DefReg = I.getOperand(i: 0).getReg();
6744 Register Addr = I.getOperand(i: 1).getReg();
6745 uint64_t Key = I.getOperand(i: 2).getImm();
6746 Register AddrDisc = I.getOperand(i: 3).getReg();
6747 uint64_t Disc = I.getOperand(i: 4).getImm();
6748 int64_t Offset = 0;
6749
6750 if (Key > AArch64PACKey::LAST)
6751 report_fatal_error(reason: "key in ptrauth global out of range [0, " +
6752 Twine((int)AArch64PACKey::LAST) + "]");
6753
6754 // Blend only works if the integer discriminator is 16-bit wide.
6755 if (!isUInt<16>(x: Disc))
6756 report_fatal_error(
6757 reason: "constant discriminator in ptrauth global out of range [0, 0xffff]");
6758
6759 // Choosing between 3 lowering alternatives is target-specific.
6760 if (!STI.isTargetELF() && !STI.isTargetMachO())
6761 report_fatal_error(reason: "ptrauth global lowering only supported on MachO/ELF");
6762
6763 if (!MRI.hasOneDef(RegNo: Addr))
6764 return false;
6765
6766 // First match any offset we take from the real global.
6767 const MachineInstr *DefMI = &*MRI.def_instr_begin(RegNo: Addr);
6768 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6769 Register OffsetReg = DefMI->getOperand(i: 2).getReg();
6770 if (!MRI.hasOneDef(RegNo: OffsetReg))
6771 return false;
6772 const MachineInstr &OffsetMI = *MRI.def_instr_begin(RegNo: OffsetReg);
6773 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6774 return false;
6775
6776 Addr = DefMI->getOperand(i: 1).getReg();
6777 if (!MRI.hasOneDef(RegNo: Addr))
6778 return false;
6779
6780 DefMI = &*MRI.def_instr_begin(RegNo: Addr);
6781 Offset = OffsetMI.getOperand(i: 1).getCImm()->getSExtValue();
6782 }
6783
6784 // We should be left with a genuine unauthenticated GlobalValue.
6785 const GlobalValue *GV;
6786 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6787 GV = DefMI->getOperand(i: 1).getGlobal();
6788 Offset += DefMI->getOperand(i: 1).getOffset();
6789 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6790 GV = DefMI->getOperand(i: 2).getGlobal();
6791 Offset += DefMI->getOperand(i: 2).getOffset();
6792 } else {
6793 return false;
6794 }
6795
6796 MachineIRBuilder MIB(I);
6797
6798 // Classify the reference to determine whether it needs a GOT load.
6799 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6800 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6801 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6802 "unsupported non-GOT op flags on ptrauth global reference");
6803 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6804 "unsupported non-GOT reference to weak ptrauth global");
6805
6806 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(VReg: AddrDisc, MRI);
6807 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6808
6809 // Non-extern_weak:
6810 // - No GOT load needed -> MOVaddrPAC
6811 // - GOT load for non-extern_weak -> LOADgotPAC
6812 // Note that we disallow extern_weak refs to avoid null checks later.
6813 if (!GV->hasExternalWeakLinkage()) {
6814 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
6815 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6816 MIB.buildInstr(Opcode: NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6817 .addGlobalAddress(GV, Offset)
6818 .addImm(Val: Key)
6819 .addReg(RegNo: HasAddrDisc ? AddrDisc : AArch64::XZR)
6820 .addImm(Val: Disc)
6821 .constrainAllUses(TII, TRI, RBI);
6822 MIB.buildCopy(Res: DefReg, Op: Register(AArch64::X16));
6823 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
6824 I.eraseFromParent();
6825 return true;
6826 }
6827
6828 // extern_weak -> LOADauthptrstatic
6829
6830 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6831 // offset alone as a pointer if the symbol wasn't available, which would
6832 // probably break null checks in users. Ptrauth complicates things further:
6833 // error out.
6834 if (Offset != 0)
6835 report_fatal_error(
6836 reason: "unsupported non-zero offset in weak ptrauth global reference");
6837
6838 if (HasAddrDisc)
6839 report_fatal_error(reason: "unsupported weak addr-div ptrauth global");
6840
6841 MIB.buildInstr(Opc: AArch64::LOADauthptrstatic, DstOps: {DefReg}, SrcOps: {})
6842 .addGlobalAddress(GV, Offset)
6843 .addImm(Val: Key)
6844 .addImm(Val: Disc);
6845 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
6846
6847 I.eraseFromParent();
6848 return true;
6849}
6850
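/// Select a NEON tbl/tbx intrinsic: pick the 8b or 16b opcode from the
/// destination type, gather the table registers into a Q-register tuple, and
/// emit the instruction (tbx additionally passes the original destination
/// vector, which supplies the elements for out-of-range indices).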
6851void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6852 MachineRegisterInfo &MRI,
6853 unsigned NumVec, unsigned Opc1,
6854 unsigned Opc2, bool isExt) {
6855 Register DstReg = I.getOperand(i: 0).getReg();
6856 unsigned Opc = MRI.getType(Reg: DstReg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8) ? Opc1 : Opc2;
6857
6858 // Create the REG_SEQUENCE
6859 SmallVector<Register, 4> Regs;
6860 for (unsigned i = 0; i < NumVec; i++)
6861 Regs.push_back(Elt: I.getOperand(i: i + 2 + isExt).getReg());
6862 Register RegSeq = createQTuple(Regs, MIB);
6863
6864 Register IdxReg = I.getOperand(i: 2 + NumVec + isExt).getReg();
6865 MachineInstrBuilder Instr;
6866 if (isExt) {
6867 Register Reg = I.getOperand(i: 2).getReg();
6868 Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Reg, RegSeq, IdxReg});
6869 } else
6870 Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {RegSeq, IdxReg});
6871 constrainSelectedInstRegOperands(I&: *Instr, TII, TRI, RBI);
6872 I.eraseFromParent();
6873}
6874
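// Renderers for constant shift amounts used when a shift is selected as a
// bitfield move (e.g. shl -> UBFM): the "A" forms return (BitWidth - imm)
// masked to BitWidth - 1, and the "B" forms return (BitWidth - 1 - imm).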
6875InstructionSelector::ComplexRendererFns
6876AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6877 auto MaybeImmed = getImmedFromMO(Root);
6878 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6879 return std::nullopt;
6880 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6881 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
6882}
6883
6884InstructionSelector::ComplexRendererFns
6885AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6886 auto MaybeImmed = getImmedFromMO(Root);
6887 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6888 return std::nullopt;
6889 uint64_t Enc = 31 - *MaybeImmed;
6890 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
6891}
6892
6893InstructionSelector::ComplexRendererFns
6894AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6895 auto MaybeImmed = getImmedFromMO(Root);
6896 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6897 return std::nullopt;
6898 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6899 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
6900}
6901
6902InstructionSelector::ComplexRendererFns
6903AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
6904 auto MaybeImmed = getImmedFromMO(Root);
6905 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6906 return std::nullopt;
6907 uint64_t Enc = 63 - *MaybeImmed;
6908 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
6909}
6910
6911/// Helper to select an immediate value that can be represented as a 12-bit
6912/// value shifted left by either 0 or 12. If it is possible to do so, return
6913/// the immediate and shift value. If not, return std::nullopt.
6914///
6915/// Used by selectArithImmed and selectNegArithImmed.
6916InstructionSelector::ComplexRendererFns
6917AArch64InstructionSelector::select12BitValueWithLeftShift(
6918 uint64_t Immed) const {
6919 unsigned ShiftAmt;
6920 if (Immed >> 12 == 0) {
6921 ShiftAmt = 0;
6922 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6923 ShiftAmt = 12;
6924 Immed = Immed >> 12;
6925 } else
6926 return std::nullopt;
6927
6928 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
6929 return {{
6930 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); },
6931 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); },
6932 }};
6933}
6934
6935/// SelectArithImmed - Select an immediate value that can be represented as
6936/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6937/// Val set to the 12-bit value and Shift set to the shifter operand.
6938InstructionSelector::ComplexRendererFns
6939AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6940 // This function is called from the addsub_shifted_imm ComplexPattern,
6941  // which lists [imm] as the list of opcodes it's interested in; however,
6942 // we still need to check whether the operand is actually an immediate
6943 // here because the ComplexPattern opcode list is only used in
6944 // root-level opcode matching.
6945 auto MaybeImmed = getImmedFromMO(Root);
6946 if (MaybeImmed == std::nullopt)
6947 return std::nullopt;
6948 return select12BitValueWithLeftShift(Immed: *MaybeImmed);
6949}
6950
6951/// SelectNegArithImmed - As above, but negates the value before trying to
6952/// select it.
6953InstructionSelector::ComplexRendererFns
6954AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6955 // We need a register here, because we need to know if we have a 64 or 32
6956 // bit immediate.
6957 if (!Root.isReg())
6958 return std::nullopt;
6959 auto MaybeImmed = getImmedFromMO(Root);
6960 if (MaybeImmed == std::nullopt)
6961 return std::nullopt;
6962 uint64_t Immed = *MaybeImmed;
6963
6964 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6965 // have the opposite effect on the C flag, so this pattern mustn't match under
6966 // those circumstances.
6967 if (Immed == 0)
6968 return std::nullopt;
6969
6970  // Check whether we're dealing with a 32-bit type or a 64-bit type on the
6971  // root.
6972 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6973 if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == 32)
6974 Immed = ~((uint32_t)Immed) + 1;
6975 else
6976 Immed = ~Immed + 1ULL;
6977
6978 if (Immed & 0xFFFFFFFFFF000000ULL)
6979 return std::nullopt;
6980
6981 Immed &= 0xFFFFFFULL;
6982 return select12BitValueWithLeftShift(Immed);
6983}
6984
6985/// Checks if we are sure that folding MI into load/store addressing mode is
6986/// beneficial or not.
6987///
6988/// Returns:
6989/// - true if folding MI would be beneficial.
6990/// - false if folding MI would be bad.
6991/// - std::nullopt if it is not sure whether folding MI is beneficial.
6992///
6993/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
6994///
6995/// %13:gpr(s64) = G_CONSTANT i64 1
6996/// %8:gpr(s64) = G_SHL %6, %13(s64)
6997/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
6998/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
6999std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7000 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7001 if (MI.getOpcode() == AArch64::G_SHL) {
7002    // Address operands with shifts are free, except when running on subtargets
7003 // with AddrLSLSlow14.
7004 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7005 VReg: MI.getOperand(i: 2).getReg(), MRI)) {
7006 const APInt ShiftVal = ValAndVeg->Value;
7007
7008 // Don't fold if we know this will be slow.
7009 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7010 }
7011 }
7012 return std::nullopt;
7013}
7014
7015/// Return true if it is worth folding MI into an extended register. That is,
7016/// if it's safe to pull it into the addressing mode of a load or store as a
7017/// shift.
7018/// \p IsAddrOperand whether the def of MI is used as an address operand
7019/// (e.g. feeding into an LDR/STR).
7020bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7021 MachineInstr &MI, const MachineRegisterInfo &MRI,
7022 bool IsAddrOperand) const {
7023
7024 // Always fold if there is one use, or if we're optimizing for size.
7025 Register DefReg = MI.getOperand(i: 0).getReg();
7026 if (MRI.hasOneNonDBGUse(RegNo: DefReg) ||
7027 MI.getParent()->getParent()->getFunction().hasOptSize())
7028 return true;
7029
7030 if (IsAddrOperand) {
7031 // If we are already sure that folding MI is good or bad, return the result.
7032 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7033 return *Worth;
7034
7035 // Fold G_PTR_ADD if its offset operand can be folded
7036 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7037 MachineInstr *OffsetInst =
7038 getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI);
7039
7040 // Note, we already know G_PTR_ADD is used by at least two instructions.
7041 // If we are also sure about whether folding is beneficial or not,
7042 // return the result.
7043 if (const auto Worth = isWorthFoldingIntoAddrMode(MI&: *OffsetInst, MRI))
7044 return *Worth;
7045 }
7046 }
7047
7048 // FIXME: Consider checking HasALULSLFast as appropriate.
7049
7050 // We have a fastpath, so folding a shift in and potentially computing it
7051 // many times may be beneficial. Check if this is only used in memory ops.
7052 // If it is, then we should fold.
7053 return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg),
7054 P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7055}
7056
7057static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
7058 switch (Type) {
7059 case AArch64_AM::SXTB:
7060 case AArch64_AM::SXTH:
7061 case AArch64_AM::SXTW:
7062 return true;
7063 default:
7064 return false;
7065 }
7066}
7067
7068InstructionSelector::ComplexRendererFns
7069AArch64InstructionSelector::selectExtendedSHL(
7070 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7071 unsigned SizeInBytes, bool WantsExt) const {
7072 assert(Base.isReg() && "Expected base to be a register operand");
7073 assert(Offset.isReg() && "Expected offset to be a register operand");
7074
7075 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7076 MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg());
7077
7078 unsigned OffsetOpc = OffsetInst->getOpcode();
7079 bool LookedThroughZExt = false;
7080 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7081 // Try to look through a ZEXT.
7082 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7083 return std::nullopt;
7084
7085 OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: 1).getReg());
7086 OffsetOpc = OffsetInst->getOpcode();
7087 LookedThroughZExt = true;
7088
7089 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7090 return std::nullopt;
7091 }
7092 // Make sure that the memory op is a valid size.
7093 int64_t LegalShiftVal = Log2_32(Value: SizeInBytes);
7094 if (LegalShiftVal == 0)
7095 return std::nullopt;
7096 if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true))
7097 return std::nullopt;
7098
7099 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7100 // register we will offset is the LHS, and the register containing the
7101 // constant is the RHS.
7102 Register OffsetReg = OffsetInst->getOperand(i: 1).getReg();
7103 Register ConstantReg = OffsetInst->getOperand(i: 2).getReg();
7104 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
7105 if (!ValAndVReg) {
7106 // We didn't get a constant on the RHS. If the opcode is a shift, then
7107 // we're done.
7108 if (OffsetOpc == TargetOpcode::G_SHL)
7109 return std::nullopt;
7110
7111 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7112 std::swap(a&: OffsetReg, b&: ConstantReg);
7113 ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
7114 if (!ValAndVReg)
7115 return std::nullopt;
7116 }
7117
7118 // The value must fit into 3 bits, and must be positive. Make sure that is
7119 // true.
7120 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7121
7122 // Since we're going to pull this into a shift, the constant value must be
7123 // a power of 2. If we got a multiply, then we need to check this.
7124 if (OffsetOpc == TargetOpcode::G_MUL) {
7125 if (!llvm::has_single_bit<uint32_t>(Value: ImmVal))
7126 return std::nullopt;
7127
7128 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7129 ImmVal = Log2_32(Value: ImmVal);
7130 }
7131
7132 if ((ImmVal & 0x7) != ImmVal)
7133 return std::nullopt;
7134
7135 // We are only allowed to shift by LegalShiftVal. This shift value is built
7136 // into the instruction, so we can't just use whatever we want.
7137 if (ImmVal != LegalShiftVal)
7138 return std::nullopt;
7139
7140 unsigned SignExtend = 0;
7141 if (WantsExt) {
7142 // Check if the offset is defined by an extend, unless we looked through a
7143 // G_ZEXT earlier.
7144 if (!LookedThroughZExt) {
7145 MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI);
7146 auto Ext = getExtendTypeForInst(MI&: *ExtInst, MRI, IsLoadStore: true);
7147 if (Ext == AArch64_AM::InvalidShiftExtend)
7148 return std::nullopt;
7149
7150 SignExtend = isSignExtendShiftType(Type: Ext) ? 1 : 0;
7151 // We only support SXTW for signed extension here.
7152 if (SignExtend && Ext != AArch64_AM::SXTW)
7153 return std::nullopt;
7154 OffsetReg = ExtInst->getOperand(i: 1).getReg();
7155 }
7156
7157 // Need a 32-bit wide register here.
7158 MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg()));
7159 OffsetReg = moveScalarRegClass(Reg: OffsetReg, RC: AArch64::GPR32RegClass, MIB);
7160 }
7161
7162 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7163 // offset. Signify that we are shifting by setting the shift flag to 1.
7164 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); },
7165 [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); },
7166 [=](MachineInstrBuilder &MIB) {
7167 // Need to add both immediates here to make sure that they are both
7168 // added to the instruction.
7169 MIB.addImm(Val: SignExtend);
7170 MIB.addImm(Val: 1);
7171 }}};
7172}
7173
7174/// This is used for computing addresses like this:
7175///
7176/// ldr x1, [x2, x3, lsl #3]
7177///
7178/// Where x2 is the base register, and x3 is an offset register. The shift-left
7179/// is a constant value specific to this load instruction. That is, we'll never
7180 /// see anything other than a 3 here (the log2 of the size, in bytes, of the
7181 /// element being loaded).
7182InstructionSelector::ComplexRendererFns
7183AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7184 MachineOperand &Root, unsigned SizeInBytes) const {
7185 if (!Root.isReg())
7186 return std::nullopt;
7187 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7188
7189 // We want to find something like this:
7190 //
7191 // val = G_CONSTANT LegalShiftVal
7192 // shift = G_SHL off_reg val
7193 // ptr = G_PTR_ADD base_reg shift
7194 // x = G_LOAD ptr
7195 //
7196 // And fold it into this addressing mode:
7197 //
7198 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7199
7200 // Check if we can find the G_PTR_ADD.
7201 MachineInstr *PtrAdd =
7202 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7203 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true))
7204 return std::nullopt;
7205
7206 // Now, try to match an opcode which will match our specific offset.
7207 // We want a G_SHL or a G_MUL.
7208 MachineInstr *OffsetInst =
7209 getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: 2).getReg(), MRI);
7210 return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: 1),
7211 Offset&: OffsetInst->getOperand(i: 0), SizeInBytes,
7212 /*WantsExt=*/false);
7213}
7214
7215/// This is used for computing addresses like this:
7216///
7217/// ldr x1, [x2, x3]
7218///
7219/// Where x2 is the base register, and x3 is an offset register.
7220///
7221 /// When it is possible (and profitable) to fold a G_PTR_ADD into the address
7222 /// calculation, this will do so. Otherwise, it will return std::nullopt.
7223InstructionSelector::ComplexRendererFns
7224AArch64InstructionSelector::selectAddrModeRegisterOffset(
7225 MachineOperand &Root) const {
7226 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7227
7228 // We need a GEP.
7229 MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg());
7230 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7231 return std::nullopt;
7232
7233 // If this is used more than once, let's not bother folding.
7234 // TODO: Check if they are memory ops. If they are, then we can still fold
7235 // without having to recompute anything.
7236 if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: 0).getReg()))
7237 return std::nullopt;
7238
7239 // Base is the GEP's LHS, offset is its RHS.
7240 return {{[=](MachineInstrBuilder &MIB) {
7241 MIB.addUse(RegNo: Gep->getOperand(i: 1).getReg());
7242 },
7243 [=](MachineInstrBuilder &MIB) {
7244 MIB.addUse(RegNo: Gep->getOperand(i: 2).getReg());
7245 },
7246 [=](MachineInstrBuilder &MIB) {
7247 // Need to add both immediates here to make sure that they are both
7248 // added to the instruction.
7249 MIB.addImm(Val: 0);
7250 MIB.addImm(Val: 0);
7251 }}};
7252}
7253
7254/// This is intended to be equivalent to selectAddrModeXRO in
7255/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7256InstructionSelector::ComplexRendererFns
7257AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7258 unsigned SizeInBytes) const {
7259 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7260 if (!Root.isReg())
7261 return std::nullopt;
7262 MachineInstr *PtrAdd =
7263 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7264 if (!PtrAdd)
7265 return std::nullopt;
7266
7267 // Check for an immediate which cannot be encoded in the [base + imm]
7268 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7269 // end up with code like:
7270 //
7271 // mov x0, wide
7272 // add x1 base, x0
7273 // ldr x2, [x1, x0]
7274 //
7275 // In this situation, we can use the [base, xreg] addressing mode to save an
7276 // add/sub:
7277 //
7278 // mov x0, wide
7279 // ldr x2, [base, x0]
7280 auto ValAndVReg =
7281 getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: 2).getReg(), MRI);
7282 if (ValAndVReg) {
7283 unsigned Scale = Log2_32(Value: SizeInBytes);
7284 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7285
7286 // Skip immediates that can be selected in the load/store addressing
7287 // mode.
7288 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7289 ImmOff < (0x1000 << Scale))
7290 return std::nullopt;
7291
7292 // Helper lambda to decide whether or not it is preferable to emit an add.
7293 auto isPreferredADD = [](int64_t ImmOff) {
7294 // Constants in [0x0, 0xfff] can be encoded in an add.
7295 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7296 return true;
7297
7298 // Can it be encoded in an add lsl #12?
7299 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7300 return false;
7301
7302 // It can be encoded in an add lsl #12, but we may not want to. If it is
7303 // possible to select this as a single movz, then prefer that. A single
7304 // movz is faster than an add with a shift.
7305 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7306 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7307 };
7308
7309 // If the immediate can be encoded in a single add/sub, then bail out.
7310 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7311 return std::nullopt;
7312 }
7313
7314 // Try to fold shifts into the addressing mode.
7315 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7316 if (AddrModeFns)
7317 return AddrModeFns;
7318
7319 // If that doesn't work, see if it's possible to fold in registers from
7320 // a GEP.
7321 return selectAddrModeRegisterOffset(Root);
7322}
7323
7324/// This is used for computing addresses like this:
7325///
7326/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7327///
7328/// Where we have a 64-bit base register, a 32-bit offset register, and an
7329/// extend (which may or may not be signed).
7330InstructionSelector::ComplexRendererFns
7331AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7332 unsigned SizeInBytes) const {
7333 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7334
7335 MachineInstr *PtrAdd =
7336 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7337 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true))
7338 return std::nullopt;
7339
7340 MachineOperand &LHS = PtrAdd->getOperand(i: 1);
7341 MachineOperand &RHS = PtrAdd->getOperand(i: 2);
7342 MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
7343
7344 // The first case is the same as selectAddrModeXRO, except we need an extend.
7345 // In this case, we try to find a shift and extend, and fold them into the
7346 // addressing mode.
7347 //
7348 // E.g.
7349 //
7350 // off_reg = G_Z/S/ANYEXT ext_reg
7351 // val = G_CONSTANT LegalShiftVal
7352 // shift = G_SHL off_reg val
7353 // ptr = G_PTR_ADD base_reg shift
7354 // x = G_LOAD ptr
7355 //
7356 // In this case we can get a load like this:
7357 //
7358 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7359 auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: 0),
7360 SizeInBytes, /*WantsExt=*/true);
7361 if (ExtendedShl)
7362 return ExtendedShl;
7363
7364 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7365 //
7366 // e.g.
7367 // ldr something, [base_reg, ext_reg, sxtw]
7368 if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true))
7369 return std::nullopt;
7370
7371 // Check if this is an extend. We'll get an extend type if it is.
7372 AArch64_AM::ShiftExtendType Ext =
7373 getExtendTypeForInst(MI&: *OffsetInst, MRI, /*IsLoadStore=*/true);
7374 if (Ext == AArch64_AM::InvalidShiftExtend)
7375 return std::nullopt;
7376
7377 // Need a 32-bit wide register.
7378 MachineIRBuilder MIB(*PtrAdd);
7379 Register ExtReg = moveScalarRegClass(Reg: OffsetInst->getOperand(i: 1).getReg(),
7380 RC: AArch64::GPR32RegClass, MIB);
7381 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7382
7383 // Base is LHS, offset is ExtReg.
7384 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); },
7385 [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7386 [=](MachineInstrBuilder &MIB) {
7387 MIB.addImm(Val: SignExtend);
7388 MIB.addImm(Val: 0);
7389 }}};
7390}
7391
7392/// Select a "register plus unscaled signed 9-bit immediate" address. This
7393/// should only match when there is an offset that is not valid for a scaled
7394/// immediate addressing mode. The "Size" argument is the size in bytes of the
7395/// memory reference, which is needed here to know what is valid for a scaled
7396/// immediate.
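/// For example, a byte offset of -3 cannot be scaled for any access size but
/// fits the unscaled form (e.g. ldur w0, [x1, #-3]).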
7397InstructionSelector::ComplexRendererFns
7398AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7399 unsigned Size) const {
7400 MachineRegisterInfo &MRI =
7401 Root.getParent()->getParent()->getParent()->getRegInfo();
7402
7403 if (!Root.isReg())
7404 return std::nullopt;
7405
7406 if (!isBaseWithConstantOffset(Root, MRI))
7407 return std::nullopt;
7408
7409 MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7410
7411 MachineOperand &OffImm = RootDef->getOperand(i: 2);
7412 if (!OffImm.isReg())
7413 return std::nullopt;
7414 MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg());
7415 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7416 return std::nullopt;
7417 int64_t RHSC;
7418 MachineOperand &RHSOp1 = RHS->getOperand(i: 1);
7419 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7420 return std::nullopt;
7421 RHSC = RHSOp1.getCImm()->getSExtValue();
7422
7423 if (RHSC >= -256 && RHSC < 256) {
7424 MachineOperand &Base = RootDef->getOperand(i: 1);
7425 return {{
7426 [=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); },
7427 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); },
7428 }};
7429 }
7430 return std::nullopt;
7431}
7432
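/// Try to fold the G_ADD_LOW of an ADRP + :lo12: pair into the immediate
/// operand of a load/store, so that, for example,
///   adrp x8, sym ; add x8, x8, :lo12:sym ; ldr x0, [x8]
/// can instead be selected as
///   adrp x8, sym ; ldr x0, [x8, :lo12:sym]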
7433InstructionSelector::ComplexRendererFns
7434AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7435 unsigned Size,
7436 MachineRegisterInfo &MRI) const {
7437 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7438 return std::nullopt;
7439 MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: 1).getReg());
7440 if (Adrp.getOpcode() != AArch64::ADRP)
7441 return std::nullopt;
7442
7443 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7444 auto Offset = Adrp.getOperand(i: 1).getOffset();
7445 if (Offset % Size != 0)
7446 return std::nullopt;
7447
7448 auto GV = Adrp.getOperand(i: 1).getGlobal();
7449 if (GV->isThreadLocal())
7450 return std::nullopt;
7451
7452 auto &MF = *RootDef.getParent()->getParent();
7453 if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size)
7454 return std::nullopt;
7455
7456 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget());
7457 MachineIRBuilder MIRBuilder(RootDef);
7458 Register AdrpReg = Adrp.getOperand(i: 0).getReg();
7459 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); },
7460 [=](MachineInstrBuilder &MIB) {
7461 MIB.addGlobalAddress(GV, Offset,
7462 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF |
7463 AArch64II::MO_NC);
7464 }}};
7465}
7466
7467/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7468/// "Size" argument is the size in bytes of the memory reference, which
7469/// determines the scale.
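/// For example, with Size == 8 a byte offset of 16 is rendered as the scaled
/// immediate 2, giving ldr x0, [x1, #16].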
7470InstructionSelector::ComplexRendererFns
7471AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7472 unsigned Size) const {
7473 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7474 MachineRegisterInfo &MRI = MF.getRegInfo();
7475
7476 if (!Root.isReg())
7477 return std::nullopt;
7478
7479 MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7480 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7481 return {{
7482 [=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: 1)); },
7483 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
7484 }};
7485 }
7486
7487 CodeModel::Model CM = MF.getTarget().getCodeModel();
7488 // Check if we can fold in the ADD of small code model ADRP + ADD address.
7489 if (CM == CodeModel::Small) {
7490 auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI);
7491 if (OpFns)
7492 return OpFns;
7493 }
7494
7495 if (isBaseWithConstantOffset(Root, MRI)) {
7496 MachineOperand &LHS = RootDef->getOperand(i: 1);
7497 MachineOperand &RHS = RootDef->getOperand(i: 2);
7498 MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg());
7499 MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg());
7500
7501 int64_t RHSC = (int64_t)RHSDef->getOperand(i: 1).getCImm()->getZExtValue();
7502 unsigned Scale = Log2_32(Value: Size);
7503 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7504 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7505 return {{
7506 [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHSDef->getOperand(i: 1)); },
7507 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7508 }};
7509
7510 return {{
7511 [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHS); },
7512 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7513 }};
7514 }
7515 }
7516
7517 // Before falling back to our general case, check if the unscaled
7518 // instructions can handle this. If so, that's preferable.
7519 if (selectAddrModeUnscaled(Root, Size))
7520 return std::nullopt;
7521
7522 return {{
7523 [=](MachineInstrBuilder &MIB) { MIB.add(MO: Root); },
7524 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
7525 }};
7526}
7527
7528/// Given a shift instruction, return the correct shift type for that
7529/// instruction.
7530static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7531 switch (MI.getOpcode()) {
7532 default:
7533 return AArch64_AM::InvalidShiftExtend;
7534 case TargetOpcode::G_SHL:
7535 return AArch64_AM::LSL;
7536 case TargetOpcode::G_LSHR:
7537 return AArch64_AM::LSR;
7538 case TargetOpcode::G_ASHR:
7539 return AArch64_AM::ASR;
7540 case TargetOpcode::G_ROTR:
7541 return AArch64_AM::ROR;
7542 }
7543}
7544
7545/// Select a "shifted register" operand. If the value is not shifted, set the
7546/// shift operand to a default value of "lsl 0".
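/// For example, a G_SHL of a register by a constant 3 that feeds a G_ADD can
/// be folded so the add is selected as 'add x0, x1, x2, lsl #3'.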
7547InstructionSelector::ComplexRendererFns
7548AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7549 bool AllowROR) const {
7550 if (!Root.isReg())
7551 return std::nullopt;
7552 MachineRegisterInfo &MRI =
7553 Root.getParent()->getParent()->getParent()->getRegInfo();
7554
7555 // Check if the operand is defined by an instruction which corresponds to
7556 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7557 MachineInstr *ShiftInst = MRI.getVRegDef(Reg: Root.getReg());
7558 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(MI&: *ShiftInst);
7559 if (ShType == AArch64_AM::InvalidShiftExtend)
7560 return std::nullopt;
7561 if (ShType == AArch64_AM::ROR && !AllowROR)
7562 return std::nullopt;
7563 if (!isWorthFoldingIntoExtendedReg(MI&: *ShiftInst, MRI, IsAddrOperand: false))
7564 return std::nullopt;
7565
7566 // Need an immediate on the RHS.
7567 MachineOperand &ShiftRHS = ShiftInst->getOperand(i: 2);
7568 auto Immed = getImmedFromMO(Root: ShiftRHS);
7569 if (!Immed)
7570 return std::nullopt;
7571
7572 // We have something that we can fold. Fold in the shift's LHS and RHS into
7573 // the instruction.
7574 MachineOperand &ShiftLHS = ShiftInst->getOperand(i: 1);
7575 Register ShiftReg = ShiftLHS.getReg();
7576
7577 unsigned NumBits = MRI.getType(Reg: ShiftReg).getSizeInBits();
7578 unsigned Val = *Immed & (NumBits - 1);
7579 unsigned ShiftVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
7580
7581 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ShiftReg); },
7582 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShiftVal); }}};
7583}
7584
7585AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7586 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7587 unsigned Opc = MI.getOpcode();
7588
7589 // Handle explicit extend instructions first.
7590 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7591 unsigned Size;
7592 if (Opc == TargetOpcode::G_SEXT)
7593 Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
7594 else
7595 Size = MI.getOperand(i: 2).getImm();
7596 assert(Size != 64 && "Extend from 64 bits?");
7597 switch (Size) {
7598 case 8:
7599 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7600 case 16:
7601 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7602 case 32:
7603 return AArch64_AM::SXTW;
7604 default:
7605 return AArch64_AM::InvalidShiftExtend;
7606 }
7607 }
7608
7609 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7610 unsigned Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
7611 assert(Size != 64 && "Extend from 64 bits?");
7612 switch (Size) {
7613 case 8:
7614 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7615 case 16:
7616 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7617 case 32:
7618 return AArch64_AM::UXTW;
7619 default:
7620 return AArch64_AM::InvalidShiftExtend;
7621 }
7622 }
7623
7624 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7625 // on the RHS.
7626 if (Opc != TargetOpcode::G_AND)
7627 return AArch64_AM::InvalidShiftExtend;
7628
7629 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(Root: MI.getOperand(i: 2));
7630 if (!MaybeAndMask)
7631 return AArch64_AM::InvalidShiftExtend;
7632 uint64_t AndMask = *MaybeAndMask;
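 // Masks of 0xFF, 0xFFFF and 0xFFFFFFFF act like zero-extends from 8, 16 and
 // 32 bits respectively.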
7633 switch (AndMask) {
7634 default:
7635 return AArch64_AM::InvalidShiftExtend;
7636 case 0xFF:
7637 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7638 case 0xFFFF:
7639 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7640 case 0xFFFFFFFF:
7641 return AArch64_AM::UXTW;
7642 }
7643}
7644
7645Register AArch64InstructionSelector::moveScalarRegClass(
7646 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7647 MachineRegisterInfo &MRI = *MIB.getMRI();
7648 auto Ty = MRI.getType(Reg);
7649 assert(!Ty.isVector() && "Expected scalars only!");
7650 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7651 return Reg;
7652
7653 // Create a copy and immediately select it.
7654 // FIXME: We should have an emitCopy function?
7655 auto Copy = MIB.buildCopy(Res: {&RC}, Op: {Reg});
7656 selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
7657 return Copy.getReg(Idx: 0);
7658}
7659
7660/// Select an "extended register" operand. This operand folds in an extend
7661/// followed by an optional left shift.
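/// For example, a G_SHL by 2 of a G_SEXT from s16 can be folded into a
/// consuming G_ADD as 'add x0, x1, w2, sxth #2'.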
7662InstructionSelector::ComplexRendererFns
7663AArch64InstructionSelector::selectArithExtendedRegister(
7664 MachineOperand &Root) const {
7665 if (!Root.isReg())
7666 return std::nullopt;
7667 MachineRegisterInfo &MRI =
7668 Root.getParent()->getParent()->getParent()->getRegInfo();
7669
7670 uint64_t ShiftVal = 0;
7671 Register ExtReg;
7672 AArch64_AM::ShiftExtendType Ext;
7673 MachineInstr *RootDef = getDefIgnoringCopies(Reg: Root.getReg(), MRI);
7674 if (!RootDef)
7675 return std::nullopt;
7676
7677 if (!isWorthFoldingIntoExtendedReg(MI&: *RootDef, MRI, IsAddrOperand: false))
7678 return std::nullopt;
7679
7680 // Check if we can fold a shift and an extend.
7681 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7682 // Look for a constant on the RHS of the shift.
7683 MachineOperand &RHS = RootDef->getOperand(i: 2);
7684 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(Root: RHS);
7685 if (!MaybeShiftVal)
7686 return std::nullopt;
7687 ShiftVal = *MaybeShiftVal;
7688 if (ShiftVal > 4)
7689 return std::nullopt;
7690 // Look for a valid extend instruction on the LHS of the shift.
7691 MachineOperand &LHS = RootDef->getOperand(i: 1);
7692 MachineInstr *ExtDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
7693 if (!ExtDef)
7694 return std::nullopt;
7695 Ext = getExtendTypeForInst(MI&: *ExtDef, MRI);
7696 if (Ext == AArch64_AM::InvalidShiftExtend)
7697 return std::nullopt;
7698 ExtReg = ExtDef->getOperand(i: 1).getReg();
7699 } else {
7700 // Didn't get a shift. Try just folding an extend.
7701 Ext = getExtendTypeForInst(MI&: *RootDef, MRI);
7702 if (Ext == AArch64_AM::InvalidShiftExtend)
7703 return std::nullopt;
7704 ExtReg = RootDef->getOperand(i: 1).getReg();
7705
7706 // If we have a 32 bit instruction which zeroes out the high half of a
7707 // register, we get an implicit zero extend for free. Check if we have one.
7708 // FIXME: We actually emit the extend right now even though we don't have
7709 // to.
7710 if (Ext == AArch64_AM::UXTW && MRI.getType(Reg: ExtReg).getSizeInBits() == 32) {
7711 MachineInstr *ExtInst = MRI.getVRegDef(Reg: ExtReg);
7712 if (isDef32(MI: *ExtInst))
7713 return std::nullopt;
7714 }
7715 }
7716
7717 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7718 // copy.
7719 MachineIRBuilder MIB(*RootDef);
7720 ExtReg = moveScalarRegClass(Reg: ExtReg, RC: AArch64::GPR32RegClass, MIB);
7721
7722 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7723 [=](MachineInstrBuilder &MIB) {
7724 MIB.addImm(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal));
7725 }}};
7726}
7727
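/// Select the vector source when Root is the high 64 bits of a 128-bit vector,
/// i.e. the second result of a G_UNMERGE_VALUES or a lane-1
/// G_EXTRACT_VECTOR_ELT of a <2 x s64>, looking through bitcasts on
/// little-endian targets.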
7728InstructionSelector::ComplexRendererFns
7729AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7730 if (!Root.isReg())
7731 return std::nullopt;
7732 MachineRegisterInfo &MRI =
7733 Root.getParent()->getParent()->getParent()->getRegInfo();
7734
7735 auto Extract = getDefSrcRegIgnoringCopies(Reg: Root.getReg(), MRI);
7736 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7737 STI.isLittleEndian())
7738 Extract =
7739 getDefSrcRegIgnoringCopies(Reg: Extract->MI->getOperand(i: 1).getReg(), MRI);
7740 if (!Extract)
7741 return std::nullopt;
7742
7743 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7744 if (Extract->Reg == Extract->MI->getOperand(i: 1).getReg()) {
7745 Register ExtReg = Extract->MI->getOperand(i: 2).getReg();
7746 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7747 }
7748 }
7749 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7750 LLT SrcTy = MRI.getType(Reg: Extract->MI->getOperand(i: 1).getReg());
7751 auto LaneIdx = getIConstantVRegValWithLookThrough(
7752 VReg: Extract->MI->getOperand(i: 2).getReg(), MRI);
7753 if (LaneIdx && SrcTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) &&
7754 LaneIdx->Value.getSExtValue() == 1) {
7755 Register ExtReg = Extract->MI->getOperand(i: 1).getReg();
7756 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7757 }
7758 }
7759
7760 return std::nullopt;
7761}
7762
7763void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7764 const MachineInstr &MI,
7765 int OpIdx) const {
7766 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7767 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7768 "Expected G_CONSTANT");
7769 std::optional<int64_t> CstVal =
7770 getIConstantVRegSExtVal(VReg: MI.getOperand(i: 0).getReg(), MRI);
7771 assert(CstVal && "Expected constant value");
7772 MIB.addImm(Val: *CstVal);
7773}
7774
7775void AArch64InstructionSelector::renderLogicalImm32(
7776 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7777 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7778 "Expected G_CONSTANT");
7779 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7780 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 32);
7781 MIB.addImm(Val: Enc);
7782}
7783
7784void AArch64InstructionSelector::renderLogicalImm64(
7785 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7786 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7787 "Expected G_CONSTANT");
7788 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7789 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 64);
7790 MIB.addImm(Val: Enc);
7791}
7792
7793void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7794 const MachineInstr &MI,
7795 int OpIdx) const {
7796 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7797 "Expected G_UBSANTRAP");
7798 MIB.addImm(Val: MI.getOperand(i: 0).getImm() | ('U' << 8));
7799}
7800
7801void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7802 const MachineInstr &MI,
7803 int OpIdx) const {
7804 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7805 "Expected G_FCONSTANT");
7806 MIB.addImm(
7807 Val: AArch64_AM::getFP16Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7808}
7809
7810void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7811 const MachineInstr &MI,
7812 int OpIdx) const {
7813 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7814 "Expected G_FCONSTANT");
7815 MIB.addImm(
7816 Val: AArch64_AM::getFP32Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7817}
7818
7819void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7820 const MachineInstr &MI,
7821 int OpIdx) const {
7822 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7823 "Expected G_FCONSTANT");
7824 MIB.addImm(
7825 Val: AArch64_AM::getFP64Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7826}
7827
7828void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7829 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7830 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7831 "Expected G_FCONSTANT");
7832 MIB.addImm(Val: AArch64_AM::encodeAdvSIMDModImmType4(Imm: MI.getOperand(i: 1)
7833 .getFPImm()
7834 ->getValueAPF()
7835 .bitcastToAPInt()
7836 .getZExtValue()));
7837}
7838
7839bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7840 const MachineInstr &MI, unsigned NumBytes) const {
7841 if (!MI.mayLoadOrStore())
7842 return false;
7843 assert(MI.hasOneMemOperand() &&
7844 "Expected load/store to have only one mem op!");
7845 return (*MI.memoperands_begin())->getSize() == NumBytes;
7846}
7847
7848bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7849 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7850 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits() != 32)
7851 return false;
7852
7853 // Only return true if we know the operation will zero-out the high half of
7854 // the 64-bit register. Truncates can be subregister copies, which don't
7855 // zero out the high bits. Copies and other copy-like instructions can be
7856 // fed by truncates, or could be lowered as subregister copies.
7857 switch (MI.getOpcode()) {
7858 default:
7859 return true;
7860 case TargetOpcode::COPY:
7861 case TargetOpcode::G_BITCAST:
7862 case TargetOpcode::G_TRUNC:
7863 case TargetOpcode::G_PHI:
7864 return false;
7865 }
7866}
7867
7868
7869// Perform fixups on the given PHI instruction's operands to force them all
7870// to be the same as the destination regbank.
7871static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
7872 const AArch64RegisterBankInfo &RBI) {
7873 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
7874 Register DstReg = MI.getOperand(i: 0).getReg();
7875 const RegisterBank *DstRB = MRI.getRegBankOrNull(Reg: DstReg);
7876 assert(DstRB && "Expected PHI dst to have regbank assigned");
7877 MachineIRBuilder MIB(MI);
7878
7879 // Go through each operand and ensure it has the same regbank.
7880 for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
7881 if (!MO.isReg())
7882 continue;
7883 Register OpReg = MO.getReg();
7884 const RegisterBank *RB = MRI.getRegBankOrNull(Reg: OpReg);
7885 if (RB != DstRB) {
7886 // Insert a cross-bank copy.
7887 auto *OpDef = MRI.getVRegDef(Reg: OpReg);
7888 const LLT &Ty = MRI.getType(Reg: OpReg);
7889 MachineBasicBlock &OpDefBB = *OpDef->getParent();
7890
7891 // Any instruction we insert must appear after all PHIs in the block
7892 // for the block to be valid MIR.
7893 MachineBasicBlock::iterator InsertPt = std::next(x: OpDef->getIterator());
7894 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
7895 InsertPt = OpDefBB.getFirstNonPHI();
7896 MIB.setInsertPt(MBB&: *OpDef->getParent(), II: InsertPt);
7897 auto Copy = MIB.buildCopy(Res: Ty, Op: OpReg);
7898 MRI.setRegBank(Reg: Copy.getReg(Idx: 0), RegBank: *DstRB);
7899 MO.setReg(Copy.getReg(Idx: 0));
7900 }
7901 }
7902}
7903
7904void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
7905 // We're looking for PHIs; build a list so we don't invalidate iterators.
7906 MachineRegisterInfo &MRI = MF.getRegInfo();
7907 SmallVector<MachineInstr *, 32> Phis;
7908 for (auto &BB : MF) {
7909 for (auto &MI : BB) {
7910 if (MI.getOpcode() == TargetOpcode::G_PHI)
7911 Phis.emplace_back(Args: &MI);
7912 }
7913 }
7914
7915 for (auto *MI : Phis) {
7916 // We need to do some work here if the operand types are sub-32-bit and they
7917 // are split across fpr/gpr banks. Since all types <32b on gpr
7918 // end up being assigned gpr32 regclasses, we can end up with PHIs here
7919 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
7920 // be selecting heterogeneous regbanks for operands if possible, but we
7921 // still need to be able to deal with it here.
7922 //
7923 // To fix this, if we have a gpr-bank operand < 32b in size and at least
7924 // one other operand is on the fpr bank, then we add cross-bank copies
7925 // to homogenize the operand banks. For simplicity the bank that we choose
7926 // to settle on is whatever bank the def operand has. For example:
7927 //
7928 // %endbb:
7929 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
7930 // =>
7931 // %bb2:
7932 // ...
7933 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
7934 // ...
7935 // %endbb:
7936 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
7937 bool HasGPROp = false, HasFPROp = false;
7938 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
7939 if (!MO.isReg())
7940 continue;
7941 const LLT &Ty = MRI.getType(Reg: MO.getReg());
7942 if (!Ty.isValid() || !Ty.isScalar())
7943 break;
7944 if (Ty.getSizeInBits() >= 32)
7945 break;
7946 const RegisterBank *RB = MRI.getRegBankOrNull(Reg: MO.getReg());
7947 // If for some reason we don't have a regbank yet, don't try anything.
7948 if (!RB)
7949 break;
7950
7951 if (RB->getID() == AArch64::GPRRegBankID)
7952 HasGPROp = true;
7953 else
7954 HasFPROp = true;
7955 }
7956 // We have heterogeneous regbanks; fix them up.
7957 if (HasGPROp && HasFPROp)
7958 fixupPHIOpBanks(MI&: *MI, MRI, RBI);
7959 }
7960}
7961
7962namespace llvm {
7963InstructionSelector *
7964createAArch64InstructionSelector(const AArch64TargetMachine &TM,
7965 const AArch64Subtarget &Subtarget,
7966 const AArch64RegisterBankInfo &RBI) {
7967 return new AArch64InstructionSelector(TM, Subtarget, RBI);
7968}
7969}
7970