AArch64InstructionSelector.cpp source code [llvm_projects/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp]

1	//===- AArch64InstructionSelector.cpp ----------------------------- C++ --==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	/// \file
9	/// This file implements the targeting of the InstructionSelector class for
10	/// AArch64.
11	/// \todo This should be generated by TableGen.
12	//===----------------------------------------------------------------------===//
13
14	#include "AArch64GlobalISelUtils.h"
15	#include "AArch64InstrInfo.h"
16	#include "AArch64MachineFunctionInfo.h"
17	#include "AArch64RegisterBankInfo.h"
18	#include "AArch64RegisterInfo.h"
19	#include "AArch64Subtarget.h"
20	#include "AArch64TargetMachine.h"
21	#include "MCTargetDesc/AArch64AddressingModes.h"
22	#include "MCTargetDesc/AArch64MCTargetDesc.h"
23	#include "llvm/BinaryFormat/Dwarf.h"
24	#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
25	#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
26	#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
27	#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
28	#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29	#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30	#include "llvm/CodeGen/GlobalISel/Utils.h"
31	#include "llvm/CodeGen/MachineBasicBlock.h"
32	#include "llvm/CodeGen/MachineConstantPool.h"
33	#include "llvm/CodeGen/MachineFrameInfo.h"
34	#include "llvm/CodeGen/MachineFunction.h"
35	#include "llvm/CodeGen/MachineInstr.h"
36	#include "llvm/CodeGen/MachineInstrBuilder.h"
37	#include "llvm/CodeGen/MachineMemOperand.h"
38	#include "llvm/CodeGen/MachineOperand.h"
39	#include "llvm/CodeGen/MachineRegisterInfo.h"
40	#include "llvm/CodeGen/TargetOpcodes.h"
41	#include "llvm/CodeGen/TargetRegisterInfo.h"
42	#include "llvm/IR/Constants.h"
43	#include "llvm/IR/DerivedTypes.h"
44	#include "llvm/IR/Instructions.h"
45	#include "llvm/IR/IntrinsicsAArch64.h"
46	#include "llvm/IR/Type.h"
47	#include "llvm/Pass.h"
48	#include "llvm/Support/Debug.h"
49	#include "llvm/Support/raw_ostream.h"
50	#include <optional>
51
52	#define DEBUG_TYPE "aarch64-isel"
53
54	using namespace llvm;
55	using namespace MIPatternMatch;
56	using namespace AArch64GISelUtils;
57
58	namespace llvm {
59	class BlockFrequencyInfo;
60	class ProfileSummaryInfo;
61	}
62
63	namespace {
64
65	#define GET_GLOBALISEL_PREDICATE_BITSET
66	#include "AArch64GenGlobalISel.inc"
67	#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70	class AArch64InstructionSelector : public InstructionSelector {
71	public:
72	AArch64InstructionSelector(const AArch64TargetMachine &TM,
73	const AArch64Subtarget &STI,
74	const AArch64RegisterBankInfo &RBI);
75
76	bool select(MachineInstr &I) override;
77	static const char getName() { return* DEBUG_TYPE; }
78
79	void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80	CodeGenCoverage CoverageInfo, ProfileSummaryInfo PSI,
81	BlockFrequencyInfo *BFI) override {
82	InstructionSelector::setupMF(mf&: MF, vt: VT, covinfo: CoverageInfo, psi: PSI, bfi: BFI);
83	MIB.setMF(MF);
84
85	// hasFnAttribute() is expensive to call on every BRCOND selection, so
86	// cache it here for each run of the selector.
87	ProduceNonFlagSettingCondBr =
88	!MF.getFunction().hasFnAttribute(Kind: Attribute::SpeculativeLoadHardening);
89	MFReturnAddr = Register ();
90
91	processPHIs(MF);
92	}
93
94	private:
95	/// tblgen-erated 'select' implementation, used as the initial selector for
96	/// the patterns that don't require complex C++.
97	bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99	// A lowering phase that runs before any selection attempts.
100	// Returns true if the instruction was modified.
101	bool preISelLower(MachineInstr &I);
102
103	// An early selection function that runs before the selectImpl() call.
104	bool earlySelect(MachineInstr &I);
105
106	/// Save state that is shared between select calls, call select on \p I and
107	/// then restore the saved state. This can be used to recursively call select
108	/// within a select call.
109	bool selectAndRestoreState(MachineInstr &I);
110
111	// Do some preprocessing of G_PHIs before we begin selection.
112	void processPHIs(MachineFunction &MF);
113
114	bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116	/// Eliminate same-sized cross-bank copies into stores before selectImpl().
117	bool contractCrossBankCopyIntoStore(MachineInstr &I,
118	MachineRegisterInfo &MRI);
119
120	bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122	bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123	MachineRegisterInfo &MRI) const;
124	bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125	MachineRegisterInfo &MRI) const;
126
127	///@{
128	/// Helper functions for selectCompareBranch.
129	bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130	MachineIRBuilder &MIB) const;
131	bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132	MachineIRBuilder &MIB) const;
133	bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134	MachineIRBuilder &MIB) const;
135	bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136	MachineBasicBlock *DstMBB,
137	MachineIRBuilder &MIB) const;
138	///@}
139
140	bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141	MachineRegisterInfo &MRI);
142
143	bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144	bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146	// Helper to generate an equivalent of scalar_to_vector into a new register,
147	// returned via 'Dst'.
148	MachineInstr emitScalarToVector(unsigned* EltSize,
149	const TargetRegisterClass *DstRC,
150	Register Scalar,
151	MachineIRBuilder &MIRBuilder) const;
152	/// Helper to narrow vector that was widened by emitScalarToVector.
153	/// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154	/// vector, correspondingly.
155	MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156	MachineIRBuilder &MIRBuilder,
157	MachineRegisterInfo &MRI) const;
158
159	/// Emit a lane insert into \p DstReg, or a new vector register if
160	/// std::nullopt is provided.
161	///
162	/// The lane inserted into is defined by \p LaneIdx. The vector source
163	/// register is given by \p SrcReg. The register containing the element is
164	/// given by \p EltReg.
165	MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166	Register EltReg, unsigned LaneIdx,
167	const RegisterBank &RB,
168	MachineIRBuilder &MIRBuilder) const;
169
170	/// Emit a sequence of instructions representing a constant \p CV for a
171	/// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172	///
173	/// \returns the last instruction in the sequence on success, and nullptr
174	/// otherwise.
175	MachineInstr emitConstantVector(Register Dst, Constant CV,
176	MachineIRBuilder &MIRBuilder,
177	MachineRegisterInfo &MRI);
178
179	MachineInstr tryAdvSIMDModImm8(Register Dst, unsigned* DstSize, APInt Bits,
180	MachineIRBuilder &MIRBuilder);
181
182	MachineInstr tryAdvSIMDModImm16(Register Dst, unsigned* DstSize, APInt Bits,
183	MachineIRBuilder &MIRBuilder, bool Inv);
184
185	MachineInstr tryAdvSIMDModImm32(Register Dst, unsigned* DstSize, APInt Bits,
186	MachineIRBuilder &MIRBuilder, bool Inv);
187	MachineInstr tryAdvSIMDModImm64(Register Dst, unsigned* DstSize, APInt Bits,
188	MachineIRBuilder &MIRBuilder);
189	MachineInstr tryAdvSIMDModImm321s(Register Dst, unsigned* DstSize, APInt Bits,
190	MachineIRBuilder &MIRBuilder, bool Inv);
191	MachineInstr tryAdvSIMDModImmFP(Register Dst, unsigned* DstSize, APInt Bits,
192	MachineIRBuilder &MIRBuilder);
193
194	bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195	MachineRegisterInfo &MRI);
196	/// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197	/// SUBREG_TO_REG.
198	bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199	bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
200	bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
201	bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
202
203	bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204	bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205	bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206	bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208	/// Helper function to select vector load intrinsics like
209	/// @llvm.aarch64.neon.ld2., @llvm.aarch64.neon.ld4., etc.
210	/// \p Opc is the opcode that the selected instruction should use.
211	/// \p NumVecs is the number of vector destinations for the instruction.
212	/// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213	bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214	MachineInstr &I);
215	bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216	MachineInstr &I);
217	void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218	unsigned Opc);
219	bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220	unsigned Opc);
221	bool selectIntrinsicWithSideEffects(MachineInstr &I,
222	MachineRegisterInfo &MRI);
223	bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224	bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225	bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226	bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227	bool selectPtrAuthGlobalValue(MachineInstr &I,
228	MachineRegisterInfo &MRI) const;
229	bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230	bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231	bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232	void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233	unsigned Opc1, unsigned Opc2, bool isExt);
234
235	bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236	bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237	bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239	unsigned emitConstantPoolEntry(const Constant *CPVal,
240	MachineFunction &MF) const;
241	MachineInstr emitLoadFromConstantPool(const* Constant *CPVal,
242	MachineIRBuilder &MIRBuilder) const;
243
244	// Emit a vector concat operation.
245	MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246	Register Op2,
247	MachineIRBuilder &MIRBuilder) const;
248
249	// Emit an integer compare between LHS and RHS, which checks for Predicate.
250	MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251	MachineOperand &Predicate,
252	MachineIRBuilder &MIRBuilder) const;
253
254	/// Emit a floating point comparison between \p LHS and \p RHS.
255	/// \p Pred if given is the intended predicate to use.
256	MachineInstr *
257	emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258	std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260	MachineInstr *
261	emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262	std::initializer_list<llvm::SrcOp> SrcOps,
263	MachineIRBuilder &MIRBuilder,
264	const ComplexRendererFns &RenderFns = std::nullopt) const;
265	/// Helper function to emit an add or sub instruction.
266	///
267	/// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268	/// in a specific order.
269	///
270	/// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271	///
272	/// \code
273	/// const std::array<std::array<unsigned, 2>, 4> Table {
274	/// {{AArch64::ADDXri, AArch64::ADDWri},
275	/// {AArch64::ADDXrs, AArch64::ADDWrs},
276	/// {AArch64::ADDXrr, AArch64::ADDWrr},
277	/// {AArch64::SUBXri, AArch64::SUBWri},
278	/// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279	/// \endcode
280	///
281	/// Each row in the table corresponds to a different addressing mode. Each
282	/// column corresponds to a different register size.
283	///
284	/// \attention Rows must be structured as follows:
285	/// - Row 0: The ri opcode variants
286	/// - Row 1: The rs opcode variants
287	/// - Row 2: The rr opcode variants
288	/// - Row 3: The ri opcode variants for negative immediates
289	/// - Row 4: The rx opcode variants
290	///
291	/// \attention Columns must be structured as follows:
292	/// - Column 0: The 64-bit opcode variants
293	/// - Column 1: The 32-bit opcode variants
294	///
295	/// \p Dst is the destination register of the binop to emit.
296	/// \p LHS is the left-hand operand of the binop to emit.
297	/// \p RHS is the right-hand operand of the binop to emit.
298	MachineInstr *emitAddSub(
299	const std::array<std::array<unsigned, `2`>, `5`> &AddrModeAndSizeToOpcode,
300	Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301	MachineIRBuilder &MIRBuilder) const;
302	MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303	MachineOperand &RHS,
304	MachineIRBuilder &MIRBuilder) const;
305	MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306	MachineIRBuilder &MIRBuilder) const;
307	MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308	MachineIRBuilder &MIRBuilder) const;
309	MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310	MachineIRBuilder &MIRBuilder) const;
311	MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312	MachineIRBuilder &MIRBuilder) const;
313	MachineInstr *emitCMP(MachineOperand &LHS, MachineOperand &RHS,
314	MachineIRBuilder &MIRBuilder) const;
315	MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
316	MachineIRBuilder &MIRBuilder) const;
317	MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
318	MachineIRBuilder &MIRBuilder) const;
319	MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
320	AArch64CC::CondCode CC,
321	MachineIRBuilder &MIRBuilder) const;
322	MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
323	const RegisterBank &DstRB, LLT ScalarTy,
324	Register VecReg, unsigned LaneIdx,
325	MachineIRBuilder &MIRBuilder) const;
326	MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
327	AArch64CC::CondCode Pred,
328	MachineIRBuilder &MIRBuilder) const;
329	/// Emit a CSet for a FP compare.
330	///
331	/// \p Dst is expected to be a 32-bit scalar register.
332	MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
333	MachineIRBuilder &MIRBuilder) const;
334
335	/// Emit an instruction that sets NZCV to the carry-in expected by \p I.
336	/// Might elide the instruction if the previous instruction already sets NZCV
337	/// correctly.
338	MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
339
340	/// Emit the overflow op for \p Opcode.
341	///
342	/// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
343	/// G_USUBO, etc.
344	std::pair<MachineInstr *, AArch64CC::CondCode>
345	emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
346	MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
347
348	bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
349
350	/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
351	/// In some cases this is even possible with OR operations in the expression.
352	MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
353	MachineIRBuilder &MIB) const;
354	MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
355	CmpInst::Predicate CC,
356	AArch64CC::CondCode Predicate,
357	AArch64CC::CondCode OutCC,
358	MachineIRBuilder &MIB) const;
359	MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
360	bool Negate, Register CCOp,
361	AArch64CC::CondCode Predicate,
362	MachineIRBuilder &MIB) const;
363
364	/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
365	/// \p IsNegative is true if the test should be "not zero".
366	/// This will also optimize the test bit instruction when possible.
367	MachineInstr emitTestBit(Register TestReg, uint64_t Bit, bool* IsNegative,
368	MachineBasicBlock *DstMBB,
369	MachineIRBuilder &MIB) const;
370
371	/// Emit a CB(N)Z instruction which branches to \p DestMBB.
372	MachineInstr emitCBZ(Register CompareReg, bool* IsNegative,
373	MachineBasicBlock *DestMBB,
374	MachineIRBuilder &MIB) const;
375
376	// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
377	// We use these manually instead of using the importer since it doesn't
378	// support SDNodeXForm.
379	ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
380	ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
381	ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
382	ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
383
384	ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
385	ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
386	ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
387
388	ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
389	unsigned Size) const;
390
391	ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
392	return selectAddrModeUnscaled(Root, Size: `1`);
393	}
394	ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
395	return selectAddrModeUnscaled(Root, Size: `2`);
396	}
397	ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
398	return selectAddrModeUnscaled(Root, Size: `4`);
399	}
400	ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
401	return selectAddrModeUnscaled(Root, Size: `8`);
402	}
403	ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
404	return selectAddrModeUnscaled(Root, Size: `16`);
405	}
406
407	/// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
408	/// from complex pattern matchers like selectAddrModeIndexed().
409	ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
410	MachineRegisterInfo &MRI) const;
411
412	ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
413	unsigned Size) const;
414	template <int Width>
415	ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
416	return selectAddrModeIndexed(Root, Size: Width / `8`);
417	}
418
419	std::optional<bool>
420	isWorthFoldingIntoAddrMode(const MachineInstr &MI,
421	const MachineRegisterInfo &MRI) const;
422
423	bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
424	const MachineRegisterInfo &MRI,
425	bool IsAddrOperand) const;
426	ComplexRendererFns
427	selectAddrModeShiftedExtendXReg(MachineOperand &Root,
428	unsigned SizeInBytes) const;
429
430	/// Returns a \p ComplexRendererFns which contains a base, offset, and whether
431	/// or not a shift + extend should be folded into an addressing mode. Returns
432	/// None when this is not profitable or possible.
433	ComplexRendererFns
434	selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
435	MachineOperand &Offset, unsigned SizeInBytes,
436	bool WantsExt) const;
437	ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
438	ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
439	unsigned SizeInBytes) const;
440	template <int Width>
441	ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
442	return selectAddrModeXRO(Root, SizeInBytes: Width / `8`);
443	}
444
445	ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
446	unsigned SizeInBytes) const;
447	template <int Width>
448	ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
449	return selectAddrModeWRO(Root, SizeInBytes: Width / `8`);
450	}
451
452	ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
453	bool AllowROR = false) const;
454
455	ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
456	return selectShiftedRegister(Root);
457	}
458
459	ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
460	return selectShiftedRegister(Root, AllowROR: true);
461	}
462
463	/// Given an extend instruction, determine the correct shift-extend type for
464	/// that instruction.
465	///
466	/// If the instruction is going to be used in a load or store, pass
467	/// \p IsLoadStore = true.
468	AArch64_AM::ShiftExtendType
469	getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
470	bool IsLoadStore = false) const;
471
472	/// Move \p Reg to \p RC if \p Reg is not already on \p RC.
473	///
474	/// \returns Either \p Reg if no change was necessary, or the new register
475	/// created by moving \p Reg.
476	///
477	/// Note: This uses emitCopy right now.
478	Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
479	MachineIRBuilder &MIB) const;
480
481	ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
482
483	ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
484
485	ComplexRendererFns selectCVTFixedPointVec(MachineOperand &Root) const;
486	ComplexRendererFns
487	selectCVTFixedPosRecipOperandVec(MachineOperand &Root) const;
488	ComplexRendererFns
489	selectCVTFixedPointVecBase(const MachineOperand &Root,
490	bool isReciprocal = false) const;
491	void renderFixedPointXForm(MachineInstrBuilder &MIB, const MachineInstr &MI,
492	int OpIdx = -`1`) const;
493	void renderFixedPointRecipXForm(MachineInstrBuilder &MIB,
494	const MachineInstr &MI, int OpIdx = -`1`) const;
495
496	void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
497	int OpIdx = -`1`) const;
498	void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
499	int OpIdx = -`1`) const;
500	void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
501	int OpIdx = -`1`) const;
502	void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
503	int OpIdx) const;
504	void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
505	int OpIdx = -`1`) const;
506	void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
507	int OpIdx = -`1`) const;
508	void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
509	int OpIdx = -`1`) const;
510	void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
511	const MachineInstr &MI,
512	int OpIdx = -`1`) const;
513
514	// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
515	void materializeLargeCMVal(MachineInstr &I, const Value V, unsigned* OpFlags);
516
517	// Optimization methods.
518	bool tryOptSelect(GSelect &Sel);
519	bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
520	MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
521	MachineOperand &Predicate,
522	MachineIRBuilder &MIRBuilder) const;
523
524	/// Return true if \p MI is a load or store of \p NumBytes bytes.
525	bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
526
527	/// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
528	/// register zeroed out. In other words, the result of MI has been explicitly
529	/// zero extended.
530	bool isDef32(const MachineInstr &MI) const;
531
532	const AArch64TargetMachine &TM;
533	const AArch64Subtarget &STI;
534	const AArch64InstrInfo &TII;
535	const AArch64RegisterInfo &TRI;
536	const AArch64RegisterBankInfo &RBI;
537
538	bool ProduceNonFlagSettingCondBr = false;
539
540	// Some cached values used during selection.
541	// We use LR as a live-in register, and we keep track of it here as it can be
542	// clobbered by calls.
543	Register MFReturnAddr;
544
545	MachineIRBuilder MIB;
546
547	#define GET_GLOBALISEL_PREDICATES_DECL
548	#include "AArch64GenGlobalISel.inc"
549	#undef GET_GLOBALISEL_PREDICATES_DECL
550
551	// We declare the temporaries used by selectImpl() in the class to minimize the
552	// cost of constructing placeholder values.
553	#define GET_GLOBALISEL_TEMPORARIES_DECL
554	#include "AArch64GenGlobalISel.inc"
555	#undef GET_GLOBALISEL_TEMPORARIES_DECL
556	};
557
558	} // end anonymous namespace
559
560	#define GET_GLOBALISEL_IMPL
561	#include "AArch64GenGlobalISel.inc"
562	#undef GET_GLOBALISEL_IMPL
563
564	AArch64InstructionSelector::AArch64InstructionSelector(
565	const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
566	const AArch64RegisterBankInfo &RBI)
567	: TM(TM), STI(STI), TII(STI.getInstrInfo()), TRI(STI.getRegisterInfo()),
568	RBI(RBI),
569	#define GET_GLOBALISEL_PREDICATES_INIT
570	#include "AArch64GenGlobalISel.inc"
571	#undef GET_GLOBALISEL_PREDICATES_INIT
572	#define GET_GLOBALISEL_TEMPORARIES_INIT
573	#include "AArch64GenGlobalISel.inc"
574	#undef GET_GLOBALISEL_TEMPORARIES_INIT
575	{
576	}
577
578	// FIXME: This should be target-independent, inferred from the types declared
579	// for each class in the bank.
580	//
581	/// Given a register bank, and a type, return the smallest register class that
582	/// can represent that combination.
583	static const TargetRegisterClass *
584	getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
585	bool GetAllRegSet = false) {
586	if (RB.getID() == AArch64::GPRRegBankID) {
587	if (Ty.getSizeInBits() <= `32`)
588	return GetAllRegSet ? &AArch64::GPR32allRegClass
589	: &AArch64::GPR32RegClass;
590	if (Ty.getSizeInBits() == `64`)
591	return GetAllRegSet ? &AArch64::GPR64allRegClass
592	: &AArch64::GPR64RegClass;
593	if (Ty.getSizeInBits() == `128`)
594	return &AArch64::XSeqPairsClassRegClass;
595	return nullptr;
596	}
597
598	if (RB.getID() == AArch64::FPRRegBankID) {
599	switch (Ty.getSizeInBits()) {
600	case `8`:
601	return &AArch64::FPR8RegClass;
602	case `16`:
603	return &AArch64::FPR16RegClass;
604	case `32`:
605	return &AArch64::FPR32RegClass;
606	case `64`:
607	return &AArch64::FPR64RegClass;
608	case `128`:
609	return &AArch64::FPR128RegClass;
610	}
611	return nullptr;
612	}
613
614	return nullptr;
615	}
616
617	/// Given a register bank, and size in bits, return the smallest register class
618	/// that can represent that combination.
619	static const TargetRegisterClass *
620	getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
621	bool GetAllRegSet = false) {
622	if (SizeInBits.isScalable()) {
623	assert(RB.getID() == AArch64::FPRRegBankID &&
624	"Expected FPR regbank for scalable type size");
625	return &AArch64::ZPRRegClass;
626	}
627
628	unsigned RegBankID = RB.getID();
629
630	if (RegBankID == AArch64::GPRRegBankID) {
631	assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
632	if (SizeInBits <= `32`)
633	return GetAllRegSet ? &AArch64::GPR32allRegClass
634	: &AArch64::GPR32RegClass;
635	if (SizeInBits == `64`)
636	return GetAllRegSet ? &AArch64::GPR64allRegClass
637	: &AArch64::GPR64RegClass;
638	if (SizeInBits == `128`)
639	return &AArch64::XSeqPairsClassRegClass;
640	}
641
642	if (RegBankID == AArch64::FPRRegBankID) {
643	if (SizeInBits.isScalable()) {
644	assert(SizeInBits == TypeSize::getScalable(`128`) &&
645	"Unexpected scalable register size");
646	return &AArch64::ZPRRegClass;
647	}
648
649	switch (SizeInBits) {
650	default:
651	return nullptr;
652	case `8`:
653	return &AArch64::FPR8RegClass;
654	case `16`:
655	return &AArch64::FPR16RegClass;
656	case `32`:
657	return &AArch64::FPR32RegClass;
658	case `64`:
659	return &AArch64::FPR64RegClass;
660	case `128`:
661	return &AArch64::FPR128RegClass;
662	}
663	}
664
665	return nullptr;
666	}
667
668	/// Returns the correct subregister to use for a given register class.
669	static bool getSubRegForClass(const TargetRegisterClass *RC,
670	const TargetRegisterInfo &TRI, unsigned &SubReg) {
671	switch (TRI.getRegSizeInBits(RC: *RC)) {
672	case `8`:
673	SubReg = AArch64::bsub;
674	break;
675	case `16`:
676	SubReg = AArch64::hsub;
677	break;
678	case `32`:
679	if (RC != &AArch64::FPR32RegClass)
680	SubReg = AArch64::sub_32;
681	else
682	SubReg = AArch64::ssub;
683	break;
684	case `64`:
685	SubReg = AArch64::dsub;
686	break;
687	default:
688	LLVM_DEBUG(
689	dbgs() << "Couldn't find appropriate subregister for register class.");
690	return false;
691	}
692
693	return true;
694	}
695
696	/// Returns the minimum size the given register bank can hold.
697	static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
698	switch (RB.getID()) {
699	case AArch64::GPRRegBankID:
700	return `32`;
701	case AArch64::FPRRegBankID:
702	return `8`;
703	default:
704	llvm_unreachable("Tried to get minimum size for unknown register bank.");
705	}
706	}
707
708	/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
709	/// Helper function for functions like createDTuple and createQTuple.
710	///
711	/// \p RegClassIDs - The list of register class IDs available for some tuple of
712	/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
713	/// expected to contain between 2 and 4 tuple classes.
714	///
715	/// \p SubRegs - The list of subregister classes associated with each register
716	/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
717	/// subregister class. The index of each subregister class is expected to
718	/// correspond with the index of each register class.
719	///
720	/// \returns Either the destination register of REG_SEQUENCE instruction that
721	/// was created, or the 0th element of \p Regs if \p Regs contains a single
722	/// element.
723	static Register createTuple(ArrayRef<Register> Regs,
724	const unsigned RegClassIDs[],
725	const unsigned SubRegs[], MachineIRBuilder &MIB) {
726	unsigned NumRegs = Regs.size();
727	if (NumRegs == `1`)
728	return Regs [`0`];
729	assert(NumRegs >= `2` && NumRegs <= `4` &&
730	"Only support between two and 4 registers in a tuple!");
731	const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
732	auto *DesiredClass = TRI->getRegClass(i: RegClassIDs[NumRegs - `2`]);
733	auto RegSequence =
734	MIB.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {DesiredClass}, SrcOps: {});
735	for (unsigned I = `0`, E = Regs.size(); I < E; ++I) {
736	RegSequence.addUse(RegNo: Regs [I]);
737	RegSequence.addImm(Val: SubRegs[I]);
738	}
739	return RegSequence.getReg(Idx: `0`);
740	}
741
742	/// Create a tuple of D-registers using the registers in \p Regs.
743	static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
744	static const unsigned RegClassIDs[] = {
745	AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
746	static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
747	AArch64::dsub2, AArch64::dsub3};
748	return createTuple(Regs, RegClassIDs, SubRegs, MIB);
749	}
750
751	/// Create a tuple of Q-registers using the registers in \p Regs.
752	static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
753	static const unsigned RegClassIDs[] = {
754	AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
755	static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
756	AArch64::qsub2, AArch64::qsub3};
757	return createTuple(Regs, RegClassIDs, SubRegs, MIB);
758	}
759
760	static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
761	auto &MI = *Root.getParent();
762	auto &MBB = *MI.getParent();
763	auto &MF = *MBB.getParent();
764	auto &MRI = MF.getRegInfo();
765	uint64_t Immed;
766	if (Root.isImm())
767	Immed = Root.getImm();
768	else if (Root.isCImm())
769	Immed = Root.getCImm()->getZExtValue();
770	else if (Root.isReg()) {
771	auto ValAndVReg =
772	getIConstantVRegValWithLookThrough(VReg: Root.getReg(), MRI, LookThroughInstrs: true);
773	if (!ValAndVReg)
774	return std::nullopt;
775	Immed = ValAndVReg ->Value.getSExtValue();
776	} else
777	return std::nullopt;
778	return Immed;
779	}
780
781	/// Check whether \p I is a currently unsupported binary operation:
782	/// - it has an unsized type
783	/// - an operand is not a vreg
784	/// - all operands are not in the same bank
785	/// These are checks that should someday live in the verifier, but right now,
786	/// these are mostly limitations of the aarch64 selector.
787	static bool unsupportedBinOp(const MachineInstr &I,
788	const AArch64RegisterBankInfo &RBI,
789	const MachineRegisterInfo &MRI,
790	const AArch64RegisterInfo &TRI) {
791	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
792	if (!Ty.isValid()) {
793	LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
794	return true;
795	}
796
797	const RegisterBank PrevOpBank = nullptr*;
798	for (auto &MO : I.operands()) {
799	// FIXME: Support non-register operands.
800	if (!MO.isReg()) {
801	LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
802	return true;
803	}
804
805	// FIXME: Can generic operations have physical registers operands? If
806	// so, this will need to be taught about that, and we'll need to get the
807	// bank out of the minimal class for the register.
808	// Either way, this needs to be documented (and possibly verified).
809	if (!MO.getReg().isVirtual()) {
810	LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
811	return true;
812	}
813
814	const RegisterBank *OpBank = RBI.getRegBank(Reg: MO.getReg(), MRI, TRI);
815	if (!OpBank) {
816	LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
817	return true;
818	}
819
820	if (PrevOpBank && OpBank != PrevOpBank) {
821	LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
822	return true;
823	}
824	PrevOpBank = OpBank;
825	}
826	return false;
827	}
828
829	/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
830	/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
831	/// and of size \p OpSize.
832	/// \returns \p GenericOpc if the combination is unsupported.
833	static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
834	unsigned OpSize) {
835	switch (RegBankID) {
836	case AArch64::GPRRegBankID:
837	if (OpSize == `32`) {
838	switch (GenericOpc) {
839	case TargetOpcode::G_SHL:
840	return AArch64::LSLVWr;
841	case TargetOpcode::G_LSHR:
842	return AArch64::LSRVWr;
843	case TargetOpcode::G_ASHR:
844	return AArch64::ASRVWr;
845	default:
846	return GenericOpc;
847	}
848	} else if (OpSize == `64`) {
849	switch (GenericOpc) {
850	case TargetOpcode::G_PTR_ADD:
851	return AArch64::ADDXrr;
852	case TargetOpcode::G_SHL:
853	return AArch64::LSLVXr;
854	case TargetOpcode::G_LSHR:
855	return AArch64::LSRVXr;
856	case TargetOpcode::G_ASHR:
857	return AArch64::ASRVXr;
858	default:
859	return GenericOpc;
860	}
861	}
862	break;
863	case AArch64::FPRRegBankID:
864	switch (OpSize) {
865	case `32`:
866	switch (GenericOpc) {
867	case TargetOpcode::G_FADD:
868	return AArch64::FADDSrr;
869	case TargetOpcode::G_FSUB:
870	return AArch64::FSUBSrr;
871	case TargetOpcode::G_FMUL:
872	return AArch64::FMULSrr;
873	case TargetOpcode::G_FDIV:
874	return AArch64::FDIVSrr;
875	default:
876	return GenericOpc;
877	}
878	case `64`:
879	switch (GenericOpc) {
880	case TargetOpcode::G_FADD:
881	return AArch64::FADDDrr;
882	case TargetOpcode::G_FSUB:
883	return AArch64::FSUBDrr;
884	case TargetOpcode::G_FMUL:
885	return AArch64::FMULDrr;
886	case TargetOpcode::G_FDIV:
887	return AArch64::FDIVDrr;
888	case TargetOpcode::G_OR:
889	return AArch64::ORRv8i8;
890	default:
891	return GenericOpc;
892	}
893	}
894	break;
895	}
896	return GenericOpc;
897	}
898
899	/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
900	/// appropriate for the (value) register bank \p RegBankID and of memory access
901	/// size \p OpSize. This returns the variant with the base+unsigned-immediate
902	/// addressing mode (e.g., LDRXui).
903	/// \returns \p GenericOpc if the combination is unsupported.
904	static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
905	unsigned OpSize) {
906	const bool isStore = GenericOpc == TargetOpcode::G_STORE;
907	switch (RegBankID) {
908	case AArch64::GPRRegBankID:
909	switch (OpSize) {
910	case `8`:
911	return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
912	case `16`:
913	return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
914	case `32`:
915	return isStore ? AArch64::STRWui : AArch64::LDRWui;
916	case `64`:
917	return isStore ? AArch64::STRXui : AArch64::LDRXui;
918	}
919	break;
920	case AArch64::FPRRegBankID:
921	switch (OpSize) {
922	case `8`:
923	return isStore ? AArch64::STRBui : AArch64::LDRBui;
924	case `16`:
925	return isStore ? AArch64::STRHui : AArch64::LDRHui;
926	case `32`:
927	return isStore ? AArch64::STRSui : AArch64::LDRSui;
928	case `64`:
929	return isStore ? AArch64::STRDui : AArch64::LDRDui;
930	case `128`:
931	return isStore ? AArch64::STRQui : AArch64::LDRQui;
932	}
933	break;
934	}
935	return GenericOpc;
936	}
937
938	/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
939	/// to \p To.*
940	///
941	/// E.g "To = COPY SrcReg:SubReg"
942	static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
943	const RegisterBankInfo &RBI, Register SrcReg,
944	const TargetRegisterClass To, unsigned* SubReg) {
945	assert(SrcReg.isValid() && "Expected a valid source register?");
946	assert(To && "Destination register class cannot be null");
947	assert(SubReg && "Expected a valid subregister");
948
949	MachineIRBuilder MIB(I);
950	auto SubRegCopy =
951	MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {To}, SrcOps: {}).addReg(RegNo: SrcReg, Flags: {}, SubReg);
952	MachineOperand &RegOp = I.getOperand(i: `1`);
953	RegOp.setReg(SubRegCopy.getReg(Idx: `0`));
954
955	// It's possible that the destination register won't be constrained. Make
956	// sure that happens.
957	if (!I.getOperand(i: `0`).getReg().isPhysical())
958	RBI.constrainGenericRegister(Reg: I.getOperand(i: `0`).getReg(), RC: *To, MRI);
959
960	return true;
961	}
962
963	/// Helper function to get the source and destination register classes for a
964	/// copy. Returns a std::pair containing the source register class for the
965	/// copy, and the destination register class for the copy. If a register class
966	/// cannot be determined, then it will be nullptr.
967	static std::pair<const TargetRegisterClass , const* TargetRegisterClass *>
968	getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
969	MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
970	const RegisterBankInfo &RBI) {
971	Register DstReg = I.getOperand(i: `0`).getReg();
972	Register SrcReg = I.getOperand(i: `1`).getReg();
973	const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
974	const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
975
976	TypeSize DstSize = RBI.getSizeInBits(Reg: DstReg, MRI, TRI);
977	TypeSize SrcSize = RBI.getSizeInBits(Reg: SrcReg, MRI, TRI);
978
979	// Special casing for cross-bank copies of s1s. We can technically represent
980	// a 1-bit value with any size of register. The minimum size for a GPR is 32
981	// bits. So, we need to put the FPR on 32 bits as well.
982	//
983	// FIXME: I'm not sure if this case holds true outside of copies. If it does,
984	// then we can pull it into the helpers that get the appropriate class for a
985	// register bank. Or make a new helper that carries along some constraint
986	// information.
987	if (SrcRegBank != DstRegBank &&
988	(DstSize == TypeSize::getFixed(ExactSize: `1`) && SrcSize == TypeSize::getFixed(ExactSize: `1`)))
989	SrcSize = DstSize = TypeSize::getFixed(ExactSize: `32`);
990
991	return {getMinClassForRegBank(RB: SrcRegBank, SizeInBits: SrcSize, GetAllRegSet: true),
992	getMinClassForRegBank(RB: DstRegBank, SizeInBits: DstSize, GetAllRegSet: true)};
993	}
994
995	// FIXME: We need some sort of API in RBI/TRI to allow generic code to
996	// constrain operands of simple instructions given a TargetRegisterClass
997	// and LLT
998	static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
999	const RegisterBankInfo &RBI) {
1000	for (MachineOperand &MO : I.operands()) {
1001	if (!MO.isReg())
1002	continue;
1003	Register Reg = MO.getReg();
1004	if (!Reg)
1005	continue;
1006	if (Reg.isPhysical())
1007	continue;
1008	LLT Ty = MRI.getType(Reg);
1009	const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
1010	const TargetRegisterClass *RC =
1011	dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank);
1012	if (!RC) {
1013	const RegisterBank &RB = cast<const* RegisterBank *>(Val: RegClassOrBank);
1014	RC = getRegClassForTypeOnBank(Ty, RB);
1015	if (!RC) {
1016	LLVM_DEBUG(
1017	dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1018	break;
1019	}
1020	}
1021	RBI.constrainGenericRegister(Reg, RC: *RC, MRI);
1022	}
1023
1024	return true;
1025	}
1026
1027	static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1028	MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1029	const RegisterBankInfo &RBI) {
1030	Register DstReg = I.getOperand(i: `0`).getReg();
1031	Register SrcReg = I.getOperand(i: `1`).getReg();
1032	const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
1033	const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
1034
1035	// Find the correct register classes for the source and destination registers.
1036	const TargetRegisterClass *SrcRC;
1037	const TargetRegisterClass *DstRC;
1038	std::tie(args&: SrcRC, args&: DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1039
1040	if (!DstRC) {
1041	LLVM_DEBUG(dbgs() << "Unexpected dest size "
1042	<< RBI.getSizeInBits(DstReg, MRI, TRI) << `'\n'`);
1043	return false;
1044	}
1045
1046	// Is this a copy? If so, then we may need to insert a subregister copy.
1047	if (I.isCopy()) {
1048	// Yes. Check if there's anything to fix up.
1049	if (!SrcRC) {
1050	LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1051	return false;
1052	}
1053
1054	const TypeSize SrcSize = TRI.getRegSizeInBits(RC: *SrcRC);
1055	const TypeSize DstSize = TRI.getRegSizeInBits(RC: *DstRC);
1056	unsigned SrcSubReg = I.getOperand(i: `1`).getSubReg();
1057	unsigned SubReg;
1058
1059	if (SrcSubReg)
1060	return RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI);
1061
1062	// If the source bank doesn't support a subregister copy small enough,
1063	// then we first need to copy to the destination bank.
1064	if (getMinSizeForRegBank(RB: SrcRegBank) > DstSize) {
1065	const TargetRegisterClass *DstTempRC =
1066	getMinClassForRegBank(RB: DstRegBank, SizeInBits: SrcSize, / GetAllRegSet / true);
1067	getSubRegForClass(RC: DstRC, TRI, SubReg);
1068
1069	MachineIRBuilder MIB(I);
1070	auto Copy = MIB.buildCopy(Res: {DstTempRC}, Op: {SrcReg});
1071	copySubReg(I, MRI, RBI, SrcReg: Copy.getReg(Idx: `0`), To: DstRC, SubReg);
1072	} else if (SrcSize > DstSize) {
1073	// If the source register is bigger than the destination we need to
1074	// perform a subregister copy.
1075	const TargetRegisterClass *SubRegRC =
1076	getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, / GetAllRegSet / true);
1077	getSubRegForClass(RC: SubRegRC, TRI, SubReg);
1078	copySubReg(I, MRI, RBI, SrcReg, To: DstRC, SubReg);
1079	} else if (DstSize > SrcSize) {
1080	// If the destination register is bigger than the source we need to do
1081	// a promotion using SUBREG_TO_REG.
1082	const TargetRegisterClass *PromotionRC =
1083	getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, / GetAllRegSet / true);
1084	getSubRegForClass(RC: SrcRC, TRI, SubReg);
1085
1086	Register PromoteReg = MRI.createVirtualRegister(RegClass: PromotionRC);
1087	BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
1088	MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: PromoteReg)
1089	.addUse(RegNo: SrcReg)
1090	.addImm(Val: SubReg);
1091	MachineOperand &RegOp = I.getOperand(i: `1`);
1092	RegOp.setReg(PromoteReg);
1093	}
1094
1095	// If the destination is a physical register, then there's nothing to
1096	// change, so we're done.
1097	if (DstReg.isPhysical())
1098	return true;
1099	}
1100
1101	// No need to constrain SrcReg. It will get constrained when we hit another
1102	// of its use or its defs. Copies do not have constraints.
1103	if (!RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
1104	LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1105	<< " operand\n");
1106	return false;
1107	}
1108
1109	// If this a GPR ZEXT that we want to just reduce down into a copy.
1110	// The sizes will be mismatched with the source < 32b but that's ok.
1111	if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1112	I.setDesc(TII.get(Opcode: AArch64::COPY));
1113	assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1114	return selectCopy(I, TII, MRI, TRI, RBI);
1115	}
1116
1117	I.setDesc(TII.get(Opcode: AArch64::COPY));
1118	return true;
1119	}
1120
1121	MachineInstr *
1122	AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1123	Register False, AArch64CC::CondCode CC,
1124	MachineIRBuilder &MIB) const {
1125	MachineRegisterInfo &MRI = *MIB.getMRI();
1126	assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1127	RBI.getRegBank(True, MRI, TRI)->getID() &&
1128	"Expected both select operands to have the same regbank?");
1129	LLT Ty = MRI.getType(Reg: True);
1130	if (Ty.isVector())
1131	return nullptr;
1132	const unsigned Size = Ty.getSizeInBits();
1133	assert((Size == `32` \|\| Size == `64`) &&
1134	"Expected 32 bit or 64 bit select only?");
1135	const bool Is32Bit = Size == `32`;
1136	if (RBI.getRegBank(Reg: True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1137	unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1138	auto FCSel = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
1139	constrainSelectedInstRegOperands(I&: *FCSel, TII, TRI, RBI);
1140	return &*FCSel;
1141	}
1142
1143	// By default, we'll try and emit a CSEL.
1144	unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1145	bool Optimized = false;
1146	auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1147	&Optimized](Register &Reg, Register &OtherReg,
1148	bool Invert) {
1149	if (Optimized)
1150	return false;
1151
1152	// Attempt to fold:
1153	//
1154	// %sub = G_SUB 0, %x
1155	// %select = G_SELECT cc, %reg, %sub
1156	//
1157	// Into:
1158	// %select = CSNEG %reg, %x, cc
1159	Register MatchReg;
1160	if (mi_match(R: Reg, MRI, P: m_Neg(Src: m_Reg(R&: MatchReg)))) {
1161	Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1162	Reg = MatchReg;
1163	if (Invert) {
1164	CC = AArch64CC::getInvertedCondCode(Code: CC);
1165	std::swap(a&: Reg, b&: OtherReg);
1166	}
1167	return true;
1168	}
1169
1170	// Attempt to fold:
1171	//
1172	// %xor = G_XOR %x, -1
1173	// %select = G_SELECT cc, %reg, %xor
1174	//
1175	// Into:
1176	// %select = CSINV %reg, %x, cc
1177	if (mi_match(R: Reg, MRI, P: m_Not(Src: m_Reg(R&: MatchReg)))) {
1178	Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1179	Reg = MatchReg;
1180	if (Invert) {
1181	CC = AArch64CC::getInvertedCondCode(Code: CC);
1182	std::swap(a&: Reg, b&: OtherReg);
1183	}
1184	return true;
1185	}
1186
1187	// Attempt to fold:
1188	//
1189	// %add = G_ADD %x, 1
1190	// %select = G_SELECT cc, %reg, %add
1191	//
1192	// Into:
1193	// %select = CSINC %reg, %x, cc
1194	if (mi_match(R: Reg, MRI,
1195	P: m_any_of(preds: m_GAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: `1`)),
1196	preds: m_GPtrAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: `1`))))) {
1197	Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1198	Reg = MatchReg;
1199	if (Invert) {
1200	CC = AArch64CC::getInvertedCondCode(Code: CC);
1201	std::swap(a&: Reg, b&: OtherReg);
1202	}
1203	return true;
1204	}
1205
1206	return false;
1207	};
1208
1209	// Helper lambda which tries to use CSINC/CSINV for the instruction when its
1210	// true/false values are constants.
1211	// FIXME: All of these patterns already exist in tablegen. We should be
1212	// able to import these.
1213	auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1214	&Optimized]() {
1215	if (Optimized)
1216	return false;
1217	auto TrueCst = getIConstantVRegValWithLookThrough(VReg: True, MRI);
1218	auto FalseCst = getIConstantVRegValWithLookThrough(VReg: False, MRI);
1219	if (!TrueCst && !FalseCst)
1220	return false;
1221
1222	Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1223	if (TrueCst && FalseCst) {
1224	int64_t T = TrueCst ->Value.getSExtValue();
1225	int64_t F = FalseCst ->Value.getSExtValue();
1226
1227	if (T == `0` && F == `1`) {
1228	// G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1229	Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1230	True = ZReg;
1231	False = ZReg;
1232	return true;
1233	}
1234
1235	if (T == `0` && F == -`1`) {
1236	// G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1237	Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1238	True = ZReg;
1239	False = ZReg;
1240	return true;
1241	}
1242	}
1243
1244	if (TrueCst) {
1245	int64_t T = TrueCst ->Value.getSExtValue();
1246	if (T == `1`) {
1247	// G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1248	Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1249	True = False;
1250	False = ZReg;
1251	CC = AArch64CC::getInvertedCondCode(Code: CC);
1252	return true;
1253	}
1254
1255	if (T == -`1`) {
1256	// G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1257	Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1258	True = False;
1259	False = ZReg;
1260	CC = AArch64CC::getInvertedCondCode(Code: CC);
1261	return true;
1262	}
1263	}
1264
1265	if (FalseCst) {
1266	int64_t F = FalseCst ->Value.getSExtValue();
1267	if (F == `1`) {
1268	// G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1269	Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1270	False = ZReg;
1271	return true;
1272	}
1273
1274	if (F == -`1`) {
1275	// G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1276	Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1277	False = ZReg;
1278	return true;
1279	}
1280	}
1281	return false;
1282	};
1283
1284	Optimized \|= TryFoldBinOpIntoSelect (False, True, /Invert = / false);
1285	Optimized \|= TryFoldBinOpIntoSelect (True, False, /Invert = / true);
1286	Optimized \|= TryOptSelectCst ();
1287	auto SelectInst = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
1288	constrainSelectedInstRegOperands(I&: *SelectInst, TII, TRI, RBI);
1289	return &*SelectInst;
1290	}
1291
1292	static AArch64CC::CondCode
1293	changeICMPPredToAArch64CC(CmpInst::Predicate P, Register RHS = {},
1294	MachineRegisterInfo MRI = nullptr*) {
1295	switch (P) {
1296	default:
1297	llvm_unreachable("Unknown condition code!");
1298	case CmpInst::ICMP_NE:
1299	return AArch64CC::NE;
1300	case CmpInst::ICMP_EQ:
1301	return AArch64CC::EQ;
1302	case CmpInst::ICMP_SGT:
1303	return AArch64CC::GT;
1304	case CmpInst::ICMP_SGE:
1305	if (RHS && MRI) {
1306	auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS, MRI: *MRI);
1307	if (ValAndVReg && ValAndVReg ->Value == `0`)
1308	return AArch64CC::PL;
1309	}
1310	return AArch64CC::GE;
1311	case CmpInst::ICMP_SLT:
1312	if (RHS && MRI) {
1313	auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS, MRI: *MRI);
1314	if (ValAndVReg && ValAndVReg ->Value == `0`)
1315	return AArch64CC::MI;
1316	}
1317	return AArch64CC::LT;
1318	case CmpInst::ICMP_SLE:
1319	return AArch64CC::LE;
1320	case CmpInst::ICMP_UGT:
1321	return AArch64CC::HI;
1322	case CmpInst::ICMP_UGE:
1323	return AArch64CC::HS;
1324	case CmpInst::ICMP_ULT:
1325	return AArch64CC::LO;
1326	case CmpInst::ICMP_ULE:
1327	return AArch64CC::LS;
1328	}
1329	}
1330
1331	/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1332	static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1333	AArch64CC::CondCode &CondCode,
1334	AArch64CC::CondCode &CondCode2) {
1335	CondCode2 = AArch64CC::AL;
1336	switch (CC) {
1337	default:
1338	llvm_unreachable("Unknown FP condition!");
1339	case CmpInst::FCMP_OEQ:
1340	CondCode = AArch64CC::EQ;
1341	break;
1342	case CmpInst::FCMP_OGT:
1343	CondCode = AArch64CC::GT;
1344	break;
1345	case CmpInst::FCMP_OGE:
1346	CondCode = AArch64CC::GE;
1347	break;
1348	case CmpInst::FCMP_OLT:
1349	CondCode = AArch64CC::MI;
1350	break;
1351	case CmpInst::FCMP_OLE:
1352	CondCode = AArch64CC::LS;
1353	break;
1354	case CmpInst::FCMP_ONE:
1355	CondCode = AArch64CC::MI;
1356	CondCode2 = AArch64CC::GT;
1357	break;
1358	case CmpInst::FCMP_ORD:
1359	CondCode = AArch64CC::VC;
1360	break;
1361	case CmpInst::FCMP_UNO:
1362	CondCode = AArch64CC::VS;
1363	break;
1364	case CmpInst::FCMP_UEQ:
1365	CondCode = AArch64CC::EQ;
1366	CondCode2 = AArch64CC::VS;
1367	break;
1368	case CmpInst::FCMP_UGT:
1369	CondCode = AArch64CC::HI;
1370	break;
1371	case CmpInst::FCMP_UGE:
1372	CondCode = AArch64CC::PL;
1373	break;
1374	case CmpInst::FCMP_ULT:
1375	CondCode = AArch64CC::LT;
1376	break;
1377	case CmpInst::FCMP_ULE:
1378	CondCode = AArch64CC::LE;
1379	break;
1380	case CmpInst::FCMP_UNE:
1381	CondCode = AArch64CC::NE;
1382	break;
1383	}
1384	}
1385
1386	/// Convert an IR fp condition code to an AArch64 CC.
1387	/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1388	/// should be AND'ed instead of OR'ed.
1389	static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1390	AArch64CC::CondCode &CondCode,
1391	AArch64CC::CondCode &CondCode2) {
1392	CondCode2 = AArch64CC::AL;
1393	switch (CC) {
1394	default:
1395	changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1396	assert(CondCode2 == AArch64CC::AL);
1397	break;
1398	case CmpInst::FCMP_ONE:
1399	// (a one b)
1400	// == ((a olt b) \|\| (a ogt b))
1401	// == ((a ord b) && (a une b))
1402	CondCode = AArch64CC::VC;
1403	CondCode2 = AArch64CC::NE;
1404	break;
1405	case CmpInst::FCMP_UEQ:
1406	// (a ueq b)
1407	// == ((a uno b) \|\| (a oeq b))
1408	// == ((a ule b) && (a uge b))
1409	CondCode = AArch64CC::PL;
1410	CondCode2 = AArch64CC::LE;
1411	break;
1412	}
1413	}
1414
1415	/// Return a register which can be used as a bit to test in a TB(N)Z.
1416	static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1417	MachineRegisterInfo &MRI) {
1418	assert(Reg.isValid() && "Expected valid register!");
1419	bool HasZext = false;
1420	while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1421	unsigned Opc = MI->getOpcode();
1422
1423	if (!MI->getOperand(i: `0`).isReg() \|\|
1424	!MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: `0`).getReg()))
1425	break;
1426
1427	// (tbz (any_ext x), b) -> (tbz x, b) and
1428	// (tbz (zext x), b) -> (tbz x, b) if we don't use the extended bits.
1429	//
1430	// (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1431	// on the truncated x is the same as the bit number on x.
1432	if (Opc == TargetOpcode::G_ANYEXT \|\| Opc == TargetOpcode::G_ZEXT \|\|
1433	Opc == TargetOpcode::G_TRUNC) {
1434	if (Opc == TargetOpcode::G_ZEXT)
1435	HasZext = true;
1436
1437	Register NextReg = MI->getOperand(i: `1`).getReg();
1438	// Did we find something worth folding?
1439	if (!NextReg.isValid() \|\| !MRI.hasOneNonDBGUse(RegNo: NextReg))
1440	break;
1441	TypeSize InSize = MRI.getType(Reg: NextReg).getSizeInBits();
1442	if (Bit >= InSize)
1443	break;
1444
1445	// NextReg is worth folding. Keep looking.
1446	Reg = NextReg;
1447	continue;
1448	}
1449
1450	// Attempt to find a suitable operation with a constant on one side.
1451	std::optional<uint64_t> C;
1452	Register TestReg;
1453	switch (Opc) {
1454	default:
1455	break;
1456	case TargetOpcode::G_AND:
1457	case TargetOpcode::G_XOR: {
1458	TestReg = MI->getOperand(i: `1`).getReg();
1459	Register ConstantReg = MI->getOperand(i: `2`).getReg();
1460	auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
1461	if (!VRegAndVal) {
1462	// AND commutes, check the other side for a constant.
1463	// FIXME: Can we canonicalize the constant so that it's always on the
1464	// same side at some point earlier?
1465	std::swap(a&: ConstantReg, b&: TestReg);
1466	VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
1467	}
1468	if (VRegAndVal) {
1469	if (HasZext)
1470	C = VRegAndVal ->Value.getZExtValue();
1471	else
1472	C = VRegAndVal ->Value.getSExtValue();
1473	}
1474	break;
1475	}
1476	case TargetOpcode::G_ASHR:
1477	case TargetOpcode::G_LSHR:
1478	case TargetOpcode::G_SHL: {
1479	TestReg = MI->getOperand(i: `1`).getReg();
1480	auto VRegAndVal =
1481	getIConstantVRegValWithLookThrough(VReg: MI->getOperand(i: `2`).getReg(), MRI);
1482	if (VRegAndVal)
1483	C = VRegAndVal ->Value.getSExtValue();
1484	break;
1485	}
1486	}
1487
1488	// Didn't find a constant or viable register. Bail out of the loop.
1489	if (!C \|\| !TestReg.isValid())
1490	break;
1491
1492	// We found a suitable instruction with a constant. Check to see if we can
1493	// walk through the instruction.
1494	Register NextReg;
1495	unsigned TestRegSize = MRI.getType(Reg: TestReg).getSizeInBits();
1496	switch (Opc) {
1497	default:
1498	break;
1499	case TargetOpcode::G_AND:
1500	// (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1501	if ((*C >> Bit) & `1`)
1502	NextReg = TestReg;
1503	break;
1504	case TargetOpcode::G_SHL:
1505	// (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1506	// the type of the register.
1507	if (C <= Bit && (Bit - C) < TestRegSize) {
1508	NextReg = TestReg;
1509	Bit = Bit - *C;
1510	}
1511	break;
1512	case TargetOpcode::G_ASHR:
1513	// (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1514	// in x
1515	NextReg = TestReg;
1516	Bit = Bit + *C;
1517	if (Bit >= TestRegSize)
1518	Bit = TestRegSize - `1`;
1519	break;
1520	case TargetOpcode::G_LSHR:
1521	// (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1522	if ((Bit + *C) < TestRegSize) {
1523	NextReg = TestReg;
1524	Bit = Bit + *C;
1525	}
1526	break;
1527	case TargetOpcode::G_XOR:
1528	// We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1529	// appropriate.
1530	//
1531	// e.g. If x' = xor x, c, and the b-th bit is set in c then
1532	//
1533	// tbz x', b -> tbnz x, b
1534	//
1535	// Because x' only has the b-th bit set if x does not.
1536	if ((*C >> Bit) & `1`)
1537	Invert = !Invert;
1538	NextReg = TestReg;
1539	break;
1540	}
1541
1542	// Check if we found anything worth folding.
1543	if (!NextReg.isValid())
1544	return Reg;
1545	Reg = NextReg;
1546	}
1547
1548	return Reg;
1549	}
1550
1551	MachineInstr *AArch64InstructionSelector::emitTestBit(
1552	Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1553	MachineIRBuilder &MIB) const {
1554	assert(TestReg.isValid());
1555	assert(ProduceNonFlagSettingCondBr &&
1556	"Cannot emit TB(N)Z with speculation tracking!");
1557	MachineRegisterInfo &MRI = *MIB.getMRI();
1558
1559	// Attempt to optimize the test bit by walking over instructions.
1560	TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI);
1561	LLT Ty = MRI.getType(Reg: TestReg);
1562	unsigned Size = Ty.getSizeInBits();
1563	assert(!Ty.isVector() && "Expected a scalar!");
1564	assert(Bit < `64` && "Bit is too large!");
1565
1566	// When the test register is a 64-bit register, we have to narrow to make
1567	// TBNZW work.
1568	bool UseWReg = Bit < `32`;
1569	unsigned NecessarySize = UseWReg ? `32` : `64`;
1570	if (Size != NecessarySize)
1571	TestReg = moveScalarRegClass(
1572	Reg: TestReg, RC: UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1573	MIB);
1574
1575	static const unsigned OpcTable[`2`][`2`] = {{AArch64::TBZX, AArch64::TBNZX},
1576	{AArch64::TBZW, AArch64::TBNZW}};
1577	unsigned Opc = OpcTable[UseWReg][IsNegative];
1578	auto TestBitMI =
1579	MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB);
1580	constrainSelectedInstRegOperands(I&: *TestBitMI, TII, TRI, RBI);
1581	return &*TestBitMI;
1582	}
1583
1584	bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1585	MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1586	MachineIRBuilder &MIB) const {
1587	assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1588	// Given something like this:
1589	//
1590	// %x = ...Something...
1591	// %one = G_CONSTANT i64 1
1592	// %zero = G_CONSTANT i64 0
1593	// %and = G_AND %x, %one
1594	// %cmp = G_ICMP intpred(ne), %and, %zero
1595	// %cmp_trunc = G_TRUNC %cmp
1596	// G_BRCOND %cmp_trunc, %bb.3
1597	//
1598	// We want to try and fold the AND into the G_BRCOND and produce either a
1599	// TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1600	//
1601	// In this case, we'd get
1602	//
1603	// TBNZ %x %bb.3
1604	//
1605
1606	// Check if the AND has a constant on its RHS which we can use as a mask.
1607	// If it's a power of 2, then it's the same as checking a specific bit.
1608	// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1609	auto MaybeBit = getIConstantVRegValWithLookThrough(
1610	VReg: AndInst.getOperand(i: `2`).getReg(), MRI: *MIB.getMRI());
1611	if (!MaybeBit)
1612	return false;
1613
1614	int32_t Bit = MaybeBit ->Value.exactLogBase2();
1615	if (Bit < `0`)
1616	return false;
1617
1618	Register TestReg = AndInst.getOperand(i: `1`).getReg();
1619
1620	// Emit a TB(N)Z.
1621	emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB);
1622	return true;
1623	}
1624
1625	MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1626	bool IsNegative,
1627	MachineBasicBlock *DestMBB,
1628	MachineIRBuilder &MIB) const {
1629	assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1630	MachineRegisterInfo &MRI = *MIB.getMRI();
1631	assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1632	AArch64::GPRRegBankID &&
1633	"Expected GPRs only?");
1634	auto Ty = MRI.getType(Reg: CompareReg);
1635	unsigned Width = Ty.getSizeInBits();
1636	assert(!Ty.isVector() && "Expected scalar only?");
1637	assert(Width <= `64` && "Expected width to be at most 64?");
1638	static const unsigned OpcTable[`2`][`2`] = {{AArch64::CBZW, AArch64::CBZX},
1639	{AArch64::CBNZW, AArch64::CBNZX}};
1640	unsigned Opc = OpcTable[IsNegative][Width == `64`];
1641	auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB);
1642	constrainSelectedInstRegOperands(I&: *BranchMI, TII, TRI, RBI);
1643	return &*BranchMI;
1644	}
1645
1646	bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1647	MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1648	assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1649	assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1650	// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1651	// totally clean. Some of them require two branches to implement.
1652	auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: `1`).getPredicate();
1653	emitFPCompare(LHS: FCmp.getOperand(i: `2`).getReg(), RHS: FCmp.getOperand(i: `3`).getReg(), MIRBuilder&: MIB,
1654	Pred);
1655	AArch64CC::CondCode CC1, CC2;
1656	changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
1657	MachineBasicBlock *DestMBB = I.getOperand(i: `1`).getMBB();
1658	MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC1).addMBB(MBB: DestMBB);
1659	if (CC2 != AArch64CC::AL)
1660	MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC2).addMBB(MBB: DestMBB);
1661	I.eraseFromParent();
1662	return true;
1663	}
1664
1665	bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1666	MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1667	assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1668	assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1669	// Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1670	//
1671	// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1672	// instructions will not be produced, as they are conditional branch
1673	// instructions that do not set flags.
1674	if (!ProduceNonFlagSettingCondBr)
1675	return false;
1676
1677	MachineRegisterInfo &MRI = *MIB.getMRI();
1678	MachineBasicBlock *DestMBB = I.getOperand(i: `1`).getMBB();
1679	auto Pred =
1680	static_cast<CmpInst::Predicate>(ICmp.getOperand(i: `1`).getPredicate());
1681	Register LHS = ICmp.getOperand(i: `2`).getReg();
1682	Register RHS = ICmp.getOperand(i: `3`).getReg();
1683
1684	// We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1685	auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1686	MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1687
1688	// When we can emit a TB(N)Z, prefer that.
1689	//
1690	// Handle non-commutative condition codes first.
1691	// Note that we don't want to do this when we have a G_AND because it can
1692	// become a tst. The tst will make the test bit in the TB(N)Z redundant.
1693	if (VRegAndVal && !AndInst) {
1694	int64_t C = VRegAndVal ->Value.getSExtValue();
1695
1696	// When we have a greater-than comparison, we can just test if the msb is
1697	// zero.
1698	if (C == -`1` && Pred == CmpInst::ICMP_SGT) {
1699	uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - `1`;
1700	emitTestBit(TestReg: LHS, Bit, /IsNegative = / false, DstMBB: DestMBB, MIB);
1701	I.eraseFromParent();
1702	return true;
1703	}
1704
1705	// When we have a less than comparison, we can just test if the msb is not
1706	// zero.
1707	if (C == `0` && Pred == CmpInst::ICMP_SLT) {
1708	uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - `1`;
1709	emitTestBit(TestReg: LHS, Bit, /IsNegative = / true, DstMBB: DestMBB, MIB);
1710	I.eraseFromParent();
1711	return true;
1712	}
1713
1714	// Inversely, if we have a signed greater-than-or-equal comparison to zero,
1715	// we can test if the msb is zero.
1716	if (C == `0` && Pred == CmpInst::ICMP_SGE) {
1717	uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - `1`;
1718	emitTestBit(TestReg: LHS, Bit, /IsNegative = / false, DstMBB: DestMBB, MIB);
1719	I.eraseFromParent();
1720	return true;
1721	}
1722	}
1723
1724	// Attempt to handle commutative condition codes. Right now, that's only
1725	// eq/ne.
1726	if (ICmpInst::isEquality(P: Pred)) {
1727	if (!VRegAndVal) {
1728	std::swap(a&: RHS, b&: LHS);
1729	VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1730	AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1731	}
1732
1733	if (VRegAndVal && VRegAndVal ->Value == `0`) {
1734	// If there's a G_AND feeding into this branch, try to fold it away by
1735	// emitting a TB(N)Z instead.
1736	//
1737	// Note: If we have LT, then it is* possible to fold, but it wouldn't be*
1738	// beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1739	// would be redundant.
1740	if (AndInst &&
1741	tryOptAndIntoCompareBranch(
1742	AndInst&: AndInst, /Invert = /* Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) {
1743	I.eraseFromParent();
1744	return true;
1745	}
1746
1747	// Otherwise, try to emit a CB(N)Z instead.
1748	auto LHSTy = MRI.getType(Reg: LHS);
1749	if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= `64`) {
1750	emitCBZ(CompareReg: LHS, /IsNegative = / Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1751	I.eraseFromParent();
1752	return true;
1753	}
1754	}
1755	}
1756
1757	return false;
1758	}
1759
1760	bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1761	MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1762	assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1763	assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1764	if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1765	return true;
1766
1767	// Couldn't optimize. Emit a compare + a Bcc.
1768	MachineBasicBlock *DestMBB = I.getOperand(i: `1`).getMBB();
1769	auto &PredOp = ICmp.getOperand(i: `1`);
1770	emitIntegerCompare(LHS&: ICmp.getOperand(i: `2`), RHS&: ICmp.getOperand(i: `3`), Predicate&: PredOp, MIRBuilder&: MIB);
1771	const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1772	P: static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1773	RHS: ICmp.getOperand(i: `3`).getReg(), MRI: MIB.getMRI());
1774	MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC).addMBB(MBB: DestMBB);
1775	I.eraseFromParent();
1776	return true;
1777	}
1778
1779	bool AArch64InstructionSelector::selectCompareBranch(
1780	MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1781	Register CondReg = I.getOperand(i: `0`).getReg();
1782	MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg);
1783	// Try to select the G_BRCOND using whatever is feeding the condition if
1784	// possible.
1785	unsigned CCMIOpc = CCMI->getOpcode();
1786	if (CCMIOpc == TargetOpcode::G_FCMP)
1787	return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB);
1788	if (CCMIOpc == TargetOpcode::G_ICMP)
1789	return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB);
1790
1791	// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1792	// instructions will not be produced, as they are conditional branch
1793	// instructions that do not set flags.
1794	if (ProduceNonFlagSettingCondBr) {
1795	emitTestBit(TestReg: CondReg, /Bit = / `0`, /IsNegative = / true,
1796	DstMBB: I.getOperand(i: `1`).getMBB(), MIB);
1797	I.eraseFromParent();
1798	return true;
1799	}
1800
1801	// Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1802	auto TstMI =
1803	MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {LLT::scalar(SizeInBits: `32`)}, SrcOps: {CondReg}).addImm(Val: `1`);
1804	constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
1805	auto Bcc = MIB.buildInstr(Opcode: AArch64::Bcc)
1806	.addImm(Val: AArch64CC::NE)
1807	.addMBB(MBB: I.getOperand(i: `1`).getMBB());
1808	I.eraseFromParent();
1809	constrainSelectedInstRegOperands(I&: *Bcc, TII, TRI, RBI);
1810	return true;
1811	}
1812
1813	/// Returns the element immediate value of a vector shift operand if found.
1814	/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1815	static std::optional<int64_t> getVectorShiftImm(Register Reg,
1816	MachineRegisterInfo &MRI) {
1817	assert(MRI.getType(Reg).isVector() && "Expected a vector shift operand");
1818	MachineInstr *OpMI = MRI.getVRegDef(Reg);
1819	return getAArch64VectorSplatScalar(MI: *OpMI, MRI);
1820	}
1821
1822	/// Matches and returns the shift immediate value for a SHL instruction given
1823	/// a shift operand.
1824	static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1825	MachineRegisterInfo &MRI) {
1826	std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1827	if (!ShiftImm)
1828	return std::nullopt;
1829	// Check the immediate is in range for a SHL.
1830	int64_t Imm = *ShiftImm;
1831	if (Imm < `0`)
1832	return std::nullopt;
1833	switch (SrcTy.getElementType().getSizeInBits()) {
1834	default:
1835	LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1836	return std::nullopt;
1837	case `8`:
1838	if (Imm > `7`)
1839	return std::nullopt;
1840	break;
1841	case `16`:
1842	if (Imm > `15`)
1843	return std::nullopt;
1844	break;
1845	case `32`:
1846	if (Imm > `31`)
1847	return std::nullopt;
1848	break;
1849	case `64`:
1850	if (Imm > `63`)
1851	return std::nullopt;
1852	break;
1853	}
1854	return Imm;
1855	}
1856
1857	bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1858	MachineRegisterInfo &MRI) {
1859	assert(I.getOpcode() == TargetOpcode::G_SHL);
1860	Register DstReg = I.getOperand(i: `0`).getReg();
1861	const LLT Ty = MRI.getType(Reg: DstReg);
1862	Register Src1Reg = I.getOperand(i: `1`).getReg();
1863	Register Src2Reg = I.getOperand(i: `2`).getReg();
1864
1865	if (!Ty.isVector())
1866	return false;
1867
1868	// Check if we have a vector of constants on RHS that we can select as the
1869	// immediate form.
1870	std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI);
1871
1872	unsigned Opc = `0`;
1873	if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `64`)) {
1874	Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1875	} else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarSizeInBits: `32`)) {
1876	Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1877	} else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `32`)) {
1878	Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1879	} else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarSizeInBits: `16`)) {
1880	Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1881	} else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarSizeInBits: `16`)) {
1882	Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1883	} else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarSizeInBits: `8`)) {
1884	Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1885	} else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarSizeInBits: `8`)) {
1886	Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1887	} else {
1888	LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1889	return false;
1890	}
1891
1892	auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg});
1893	if (ImmVal)
1894	Shl.addImm(Val: *ImmVal);
1895	else
1896	Shl.addUse(RegNo: Src2Reg);
1897	constrainSelectedInstRegOperands(I&: *Shl, TII, TRI, RBI);
1898	I.eraseFromParent();
1899	return true;
1900	}
1901
1902	bool AArch64InstructionSelector::selectVectorAshrLshr(
1903	MachineInstr &I, MachineRegisterInfo &MRI) {
1904	assert(I.getOpcode() == TargetOpcode::G_ASHR \|\|
1905	I.getOpcode() == TargetOpcode::G_LSHR);
1906	Register DstReg = I.getOperand(i: `0`).getReg();
1907	const LLT Ty = MRI.getType(Reg: DstReg);
1908	Register Src1Reg = I.getOperand(i: `1`).getReg();
1909	Register Src2Reg = I.getOperand(i: `2`).getReg();
1910
1911	if (!Ty.isVector())
1912	return false;
1913
1914	bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1915
1916	// We expect the immediate case to be lowered in the PostLegalCombiner to
1917	// AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1918
1919	// There is not a shift right register instruction, but the shift left
1920	// register instruction takes a signed value, where negative numbers specify a
1921	// right shift.
1922
1923	unsigned Opc = `0`;
1924	unsigned NegOpc = `0`;
1925	const TargetRegisterClass *RC =
1926	getRegClassForTypeOnBank(Ty, RB: RBI.getRegBank(ID: AArch64::FPRRegBankID));
1927	if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `64`)) {
1928	Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1929	NegOpc = AArch64::NEGv2i64;
1930	} else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarSizeInBits: `32`)) {
1931	Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1932	NegOpc = AArch64::NEGv4i32;
1933	} else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `32`)) {
1934	Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1935	NegOpc = AArch64::NEGv2i32;
1936	} else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarSizeInBits: `16`)) {
1937	Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1938	NegOpc = AArch64::NEGv4i16;
1939	} else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarSizeInBits: `16`)) {
1940	Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1941	NegOpc = AArch64::NEGv8i16;
1942	} else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarSizeInBits: `8`)) {
1943	Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1944	NegOpc = AArch64::NEGv16i8;
1945	} else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarSizeInBits: `8`)) {
1946	Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1947	NegOpc = AArch64::NEGv8i8;
1948	} else {
1949	LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1950	return false;
1951	}
1952
1953	auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg});
1954	constrainSelectedInstRegOperands(I&: *Neg, TII, TRI, RBI);
1955	auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg});
1956	constrainSelectedInstRegOperands(I&: *SShl, TII, TRI, RBI);
1957	I.eraseFromParent();
1958	return true;
1959	}
1960
1961	bool AArch64InstructionSelector::selectVaStartAAPCS(
1962	MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1963
1964	if (STI.isCallingConvWin64(CC: MF.getFunction().getCallingConv(),
1965	IsVarArg: MF.getFunction().isVarArg()))
1966	return false;
1967
1968	// The layout of the va_list struct is specified in the AArch64 Procedure Call
1969	// Standard, section 10.1.5.
1970
1971	const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1972	const unsigned PtrSize = STI.isTargetILP32() ? `4` : `8`;
1973	const auto *PtrRegClass =
1974	STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1975
1976	const MCInstrDesc &MCIDAddAddr =
1977	TII.get(Opcode: STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1978	const MCInstrDesc &MCIDStoreAddr =
1979	TII.get(Opcode: STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1980
1981	/*
1982	* typedef struct va_list {
1983	* void * stack; // next stack param
1984	* void * gr_top; // end of GP arg reg save area
1985	* void * vr_top; // end of FP/SIMD arg reg save area
1986	* int gr_offs; // offset from gr_top to next GP register arg
1987	* int vr_offs; // offset from vr_top to next FP/SIMD register arg
1988	* } va_list;
1989	*/
1990	const auto VAList = I.getOperand(i: `0`).getReg();
1991
1992	// Our current offset in bytes from the va_list struct (VAList).
1993	unsigned OffsetBytes = `0`;
1994
1995	// Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1996	// and increment OffsetBytes by PtrSize.
1997	const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1998	const Register Top = MRI.createVirtualRegister(RegClass: PtrRegClass);
1999	auto MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDAddAddr)
2000	.addDef(RegNo: Top)
2001	.addFrameIndex(Idx: FrameIndex)
2002	.addImm(Val: Imm)
2003	.addImm(Val: `0`);
2004	constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2005
2006	const auto MMO = I.memoperands_begin();
2007	MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDStoreAddr)
2008	.addUse(RegNo: Top)
2009	.addUse(RegNo: VAList)
2010	.addImm(Val: OffsetBytes / PtrSize)
2011	.addMemOperand(MMO: MF.getMachineMemOperand(
2012	PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes),
2013	F: MachineMemOperand::MOStore, Size: PtrSize, BaseAlignment: MMO->getBaseAlign()));
2014	constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2015
2016	OffsetBytes += PtrSize;
2017	};
2018
2019	// void stack at offset 0*
2020	PushAddress (FuncInfo->getVarArgsStackIndex(), `0`);
2021
2022	// void gr_top at offset 8 (4 on ILP32)*
2023	const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2024	PushAddress (FuncInfo->getVarArgsGPRIndex(), GPRSize);
2025
2026	// void vr_top at offset 16 (8 on ILP32)*
2027	const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2028	PushAddress (FuncInfo->getVarArgsFPRIndex(), FPRSize);
2029
2030	// Helper function to store a 4-byte integer constant to VAList at offset
2031	// OffsetBytes, and increment OffsetBytes by 4.
2032	const auto PushIntConstant = [&](const int32_t Value) {
2033	constexpr int IntSize = `4`;
2034	const Register Temp = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
2035	auto MIB =
2036	BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVi32imm))
2037	.addDef(RegNo: Temp)
2038	.addImm(Val: Value);
2039	constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2040
2041	const auto MMO = I.memoperands_begin();
2042	MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRWui))
2043	.addUse(RegNo: Temp)
2044	.addUse(RegNo: VAList)
2045	.addImm(Val: OffsetBytes / IntSize)
2046	.addMemOperand(MMO: MF.getMachineMemOperand(
2047	PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes),
2048	F: MachineMemOperand::MOStore, Size: IntSize, BaseAlignment: MMO->getBaseAlign()));
2049	constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2050	OffsetBytes += IntSize;
2051	};
2052
2053	// int gr_offs at offset 24 (12 on ILP32)
2054	PushIntConstant (-static_cast<int32_t>(GPRSize));
2055
2056	// int vr_offs at offset 28 (16 on ILP32)
2057	PushIntConstant (-static_cast<int32_t>(FPRSize));
2058
2059	assert(OffsetBytes == (STI.isTargetILP32() ? `20` : `32`) && "Unexpected offset");
2060
2061	I.eraseFromParent();
2062	return true;
2063	}
2064
2065	bool AArch64InstructionSelector::selectVaStartDarwin(
2066	MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2067	AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2068	Register ListReg = I.getOperand(i: `0`).getReg();
2069
2070	Register ArgsAddrReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2071
2072	int FrameIdx = FuncInfo->getVarArgsStackIndex();
2073	if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2074	CC: MF.getFunction().getCallingConv(), IsVarArg: MF.getFunction().isVarArg())) {
2075	FrameIdx = FuncInfo->getVarArgsGPRSize() > `0`
2076	? FuncInfo->getVarArgsGPRIndex()
2077	: FuncInfo->getVarArgsStackIndex();
2078	}
2079
2080	auto MIB =
2081	BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri))
2082	.addDef(RegNo: ArgsAddrReg)
2083	.addFrameIndex(Idx: FrameIdx)
2084	.addImm(Val: `0`)
2085	.addImm(Val: `0`);
2086
2087	constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2088
2089	MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRXui))
2090	.addUse(RegNo: ArgsAddrReg)
2091	.addUse(RegNo: ListReg)
2092	.addImm(Val: `0`)
2093	.addMemOperand(MMO: *I.memoperands_begin());
2094
2095	constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2096	I.eraseFromParent();
2097	return true;
2098	}
2099
2100	void AArch64InstructionSelector::materializeLargeCMVal(
2101	MachineInstr &I, const Value V, unsigned* OpFlags) {
2102	MachineBasicBlock &MBB = *I.getParent();
2103	MachineFunction &MF = *MBB.getParent();
2104	MachineRegisterInfo &MRI = MF.getRegInfo();
2105
2106	auto MovZ = MIB.buildInstr(Opc: AArch64::MOVZXi, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {});
2107	MovZ ->addOperand(MF, Op: I.getOperand(i: `1`));
2108	MovZ ->getOperand(i: `1`).setTargetFlags(OpFlags \| AArch64II::MO_G0 \|
2109	AArch64II::MO_NC);
2110	MovZ ->addOperand(MF, Op: MachineOperand::CreateImm(Val: `0`));
2111	constrainSelectedInstRegOperands(I&: *MovZ, TII, TRI, RBI);
2112
2113	auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2114	Register ForceDstReg) {
2115	Register DstReg = ForceDstReg
2116	? ForceDstReg
2117	: MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2118	auto MovI = MIB.buildInstr(Opcode: AArch64::MOVKXi).addDef(RegNo: DstReg).addUse(RegNo: SrcReg);
2119	if (auto *GV = dyn_cast<GlobalValue>(Val: V)) {
2120	MovI ->addOperand(MF, Op: MachineOperand::CreateGA(
2121	GV, Offset: MovZ ->getOperand(i: `1`).getOffset(), TargetFlags: Flags));
2122	} else {
2123	MovI ->addOperand(
2124	MF, Op: MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V),
2125	Offset: MovZ ->getOperand(i: `1`).getOffset(), TargetFlags: Flags));
2126	}
2127	MovI ->addOperand(MF, Op: MachineOperand::CreateImm(Val: Offset));
2128	constrainSelectedInstRegOperands(I&: *MovI, TII, TRI, RBI);
2129	return DstReg;
2130	};
2131	Register DstReg = BuildMovK (MovZ.getReg(Idx: `0`),
2132	AArch64II::MO_G1 \| AArch64II::MO_NC, `16`, `0`);
2133	DstReg = BuildMovK (DstReg, AArch64II::MO_G2 \| AArch64II::MO_NC, `32`, `0`);
2134	BuildMovK (DstReg, AArch64II::MO_G3, `48`, I.getOperand(i: `0`).getReg());
2135	}
2136
2137	bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2138	MachineBasicBlock &MBB = *I.getParent();
2139	MachineFunction &MF = *MBB.getParent();
2140	MachineRegisterInfo &MRI = MF.getRegInfo();
2141
2142	switch (I.getOpcode()) {
2143	case TargetOpcode::G_CONSTANT: {
2144	Register DefReg = I.getOperand(i: `0`).getReg();
2145	const LLT DefTy = MRI.getType(Reg: DefReg);
2146	if (!DefTy.isPointer())
2147	return false;
2148	const unsigned PtrSize = DefTy.getSizeInBits();
2149	if (PtrSize != `32` && PtrSize != `64`)
2150	return false;
2151	// Convert pointer typed constants to integers so TableGen can select.
2152	MRI.setType(VReg: DefReg, Ty: LLT::integer(SizeInBits: PtrSize));
2153	return true;
2154	}
2155	case TargetOpcode::G_STORE: {
2156	bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2157	MachineOperand &SrcOp = I.getOperand(i: `0`);
2158	if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) {
2159	// Allow matching with imported patterns for stores of pointers. Unlike
2160	// G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2161	// and constrain.
2162	auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: `64`), Op: SrcOp);
2163	Register NewSrc = Copy.getReg(Idx: `0`);
2164	SrcOp.setReg(NewSrc);
2165	RBI.constrainGenericRegister(Reg: NewSrc, RC: AArch64::GPR64RegClass, MRI);
2166	Changed = true;
2167	}
2168	return Changed;
2169	}
2170	case TargetOpcode::G_PTR_ADD: {
2171	// If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2172	// arithmetic semantics instead of falling back to regular arithmetic.
2173	const auto &TL = STI.getTargetLowering();
2174	if (TL->shouldPreservePtrArith(F: MF.getFunction(), PtrVT: EVT ()))
2175	return false;
2176	return convertPtrAddToAdd(I, MRI);
2177	}
2178	case TargetOpcode::G_LOAD: {
2179	// For scalar loads of pointers, we try to convert the dest type from p0
2180	// to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2181	// conversion, this should be ok because all users should have been
2182	// selected already, so the type doesn't matter for them.
2183	Register DstReg = I.getOperand(i: `0`).getReg();
2184	const LLT DstTy = MRI.getType(Reg: DstReg);
2185	if (!DstTy.isPointer())
2186	return false;
2187	MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: `64`));
2188	return true;
2189	}
2190	case AArch64::G_DUP: {
2191	// Convert the type from p0 to s64 to help selection.
2192	LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
2193	if (!DstTy.isPointerVector())
2194	return false;
2195	auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: `64`), Op: I.getOperand(i: `1`).getReg());
2196	MRI.setType(VReg: I.getOperand(i: `0`).getReg(),
2197	Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: `64`)));
2198	MRI.setRegClass(Reg: NewSrc.getReg(Idx: `0`), RC: &AArch64::GPR64RegClass);
2199	I.getOperand(i: `1`).setReg(NewSrc.getReg(Idx: `0`));
2200	return true;
2201	}
2202	case AArch64::G_INSERT_VECTOR_ELT: {
2203	LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
2204	LLT SrcVecTy = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
2205	if (SrcVecTy.isPointerVector()) {
2206	// Convert the type from p0 to s64 to help selection.
2207	auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: `64`), Op: I.getOperand(i: `2`).getReg());
2208	MRI.setType(VReg: I.getOperand(i: `1`).getReg(),
2209	Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: `64`)));
2210	MRI.setType(VReg: I.getOperand(i: `0`).getReg(),
2211	Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: `64`)));
2212	MRI.setRegClass(Reg: NewSrc.getReg(Idx: `0`), RC: &AArch64::GPR64RegClass);
2213	I.getOperand(i: `2`).setReg(NewSrc.getReg(Idx: `0`));
2214	return true;
2215	}
2216
2217	Register EltReg = I.getOperand(i: `2`).getReg();
2218	LLT EltTy = MRI.getType(Reg: EltReg);
2219	if (EltTy.isScalar() &&
2220	(EltTy.getSizeInBits() == `8` \|\| EltTy.getSizeInBits() == `16`) &&
2221	RBI.getRegBank(Reg: EltReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2222	// Convert the type from s8/s16 to s32 to help selection.
2223	auto NewElt = MIB.buildCopy(Res: LLT::scalar(SizeInBits: `32`), Op: EltReg);
2224	MRI.setRegClass(Reg: NewElt.getReg(Idx: `0`), RC: &AArch64::GPR32RegClass);
2225	I.getOperand(i: `2`).setReg(NewElt.getReg(Idx: `0`));
2226	return true;
2227	}
2228	return false;
2229	}
2230	case TargetOpcode::G_UITOFP:
2231	case TargetOpcode::G_SITOFP: {
2232	// If both source and destination regbanks are FPR, then convert the opcode
2233	// to G_SITOF so that the importer can select it to an fpr variant.
2234	// Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2235	// copy.
2236	Register SrcReg = I.getOperand(i: `1`).getReg();
2237	LLT SrcTy = MRI.getType(Reg: SrcReg);
2238	LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
2239	if (SrcTy.isVector() \|\| SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2240	return false;
2241
2242	if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2243	// Need to add a copy to change the type so that the existing patterns can
2244	// match when there is an integer on an FPR bank.
2245	if (SrcTy.getScalarType().isInteger()) {
2246	auto Copy = MIB.buildCopy(Res: DstTy, Op: SrcReg);
2247	I.getOperand(i: `1`).setReg(Copy.getReg(Idx: `0`));
2248	MRI.setRegClass(Reg: Copy.getReg(Idx: `0`),
2249	RC: getRegClassForTypeOnBank(
2250	Ty: SrcTy, RB: RBI.getRegBank(ID: AArch64::FPRRegBankID)));
2251	}
2252	if (I.getOpcode() == TargetOpcode::G_SITOFP)
2253	I.setDesc(TII.get(Opcode: AArch64::G_SITOF));
2254	else
2255	I.setDesc(TII.get(Opcode: AArch64::G_UITOF));
2256	return true;
2257	}
2258	return false;
2259	}
2260	default:
2261	return false;
2262	}
2263	}
2264
2265	/// This lowering tries to look for G_PTR_ADD instructions and then converts
2266	/// them to a standard G_ADD with a COPY on the source.
2267	///
2268	/// The motivation behind this is to expose the add semantics to the imported
2269	/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2270	/// because the selector works bottom up, uses before defs. By the time we
2271	/// end up trying to select a G_PTR_ADD, we should have already attempted to
2272	/// fold this into addressing modes and were therefore unsuccessful.
2273	bool AArch64InstructionSelector::convertPtrAddToAdd(
2274	MachineInstr &I, MachineRegisterInfo &MRI) {
2275	assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2276	Register DstReg = I.getOperand(i: `0`).getReg();
2277	Register AddOp1Reg = I.getOperand(i: `1`).getReg();
2278	const LLT PtrTy = MRI.getType(Reg: DstReg);
2279	if (PtrTy.getAddressSpace() != `0`)
2280	return false;
2281
2282	const LLT CastPtrTy = PtrTy.isVector()
2283	? LLT::fixed_vector(NumElements: `2`, ScalarTy: LLT::integer(SizeInBits: `64`))
2284	: LLT::integer(SizeInBits: `64`);
2285	auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg);
2286	// Set regbanks on the registers.
2287	if (PtrTy.isVector())
2288	MRI.setRegBank(Reg: PtrToInt.getReg(Idx: `0`), RegBank: RBI.getRegBank(ID: AArch64::FPRRegBankID));
2289	else
2290	MRI.setRegBank(Reg: PtrToInt.getReg(Idx: `0`), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
2291
2292	// Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2293	// %dst(intty) = G_ADD %intbase, off
2294	I.setDesc(TII.get(Opcode: TargetOpcode::G_ADD));
2295	MRI.setType(VReg: DstReg, Ty: CastPtrTy);
2296	I.getOperand(i: `1`).setReg(PtrToInt.getReg(Idx: `0`));
2297	if (!select(I&: *PtrToInt)) {
2298	LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2299	return false;
2300	}
2301
2302	// Also take the opportunity here to try to do some optimization.
2303	// Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2304	Register NegatedReg;
2305	if (!mi_match(R: I.getOperand(i: `2`).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg))))
2306	return true;
2307	I.getOperand(i: `2`).setReg(NegatedReg);
2308	I.setDesc(TII.get(Opcode: TargetOpcode::G_SUB));
2309	return true;
2310	}
2311
2312	bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2313	MachineRegisterInfo &MRI) {
2314	// We try to match the immediate variant of LSL, which is actually an alias
2315	// for a special case of UBFM. Otherwise, we fall back to the imported
2316	// selector which will match the register variant.
2317	assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2318	const auto &MO = I.getOperand(i: `2`);
2319	auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI);
2320	if (!VRegAndVal)
2321	return false;
2322
2323	const LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
2324	if (DstTy.isVector())
2325	return false;
2326	bool Is64Bit = DstTy.getSizeInBits() == `64`;
2327	auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO);
2328	auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO);
2329
2330	if (!Imm1Fn \|\| !Imm2Fn)
2331	return false;
2332
2333	auto NewI =
2334	MIB.buildInstr(Opc: Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2335	DstOps: {I.getOperand(i: `0`).getReg()}, SrcOps: {I.getOperand(i: `1`).getReg()});
2336
2337	for (auto &RenderFn : *Imm1Fn)
2338	RenderFn (NewI);
2339	for (auto &RenderFn : *Imm2Fn)
2340	RenderFn (NewI);
2341
2342	I.eraseFromParent();
2343	constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
2344	return true;
2345	}
2346
2347	bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2348	MachineInstr &I, MachineRegisterInfo &MRI) {
2349	assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2350	// If we're storing a scalar, it doesn't matter what register bank that
2351	// scalar is on. All that matters is the size.
2352	//
2353	// So, if we see something like this (with a 32-bit scalar as an example):
2354	//
2355	// %x:gpr(s32) = ... something ...
2356	// %y:fpr(s32) = COPY %x:gpr(s32)
2357	// G_STORE %y:fpr(s32)
2358	//
2359	// We can fix this up into something like this:
2360	//
2361	// G_STORE %x:gpr(s32)
2362	//
2363	// And then continue the selection process normally.
2364	Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: `0`).getReg(), MRI);
2365	if (!DefDstReg.isValid())
2366	return false;
2367	LLT DefDstTy = MRI.getType(Reg: DefDstReg);
2368	Register StoreSrcReg = I.getOperand(i: `0`).getReg();
2369	LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg);
2370
2371	// If we get something strange like a physical register, then we shouldn't
2372	// go any further.
2373	if (!DefDstTy.isValid())
2374	return false;
2375
2376	// Are the source and dst types the same size?
2377	if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2378	return false;
2379
2380	if (RBI.getRegBank(Reg: StoreSrcReg, MRI, TRI) ==
2381	RBI.getRegBank(Reg: DefDstReg, MRI, TRI))
2382	return false;
2383
2384	// We have a cross-bank copy, which is entering a store. Let's fold it.
2385	I.getOperand(i: `0`).setReg(DefDstReg);
2386	return true;
2387	}
2388
2389	bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2390	assert(I.getParent() && "Instruction should be in a basic block!");
2391	assert(I.getParent()->getParent() && "Instruction should be in a function!");
2392
2393	MachineBasicBlock &MBB = *I.getParent();
2394	MachineFunction &MF = *MBB.getParent();
2395	MachineRegisterInfo &MRI = MF.getRegInfo();
2396
2397	switch (I.getOpcode()) {
2398	case AArch64::G_DUP: {
2399	// Before selecting a DUP instruction, check if it is better selected as a
2400	// MOV or load from a constant pool.
2401	Register Src = I.getOperand(i: `1`).getReg();
2402	auto ValAndVReg = getAnyConstantVRegValWithLookThrough(
2403	VReg: Src, MRI, /LookThroughInstrs=/true, /LookThroughAnyExt=/true);
2404	if (!ValAndVReg)
2405	return false;
2406	LLVMContext &Ctx = MF.getFunction().getContext();
2407	Register Dst = I.getOperand(i: `0`).getReg();
2408	auto *CV = ConstantDataVector::getSplat(
2409	NumElts: MRI.getType(Reg: Dst).getNumElements(),
2410	Elt: ConstantInt::get(
2411	Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Dst).getScalarSizeInBits()),
2412	V: ValAndVReg ->Value.trunc(width: MRI.getType(Reg: Dst).getScalarSizeInBits())));
2413	if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI))
2414	return false;
2415	I.eraseFromParent();
2416	return true;
2417	}
2418	case TargetOpcode::G_SEXT:
2419	// Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2420	// over a normal extend.
2421	if (selectUSMovFromExtend(I, MRI))
2422	return true;
2423	return false;
2424	case TargetOpcode::G_BR:
2425	return false;
2426	case TargetOpcode::G_SHL:
2427	return earlySelectSHL(I, MRI);
2428	case TargetOpcode::G_CONSTANT: {
2429	bool IsZero = false;
2430	if (I.getOperand(i: `1`).isCImm())
2431	IsZero = I.getOperand(i: `1`).getCImm()->isZero();
2432	else if (I.getOperand(i: `1`).isImm())
2433	IsZero = I.getOperand(i: `1`).getImm() == `0`;
2434
2435	if (!IsZero)
2436	return false;
2437
2438	Register DefReg = I.getOperand(i: `0`).getReg();
2439	LLT Ty = MRI.getType(Reg: DefReg);
2440	if (Ty.getSizeInBits() == `64`) {
2441	I.getOperand(i: `1`).ChangeToRegister(Reg: AArch64::XZR, isDef: false);
2442	RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
2443	} else if (Ty.getSizeInBits() <= `32`) {
2444	I.getOperand(i: `1`).ChangeToRegister(Reg: AArch64::WZR, isDef: false);
2445	RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR32RegClass, MRI);
2446	} else
2447	return false;
2448
2449	I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
2450	return true;
2451	}
2452
2453	case TargetOpcode::G_ADD: {
2454	// Check if this is being fed by a G_ICMP on either side.
2455	//
2456	// (cmp pred, x, y) + z
2457	//
2458	// In the above case, when the cmp is true, we increment z by 1. So, we can
2459	// fold the add into the cset for the cmp by using cinc.
2460	//
2461	// FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2462	Register AddDst = I.getOperand(i: `0`).getReg();
2463	Register AddLHS = I.getOperand(i: `1`).getReg();
2464	Register AddRHS = I.getOperand(i: `2`).getReg();
2465	// Only handle scalars.
2466	LLT Ty = MRI.getType(Reg: AddLHS);
2467	if (Ty.isVector())
2468	return false;
2469	// Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2470	// bits.
2471	unsigned Size = Ty.getSizeInBits();
2472	if (Size != `32` && Size != `64`)
2473	return false;
2474	auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2475	if (!MRI.hasOneNonDBGUse(RegNo: Reg))
2476	return nullptr;
2477	// If the LHS of the add is 32 bits, then we want to fold a 32-bit
2478	// compare.
2479	if (Size == `32`)
2480	return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI);
2481	// We model scalar compares using 32-bit destinations right now.
2482	// If it's a 64-bit compare, it'll have 64-bit sources.
2483	Register ZExt;
2484	if (!mi_match(R: Reg, MRI,
2485	P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt))))))
2486	return nullptr;
2487	auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI);
2488	if (!Cmp \|\|
2489	MRI.getType(Reg: Cmp->getOperand(i: `2`).getReg()).getSizeInBits() != `64`)
2490	return nullptr;
2491	return Cmp;
2492	};
2493	// Try to match
2494	// z + (cmp pred, x, y)
2495	MachineInstr *Cmp = MatchCmp (AddRHS);
2496	if (!Cmp) {
2497	// (cmp pred, x, y) + z
2498	std::swap(a&: AddLHS, b&: AddRHS);
2499	Cmp = MatchCmp (AddRHS);
2500	if (!Cmp)
2501	return false;
2502	}
2503	auto &PredOp = Cmp->getOperand(i: `1`);
2504	MIB.setInstrAndDebugLoc(I);
2505	emitIntegerCompare(/LHS=/Cmp->getOperand(i: `2`),
2506	/RHS=/Cmp->getOperand(i: `3`), Predicate&: PredOp, MIRBuilder&: MIB);
2507	auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2508	const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2509	P: CmpInst::getInversePredicate(pred: Pred), RHS: Cmp->getOperand(i: `3`).getReg(), MRI: &MRI);
2510	emitCSINC(/Dst=/AddDst, /Src =/Src1: AddLHS, /Src2=/AddLHS, Pred: InvCC, MIRBuilder&: MIB);
2511	I.eraseFromParent();
2512	return true;
2513	}
2514	case TargetOpcode::G_OR: {
2515	// Look for operations that take the lower `Width=Size-ShiftImm` bits of
2516	// `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2517	// shifting and masking that we can replace with a BFI (encoded as a BFM).
2518	Register Dst = I.getOperand(i: `0`).getReg();
2519	LLT Ty = MRI.getType(Reg: Dst);
2520
2521	if (!Ty.isScalar())
2522	return false;
2523
2524	unsigned Size = Ty.getSizeInBits();
2525	if (Size != `32` && Size != `64`)
2526	return false;
2527
2528	Register ShiftSrc;
2529	int64_t ShiftImm;
2530	Register MaskSrc;
2531	int64_t MaskImm;
2532	if (!mi_match(
2533	R: Dst, MRI,
2534	P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))),
2535	R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm))))))
2536	return false;
2537
2538	if (ShiftImm > Size \|\| ((`1ULL` << ShiftImm) - `1ULL`) != uint64_t(MaskImm))
2539	return false;
2540
2541	int64_t Immr = Size - ShiftImm;
2542	int64_t Imms = Size - ShiftImm - `1`;
2543	unsigned Opc = Size == `32` ? AArch64::BFMWri : AArch64::BFMXri;
2544	emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB);
2545	I.eraseFromParent();
2546	return true;
2547	}
2548	case TargetOpcode::G_FENCE: {
2549	if (I.getOperand(i: `1`).getImm() == `0`)
2550	BuildMI(BB&: MBB, I, MIMD: MIMetadata (I), MCID: TII.get(Opcode: TargetOpcode::MEMBARRIER));
2551	else
2552	BuildMI(BB&: MBB, I, MIMD: MIMetadata (I), MCID: TII.get(Opcode: AArch64::DMB))
2553	.addImm(Val: I.getOperand(i: `0`).getImm() == `4` ? `0x9` : `0xb`);
2554	I.eraseFromParent();
2555	return true;
2556	}
2557	default:
2558	return false;
2559	}
2560	}
2561
2562	bool AArch64InstructionSelector::select(MachineInstr &I) {
2563	assert(I.getParent() && "Instruction should be in a basic block!");
2564	assert(I.getParent()->getParent() && "Instruction should be in a function!");
2565
2566	MachineBasicBlock &MBB = *I.getParent();
2567	MachineFunction &MF = *MBB.getParent();
2568	MachineRegisterInfo &MRI = MF.getRegInfo();
2569
2570	const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2571	if (Subtarget->requiresStrictAlign()) {
2572	// We don't support this feature yet.
2573	LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2574	return false;
2575	}
2576
2577	MIB.setInstrAndDebugLoc(I);
2578
2579	unsigned Opcode = I.getOpcode();
2580	// G_PHI requires same handling as PHI
2581	if (!I.isPreISelOpcode() \|\| Opcode == TargetOpcode::G_PHI) {
2582	// Certain non-generic instructions also need some special handling.
2583
2584	if (Opcode == TargetOpcode::LOAD_STACK_GUARD) {
2585	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2586	return true;
2587	}
2588
2589	if (Opcode == TargetOpcode::PHI \|\| Opcode == TargetOpcode::G_PHI) {
2590	const Register DefReg = I.getOperand(i: `0`).getReg();
2591	const LLT DefTy = MRI.getType(Reg: DefReg);
2592
2593	const RegClassOrRegBank &RegClassOrBank =
2594	MRI.getRegClassOrRegBank(Reg: DefReg);
2595
2596	const TargetRegisterClass *DefRC =
2597	dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank);
2598	if (!DefRC) {
2599	if (!DefTy.isValid()) {
2600	LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2601	return false;
2602	}
2603	const RegisterBank &RB = cast<const* RegisterBank *>(Val: RegClassOrBank);
2604	DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB);
2605	if (!DefRC) {
2606	LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2607	return false;
2608	}
2609	}
2610
2611	I.setDesc(TII.get(Opcode: TargetOpcode::PHI));
2612
2613	return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI);
2614	}
2615
2616	if (I.isCopy())
2617	return selectCopy(I, TII, MRI, TRI, RBI);
2618
2619	if (I.isDebugInstr())
2620	return selectDebugInstr(I, MRI, RBI);
2621
2622	return true;
2623	}
2624
2625
2626	if (I.getNumOperands() != I.getNumExplicitOperands()) {
2627	LLVM_DEBUG(
2628	dbgs() << "Generic instruction has unexpected implicit operands\n");
2629	return false;
2630	}
2631
2632	// Try to do some lowering before we start instruction selecting. These
2633	// lowerings are purely transformations on the input G_MIR and so selection
2634	// must continue after any modification of the instruction.
2635	if (preISelLower(I)) {
2636	Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2637	}
2638
2639	// There may be patterns where the importer can't deal with them optimally,
2640	// but does select it to a suboptimal sequence so our custom C++ selection
2641	// code later never has a chance to work on it. Therefore, we have an early
2642	// selection attempt here to give priority to certain selection routines
2643	// over the imported ones.
2644	if (earlySelect(I))
2645	return true;
2646
2647	if (selectImpl(I, CoverageInfo&: *CoverageInfo))
2648	return true;
2649
2650	LLT Ty =
2651	I.getOperand(i: `0`).isReg() ? MRI.getType(Reg: I.getOperand(i: `0`).getReg()) : LLT {};
2652
2653	switch (Opcode) {
2654	case TargetOpcode::G_SBFX:
2655	case TargetOpcode::G_UBFX: {
2656	static const unsigned OpcTable[`2`][`2`] = {
2657	{AArch64::UBFMWri, AArch64::UBFMXri},
2658	{AArch64::SBFMWri, AArch64::SBFMXri}};
2659	bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2660	unsigned Size = Ty.getSizeInBits();
2661	unsigned Opc = OpcTable[IsSigned][Size == `64`];
2662	auto Cst1 =
2663	getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: `2`).getReg(), MRI);
2664	assert(Cst1 && "Should have gotten a constant for src 1?");
2665	auto Cst2 =
2666	getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: `3`).getReg(), MRI);
2667	assert(Cst2 && "Should have gotten a constant for src 2?");
2668	auto LSB = Cst1 ->Value.getZExtValue();
2669	auto Width = Cst2 ->Value.getZExtValue();
2670	auto BitfieldInst =
2671	MIB.buildInstr(Opc, DstOps: {I.getOperand(i: `0`)}, SrcOps: {I.getOperand(i: `1`)})
2672	.addImm(Val: LSB)
2673	.addImm(Val: LSB + Width - `1`);
2674	I.eraseFromParent();
2675	constrainSelectedInstRegOperands(I&: *BitfieldInst, TII, TRI, RBI);
2676	return true;
2677	}
2678	case TargetOpcode::G_BRCOND:
2679	return selectCompareBranch(I, MF, MRI);
2680
2681	case TargetOpcode::G_BRINDIRECT: {
2682	const Function &Fn = MF.getFunction();
2683	if (std::optional<uint16_t> BADisc =
2684	STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: Fn)) {
2685	auto MI = MIB.buildInstr(Opc: AArch64::BRA, DstOps: {}, SrcOps: {I.getOperand(i: `0`).getReg()});
2686	MI.addImm(Val: AArch64PACKey::IA);
2687	MI.addImm(Val: *BADisc);
2688	MI.addReg(/AddrDisc=/RegNo: AArch64::XZR);
2689	I.eraseFromParent();
2690	constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
2691	return true;
2692	}
2693	I.setDesc(TII.get(Opcode: AArch64::BR));
2694	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2695	return true;
2696	}
2697
2698	case TargetOpcode::G_BRJT:
2699	return selectBrJT(I, MRI);
2700
2701	case AArch64::G_ADD_LOW: {
2702	// This op may have been separated from it's ADRP companion by the localizer
2703	// or some other code motion pass. Given that many CPUs will try to
2704	// macro fuse these operations anyway, select this into a MOVaddr pseudo
2705	// which will later be expanded into an ADRP+ADD pair after scheduling.
2706	MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: `1`).getReg());
2707	if (BaseMI->getOpcode() != AArch64::ADRP) {
2708	I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2709	I.addOperand(Op: MachineOperand::CreateImm(Val: `0`));
2710	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2711	return true;
2712	}
2713	assert(TM.getCodeModel() == CodeModel::Small &&
2714	"Expected small code model");
2715	auto Op1 = BaseMI->getOperand(i: `1`);
2716	auto Op2 = I.getOperand(i: `2`);
2717	auto MovAddr = MIB.buildInstr(Opc: AArch64::MOVaddr, DstOps: {I.getOperand(i: `0`)}, SrcOps: {})
2718	.addGlobalAddress(GV: Op1.getGlobal(), Offset: Op1.getOffset(),
2719	TargetFlags: Op1.getTargetFlags())
2720	.addGlobalAddress(GV: Op2.getGlobal(), Offset: Op2.getOffset(),
2721	TargetFlags: Op2.getTargetFlags());
2722	I.eraseFromParent();
2723	constrainSelectedInstRegOperands(I&: *MovAddr, TII, TRI, RBI);
2724	return true;
2725	}
2726
2727	case TargetOpcode::G_FCONSTANT: {
2728	const Register DefReg = I.getOperand(i: `0`).getReg();
2729	const LLT DefTy = MRI.getType(Reg: DefReg);
2730	const unsigned DefSize = DefTy.getSizeInBits();
2731	const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
2732
2733	const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB);
2734	// For 16, 64, and 128b values, emit a constant pool load.
2735	switch (DefSize) {
2736	default:
2737	llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2738	case `32`:
2739	case `64`: {
2740	bool OptForSize = shouldOptForSize(MF: &MF);
2741	const auto &TLI = MF.getSubtarget().getTargetLowering();
2742	// If TLI says that this fpimm is illegal, then we'll expand to a
2743	// constant pool load.
2744	if (TLI->isFPImmLegal(I.getOperand(i: `1`).getFPImm()->getValueAPF(),
2745	EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize))
2746	break;
2747	[[fallthrough]];
2748	}
2749	case `16`:
2750	case `128`: {
2751	auto *FPImm = I.getOperand(i: `1`).getFPImm();
2752	auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB);
2753	if (!LoadMI) {
2754	LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2755	return false;
2756	}
2757	MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: `0`).getReg()});
2758	I.eraseFromParent();
2759	return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI);
2760	}
2761	}
2762
2763	assert((DefSize == `32` \|\| DefSize == `64`) && "Unexpected const def size");
2764	// Either emit a FMOV, or emit a copy to emit a normal mov.
2765	const Register DefGPRReg = MRI.createVirtualRegister(
2766	RegClass: DefSize == `32` ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2767	MachineOperand &RegOp = I.getOperand(i: `0`);
2768	RegOp.setReg(DefGPRReg);
2769	MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2770	MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg});
2771
2772	if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) {
2773	LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2774	return false;
2775	}
2776
2777	MachineOperand &ImmOp = I.getOperand(i: `1`);
2778	ImmOp.ChangeToImmediate(
2779	ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2780
2781	const unsigned MovOpc =
2782	DefSize == `64` ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2783	I.setDesc(TII.get(Opcode: MovOpc));
2784	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2785	return true;
2786	}
2787	case TargetOpcode::G_EXTRACT: {
2788	Register DstReg = I.getOperand(i: `0`).getReg();
2789	Register SrcReg = I.getOperand(i: `1`).getReg();
2790	LLT SrcTy = MRI.getType(Reg: SrcReg);
2791	LLT DstTy = MRI.getType(Reg: DstReg);
2792	(void)DstTy;
2793	unsigned SrcSize = SrcTy.getSizeInBits();
2794
2795	if (SrcTy.getSizeInBits() > `64`) {
2796	// This should be an extract of an s128, which is like a vector extract.
2797	if (SrcTy.getSizeInBits() != `128`)
2798	return false;
2799	// Only support extracting 64 bits from an s128 at the moment.
2800	if (DstTy.getSizeInBits() != `64`)
2801	return false;
2802
2803	unsigned Offset = I.getOperand(i: `2`).getImm();
2804	if (Offset % `64` != `0`)
2805	return false;
2806
2807	// Check we have the right regbank always.
2808	const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
2809	const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
2810	assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2811
2812	if (SrcRB.getID() == AArch64::GPRRegBankID) {
2813	auto NewI =
2814	MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
2815	.addUse(RegNo: SrcReg, Flags: {},
2816	SubReg: Offset == `0` ? AArch64::sube64 : AArch64::subo64);
2817	constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt&: *NewI,
2818	RegClass: AArch64::GPR64RegClass, RegMO&: NewI ->getOperand(i: `0`));
2819	I.eraseFromParent();
2820	return true;
2821	}
2822
2823	// Emit the same code as a vector extract.
2824	// Offset must be a multiple of 64.
2825	unsigned LaneIdx = Offset / `64`;
2826	MachineInstr *Extract = emitExtractVectorElt(
2827	DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: `64`), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB);
2828	if (!Extract)
2829	return false;
2830	I.eraseFromParent();
2831	return true;
2832	}
2833
2834	I.setDesc(TII.get(Opcode: SrcSize == `64` ? AArch64::UBFMXri : AArch64::UBFMWri));
2835	MachineInstrBuilder (MF, I).addImm(Val: I.getOperand(i: `2`).getImm() +
2836	Ty.getSizeInBits() - `1`);
2837
2838	if (SrcSize < `64`) {
2839	assert(SrcSize == `32` && DstTy.getSizeInBits() == `16` &&
2840	"unexpected G_EXTRACT types");
2841	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2842	return true;
2843	}
2844
2845	DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: `64`));
2846	MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2847	MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: `0`).getReg()}, SrcOps: {})
2848	.addReg(RegNo: DstReg, Flags: {}, SubReg: AArch64::sub_32);
2849	RBI.constrainGenericRegister(Reg: I.getOperand(i: `0`).getReg(),
2850	RC: AArch64::GPR32RegClass, MRI);
2851	I.getOperand(i: `0`).setReg(DstReg);
2852
2853	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2854	return true;
2855	}
2856
2857	case TargetOpcode::G_INSERT: {
2858	LLT SrcTy = MRI.getType(Reg: I.getOperand(i: `2`).getReg());
2859	LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
2860	unsigned DstSize = DstTy.getSizeInBits();
2861	// Larger inserts are vectors, same-size ones should be something else by
2862	// now (split up or turned into COPYs).
2863	if (Ty.getSizeInBits() > `64` \|\| SrcTy.getSizeInBits() > `32`)
2864	return false;
2865
2866	I.setDesc(TII.get(Opcode: DstSize == `64` ? AArch64::BFMXri : AArch64::BFMWri));
2867	unsigned LSB = I.getOperand(i: `3`).getImm();
2868	unsigned Width = MRI.getType(Reg: I.getOperand(i: `2`).getReg()).getSizeInBits();
2869	I.getOperand(i: `3`).setImm((DstSize - LSB) % DstSize);
2870	MachineInstrBuilder (MF, I).addImm(Val: Width - `1`);
2871
2872	if (DstSize < `64`) {
2873	assert(DstSize == `32` && SrcTy.getSizeInBits() == `16` &&
2874	"unexpected G_INSERT types");
2875	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2876	return true;
2877	}
2878
2879	Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: `64`));
2880	BuildMI(BB&: MBB, I: I.getIterator(), MIMD: I.getDebugLoc(),
2881	MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
2882	.addDef(RegNo: SrcReg)
2883	.addUse(RegNo: I.getOperand(i: `2`).getReg())
2884	.addImm(Val: AArch64::sub_32);
2885	RBI.constrainGenericRegister(Reg: I.getOperand(i: `2`).getReg(),
2886	RC: AArch64::GPR32RegClass, MRI);
2887	I.getOperand(i: `2`).setReg(SrcReg);
2888
2889	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2890	return true;
2891	}
2892	case TargetOpcode::G_FRAME_INDEX: {
2893	// allocas and G_FRAME_INDEX are only supported in addrspace(0).
2894	if (Ty != LLT::pointer(AddressSpace: `0`, SizeInBits: `64`)) {
2895	LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2896	<< ", expected: " << LLT::pointer(`0`, `64`) << `'\n'`);
2897	return false;
2898	}
2899	I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2900
2901	// MOs for a #0 shifted immediate.
2902	I.addOperand(Op: MachineOperand::CreateImm(Val: `0`));
2903	I.addOperand(Op: MachineOperand::CreateImm(Val: `0`));
2904
2905	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2906	return true;
2907	}
2908
2909	case TargetOpcode::G_GLOBAL_VALUE: {
2910	const GlobalValue GV = nullptr*;
2911	unsigned OpFlags;
2912	if (I.getOperand(i: `1`).isSymbol()) {
2913	OpFlags = I.getOperand(i: `1`).getTargetFlags();
2914	// Currently only used by "RtLibUseGOT".
2915	assert(OpFlags == AArch64II::MO_GOT);
2916	} else {
2917	GV = I.getOperand(i: `1`).getGlobal();
2918	if (GV->isThreadLocal()) {
2919	// We don't support instructions with emulated TLS variables yet
2920	if (TM.useEmulatedTLS())
2921	return false;
2922	return selectTLSGlobalValue(I, MRI);
2923	}
2924	OpFlags = STI.ClassifyGlobalReference(GV, TM);
2925	}
2926
2927	if (OpFlags & AArch64II::MO_GOT) {
2928	bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2929	I.setDesc(TII.get(Opcode: IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2930	I.getOperand(i: `1`).setTargetFlags(OpFlags);
2931	I.addImplicitDefUseOperands(MF);
2932	} else if (TM.getCodeModel() == CodeModel::Large &&
2933	!TM.isPositionIndependent()) {
2934	// Materialize the global using movz/movk instructions.
2935	materializeLargeCMVal(I, V: GV, OpFlags);
2936	I.eraseFromParent();
2937	return true;
2938	} else if (TM.getCodeModel() == CodeModel::Tiny) {
2939	I.setDesc(TII.get(Opcode: AArch64::ADR));
2940	I.getOperand(i: `1`).setTargetFlags(OpFlags);
2941	} else {
2942	I.setDesc(TII.get(Opcode: AArch64::MOVaddr));
2943	I.getOperand(i: `1`).setTargetFlags(OpFlags \| AArch64II::MO_PAGE);
2944	MachineInstrBuilder MIB(MF, I);
2945	MIB.addGlobalAddress(GV, Offset: I.getOperand(i: `1`).getOffset(),
2946	TargetFlags: OpFlags \| AArch64II::MO_PAGEOFF \| AArch64II::MO_NC);
2947	}
2948	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2949	return true;
2950	}
2951
2952	case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2953	return selectPtrAuthGlobalValue(I, MRI);
2954
2955	case TargetOpcode::G_ZEXTLOAD:
2956	case TargetOpcode::G_LOAD:
2957	case TargetOpcode::G_STORE: {
2958	GLoadStore &LdSt = cast<GLoadStore>(Val&: I);
2959	bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2960	LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg());
2961
2962	// Can only handle AddressSpace 0, 64-bit pointers.
2963	if (PtrTy != LLT::pointer(AddressSpace: `0`, SizeInBits: `64`)) {
2964	return false;
2965	}
2966
2967	uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2968	unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2969	AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2970
2971	// Need special instructions for atomics that affect ordering.
2972	if (isStrongerThanMonotonic(AO: Order)) {
2973	assert(!isa<GZExtLoad>(LdSt));
2974	assert(MemSizeInBytes <= `8` &&
2975	"128-bit atomics should already be custom-legalized");
2976
2977	if (isa<GLoad>(Val: LdSt)) {
2978	static constexpr unsigned LDAPROpcodes[] = {
2979	AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2980	static constexpr unsigned LDAROpcodes[] = {
2981	AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2982	ArrayRef<unsigned> Opcodes =
2983	STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2984	? LDAPROpcodes
2985	: LDAROpcodes;
2986	I.setDesc(TII.get(Opcode: Opcodes [Log2_32(Value: MemSizeInBytes)]));
2987	} else {
2988	static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2989	AArch64::STLRW, AArch64::STLRX};
2990	Register ValReg = LdSt.getReg(Idx: `0`);
2991	if (MRI.getType(Reg: ValReg).getSizeInBits() == `64` && MemSizeInBits != `64`) {
2992	// Emit a subreg copy of 32 bits.
2993	Register NewVal = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
2994	MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {NewVal}, SrcOps: {})
2995	.addReg(RegNo: I.getOperand(i: `0`).getReg(), Flags: {}, SubReg: AArch64::sub_32);
2996	I.getOperand(i: `0`).setReg(NewVal);
2997	}
2998	I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)]));
2999	}
3000	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3001	return true;
3002	}
3003
3004	#ifndef NDEBUG
3005	const Register PtrReg = LdSt.getPointerReg();
3006	const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
3007	// Check that the pointer register is valid.
3008	assert(PtrRB.getID() == AArch64::GPRRegBankID &&
3009	"Load/Store pointer operand isn't a GPR");
3010	assert(MRI.getType(PtrReg).isPointer() &&
3011	"Load/Store pointer operand isn't a pointer");
3012	#endif
3013
3014	const Register ValReg = LdSt.getReg(Idx: `0`);
3015	const RegisterBank &RB = *RBI.getRegBank(Reg: ValReg, MRI, TRI);
3016	LLT ValTy = MRI.getType(Reg: ValReg);
3017
3018	// The code below doesn't support truncating stores, so we need to split it
3019	// again.
3020	if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3021	unsigned SubReg;
3022	LLT MemTy = LdSt.getMMO().getMemoryType();
3023	auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
3024	if (!getSubRegForClass(RC, TRI, SubReg))
3025	return false;
3026
3027	// Generate a subreg copy.
3028	auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {})
3029	.addReg(RegNo: ValReg, Flags: {}, SubReg)
3030	.getReg(Idx: `0`);
3031	RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI);
3032	LdSt.getOperand(i: `0`).setReg(Copy);
3033	} else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3034	// If this is an any-extending load from the FPR bank, split it into a regular
3035	// load + extend.
3036	if (RB.getID() == AArch64::FPRRegBankID) {
3037	unsigned SubReg;
3038	LLT MemTy = LdSt.getMMO().getMemoryType();
3039	auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
3040	if (!getSubRegForClass(RC, TRI, SubReg))
3041	return false;
3042	Register OldDst = LdSt.getReg(Idx: `0`);
3043	Register NewDst =
3044	MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType());
3045	LdSt.getOperand(i: `0`).setReg(NewDst);
3046	MRI.setRegBank(Reg: NewDst, RegBank: RB);
3047	// Generate a SUBREG_TO_REG to extend it.
3048	MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator()));
3049	MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {OldDst}, SrcOps: {})
3050	.addUse(RegNo: NewDst)
3051	.addImm(Val: SubReg);
3052	auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB);
3053	RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI);
3054	MIB.setInstr(LdSt);
3055	ValTy = MemTy; // This is no longer an extending load.
3056	}
3057	}
3058
3059	// Helper lambda for partially selecting I. Either returns the original
3060	// instruction with an updated opcode, or a new instruction.
3061	auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3062	bool IsStore = isa<GStore>(Val: I);
3063	const unsigned NewOpc =
3064	selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits);
3065	if (NewOpc == I.getOpcode())
3066	return nullptr;
3067	// Check if we can fold anything into the addressing mode.
3068	auto AddrModeFns =
3069	selectAddrModeIndexed(Root&: I.getOperand(i: `1`), Size: MemSizeInBytes);
3070	if (!AddrModeFns) {
3071	// Can't fold anything. Use the original instruction.
3072	I.setDesc(TII.get(Opcode: NewOpc));
3073	I.addOperand(Op: MachineOperand::CreateImm(Val: `0`));
3074	return &I;
3075	}
3076
3077	// Folded something. Create a new instruction and return it.
3078	auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags());
3079	Register CurValReg = I.getOperand(i: `0`).getReg();
3080	IsStore ? NewInst.addUse(RegNo: CurValReg) : NewInst.addDef(RegNo: CurValReg);
3081	NewInst.cloneMemRefs(OtherMI: I);
3082	for (auto &Fn : *AddrModeFns)
3083	Fn (NewInst);
3084	I.eraseFromParent();
3085	return &*NewInst;
3086	};
3087
3088	MachineInstr *LoadStore = SelectLoadStoreAddressingMode ();
3089	if (!LoadStore)
3090	return false;
3091
3092	// If we're storing a 0, use WZR/XZR.
3093	if (Opcode == TargetOpcode::G_STORE) {
3094	auto CVal = getIConstantVRegValWithLookThrough(
3095	VReg: LoadStore->getOperand(i: `0`).getReg(), MRI);
3096	if (CVal && CVal ->Value == `0`) {
3097	switch (LoadStore->getOpcode()) {
3098	case AArch64::STRWui:
3099	case AArch64::STRHHui:
3100	case AArch64::STRBBui:
3101	LoadStore->getOperand(i: `0`).setReg(AArch64::WZR);
3102	break;
3103	case AArch64::STRXui:
3104	LoadStore->getOperand(i: `0`).setReg(AArch64::XZR);
3105	break;
3106	}
3107	}
3108	}
3109
3110	if (IsZExtLoad \|\| (Opcode == TargetOpcode::G_LOAD &&
3111	ValTy == LLT::scalar(SizeInBits: `64`) && MemSizeInBits == `32`)) {
3112	// The any/zextload from a smaller type to i32 should be handled by the
3113	// importer.
3114	if (MRI.getType(Reg: LoadStore->getOperand(i: `0`).getReg()).getSizeInBits() != `64`)
3115	return false;
3116	// If we have an extending load then change the load's type to be a
3117	// narrower reg and zero_extend with SUBREG_TO_REG.
3118	Register LdReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3119	Register DstReg = LoadStore->getOperand(i: `0`).getReg();
3120	LoadStore->getOperand(i: `0`).setReg(LdReg);
3121
3122	MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator()));
3123	MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DstReg}, SrcOps: {})
3124	.addUse(RegNo: LdReg)
3125	.addImm(Val: AArch64::sub_32);
3126	constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3127	return RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64allRegClass,
3128	MRI);
3129	}
3130	constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3131	return true;
3132	}
3133
3134	case TargetOpcode::G_INDEXED_ZEXTLOAD:
3135	case TargetOpcode::G_INDEXED_SEXTLOAD:
3136	return selectIndexedExtLoad(I, MRI);
3137	case TargetOpcode::G_INDEXED_LOAD:
3138	return selectIndexedLoad(I, MRI);
3139	case TargetOpcode::G_INDEXED_STORE:
3140	return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI);
3141
3142	case TargetOpcode::G_LSHR:
3143	case TargetOpcode::G_ASHR:
3144	if (MRI.getType(Reg: I.getOperand(i: `0`).getReg()).isVector())
3145	return selectVectorAshrLshr(I, MRI);
3146	[[fallthrough]];
3147	case TargetOpcode::G_SHL:
3148	if (Opcode == TargetOpcode::G_SHL &&
3149	MRI.getType(Reg: I.getOperand(i: `0`).getReg()).isVector())
3150	return selectVectorSHL(I, MRI);
3151
3152	// These shifts were legalized to have 64 bit shift amounts because we
3153	// want to take advantage of the selection patterns that assume the
3154	// immediates are s64s, however, selectBinaryOp will assume both operands
3155	// will have the same bit size.
3156	{
3157	Register SrcReg = I.getOperand(i: `1`).getReg();
3158	Register ShiftReg = I.getOperand(i: `2`).getReg();
3159	const LLT ShiftTy = MRI.getType(Reg: ShiftReg);
3160	const LLT SrcTy = MRI.getType(Reg: SrcReg);
3161	if (!SrcTy.isVector() && SrcTy.getSizeInBits() == `32` &&
3162	ShiftTy.getSizeInBits() == `64`) {
3163	assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3164	// Insert a subregister copy to implement a 64->32 trunc
3165	auto Trunc = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {SrcTy}, SrcOps: {})
3166	.addReg(RegNo: ShiftReg, Flags: {}, SubReg: AArch64::sub_32);
3167	MRI.setRegBank(Reg: Trunc.getReg(Idx: `0`), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
3168	I.getOperand(i: `2`).setReg(Trunc.getReg(Idx: `0`));
3169	}
3170	}
3171	[[fallthrough]];
3172	case TargetOpcode::G_OR: {
3173	// Reject the various things we don't support yet.
3174	if (unsupportedBinOp(I, RBI, MRI, TRI))
3175	return false;
3176
3177	const unsigned OpSize = Ty.getSizeInBits();
3178
3179	const Register DefReg = I.getOperand(i: `0`).getReg();
3180	const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
3181
3182	const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize);
3183	if (NewOpc == I.getOpcode())
3184	return false;
3185
3186	I.setDesc(TII.get(Opcode: NewOpc));
3187	// FIXME: Should the type be always reset in setDesc?
3188
3189	// Now that we selected an opcode, we need to constrain the register
3190	// operands to use appropriate classes.
3191	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3192	return true;
3193	}
3194
3195	case TargetOpcode::G_PTR_ADD: {
3196	emitADD(DefReg: I.getOperand(i: `0`).getReg(), LHS&: I.getOperand(i: `1`), RHS&: I.getOperand(i: `2`), MIRBuilder&: MIB);
3197	I.eraseFromParent();
3198	return true;
3199	}
3200
3201	case TargetOpcode::G_SADDE:
3202	case TargetOpcode::G_UADDE:
3203	case TargetOpcode::G_SSUBE:
3204	case TargetOpcode::G_USUBE:
3205	case TargetOpcode::G_SADDO:
3206	case TargetOpcode::G_UADDO:
3207	case TargetOpcode::G_SSUBO:
3208	case TargetOpcode::G_USUBO:
3209	return selectOverflowOp(I, MRI);
3210
3211	case TargetOpcode::G_PTRMASK: {
3212	Register MaskReg = I.getOperand(i: `2`).getReg();
3213	std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI);
3214	// TODO: Implement arbitrary cases
3215	if (!MaskVal \|\| !isShiftedMask_64(Value: *MaskVal))
3216	return false;
3217
3218	uint64_t Mask = *MaskVal;
3219	I.setDesc(TII.get(Opcode: AArch64::ANDXri));
3220	I.getOperand(i: `2`).ChangeToImmediate(
3221	ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: `64`));
3222
3223	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3224	return true;
3225	}
3226	case TargetOpcode::G_PTRTOINT:
3227	case TargetOpcode::G_TRUNC: {
3228	const LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
3229	const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
3230
3231	const Register DstReg = I.getOperand(i: `0`).getReg();
3232	const Register SrcReg = I.getOperand(i: `1`).getReg();
3233
3234	const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3235	const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3236
3237	if (DstRB.getID() != SrcRB.getID()) {
3238	LLVM_DEBUG(
3239	dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3240	return false;
3241	}
3242
3243	if (DstRB.getID() == AArch64::GPRRegBankID) {
3244	const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3245	if (!DstRC)
3246	return false;
3247
3248	const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB);
3249	if (!SrcRC)
3250	return false;
3251
3252	if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) \|\|
3253	!RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
3254	LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3255	return false;
3256	}
3257
3258	if (DstRC == SrcRC) {
3259	// Nothing to be done
3260	} else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: `32`) &&
3261	SrcTy == LLT::scalar(SizeInBits: `64`)) {
3262	llvm_unreachable("TableGen can import this case");
3263	return false;
3264	} else if (DstRC == &AArch64::GPR32RegClass &&
3265	SrcRC == &AArch64::GPR64RegClass) {
3266	I.getOperand(i: `1`).setSubReg(AArch64::sub_32);
3267	} else {
3268	LLVM_DEBUG(
3269	dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3270	return false;
3271	}
3272
3273	I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3274	return true;
3275	} else if (DstRB.getID() == AArch64::FPRRegBankID) {
3276	if (DstTy == LLT::fixed_vector(NumElements: `4`, ScalarSizeInBits: `16`) &&
3277	SrcTy == LLT::fixed_vector(NumElements: `4`, ScalarSizeInBits: `32`)) {
3278	I.setDesc(TII.get(Opcode: AArch64::XTNv4i16));
3279	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3280	return true;
3281	}
3282
3283	if (!SrcTy.isVector() && SrcTy.getSizeInBits() == `128`) {
3284	MachineInstr *Extract = emitExtractVectorElt(
3285	DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: `0`, MIRBuilder&: MIB);
3286	if (!Extract)
3287	return false;
3288	I.eraseFromParent();
3289	return true;
3290	}
3291
3292	// We might have a vector G_PTRTOINT, in which case just emit a COPY.
3293	if (Opcode == TargetOpcode::G_PTRTOINT) {
3294	assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3295	I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3296	return selectCopy(I, TII, MRI, TRI, RBI);
3297	}
3298	}
3299
3300	return false;
3301	}
3302
3303	case TargetOpcode::G_ANYEXT: {
3304	if (selectUSMovFromExtend(I, MRI))
3305	return true;
3306
3307	const Register DstReg = I.getOperand(i: `0`).getReg();
3308	const Register SrcReg = I.getOperand(i: `1`).getReg();
3309
3310	const RegisterBank &RBDst = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3311	if (RBDst.getID() != AArch64::GPRRegBankID) {
3312	LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3313	<< ", expected: GPR\n");
3314	return false;
3315	}
3316
3317	const RegisterBank &RBSrc = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3318	if (RBSrc.getID() != AArch64::GPRRegBankID) {
3319	LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3320	<< ", expected: GPR\n");
3321	return false;
3322	}
3323
3324	const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits();
3325
3326	if (DstSize == `0`) {
3327	LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3328	return false;
3329	}
3330
3331	if (DstSize != `64` && DstSize > `32`) {
3332	LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3333	<< ", expected: 32 or 64\n");
3334	return false;
3335	}
3336	// At this point G_ANYEXT is just like a plain COPY, but we need
3337	// to explicitly form the 64-bit value if any.
3338	if (DstSize > `32`) {
3339	Register ExtSrc = MRI.createVirtualRegister(RegClass: &AArch64::GPR64allRegClass);
3340	BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
3341	.addDef(RegNo: ExtSrc)
3342	.addUse(RegNo: SrcReg)
3343	.addImm(Val: AArch64::sub_32);
3344	I.getOperand(i: `1`).setReg(ExtSrc);
3345	}
3346	return selectCopy(I, TII, MRI, TRI, RBI);
3347	}
3348
3349	case TargetOpcode::G_ZEXT:
3350	case TargetOpcode::G_SEXT_INREG:
3351	case TargetOpcode::G_SEXT: {
3352	if (selectUSMovFromExtend(I, MRI))
3353	return true;
3354
3355	unsigned Opcode = I.getOpcode();
3356	const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3357	const Register DefReg = I.getOperand(i: `0`).getReg();
3358	Register SrcReg = I.getOperand(i: `1`).getReg();
3359	const LLT DstTy = MRI.getType(Reg: DefReg);
3360	const LLT SrcTy = MRI.getType(Reg: SrcReg);
3361	unsigned DstSize = DstTy.getSizeInBits();
3362	unsigned SrcSize = SrcTy.getSizeInBits();
3363
3364	// SEXT_INREG has the same src reg size as dst, the size of the value to be
3365	// extended is encoded in the imm.
3366	if (Opcode == TargetOpcode::G_SEXT_INREG)
3367	SrcSize = I.getOperand(i: `2`).getImm();
3368
3369	if (DstTy.isVector())
3370	return false; // Should be handled by imported patterns.
3371
3372	assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3373	AArch64::GPRRegBankID &&
3374	"Unexpected ext regbank");
3375
3376	MachineInstr *ExtI;
3377
3378	// First check if we're extending the result of a load which has a dest type
3379	// smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3380	// GPR register on AArch64 and all loads which are smaller automatically
3381	// zero-extend the upper bits. E.g.
3382	// %v(s8) = G_LOAD %p, :: (load 1)
3383	// %v2(s32) = G_ZEXT %v(s8)
3384	if (!IsSigned) {
3385	auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI);
3386	bool IsGPR =
3387	RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3388	if (LoadMI && IsGPR) {
3389	const MachineMemOperand MemOp = LoadMI->memoperands_begin();
3390	unsigned BytesLoaded = MemOp->getSize().getValue();
3391	if (BytesLoaded < `4` && SrcTy.getSizeInBytes() == BytesLoaded)
3392	return selectCopy(I, TII, MRI, TRI, RBI);
3393	}
3394
3395	// For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3396	// + SUBREG_TO_REG.
3397	if (IsGPR && SrcSize == `32` && DstSize == `64`) {
3398	Register SubregToRegSrc =
3399	MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3400	const Register ZReg = AArch64::WZR;
3401	MIB.buildInstr(Opc: AArch64::ORRWrs, DstOps: {SubregToRegSrc}, SrcOps: {ZReg, SrcReg})
3402	.addImm(Val: `0`);
3403
3404	MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
3405	.addUse(RegNo: SubregToRegSrc)
3406	.addImm(Val: AArch64::sub_32);
3407
3408	if (!RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass,
3409	MRI)) {
3410	LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3411	return false;
3412	}
3413
3414	if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3415	MRI)) {
3416	LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3417	return false;
3418	}
3419
3420	I.eraseFromParent();
3421	return true;
3422	}
3423	}
3424
3425	if (DstSize == `64`) {
3426	if (Opcode != TargetOpcode::G_SEXT_INREG) {
3427	// FIXME: Can we avoid manually doing this?
3428	if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3429	MRI)) {
3430	LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3431	<< " operand\n");
3432	return false;
3433	}
3434	SrcReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG,
3435	DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
3436	.addUse(RegNo: SrcReg)
3437	.addImm(Val: AArch64::sub_32)
3438	.getReg(Idx: `0`);
3439	}
3440
3441	ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3442	DstOps: {DefReg}, SrcOps: {SrcReg})
3443	.addImm(Val: `0`)
3444	.addImm(Val: SrcSize - `1`);
3445	} else if (DstSize <= `32`) {
3446	ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3447	DstOps: {DefReg}, SrcOps: {SrcReg})
3448	.addImm(Val: `0`)
3449	.addImm(Val: SrcSize - `1`);
3450	} else {
3451	return false;
3452	}
3453
3454	constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
3455	I.eraseFromParent();
3456	return true;
3457	}
3458
3459	case TargetOpcode::G_FREEZE:
3460	return selectCopy(I, TII, MRI, TRI, RBI);
3461
3462	case TargetOpcode::G_INTTOPTR:
3463	// The importer is currently unable to import pointer types since they
3464	// didn't exist in SelectionDAG.
3465	return selectCopy(I, TII, MRI, TRI, RBI);
3466
3467	case TargetOpcode::G_BITCAST:
3468	// Imported SelectionDAG rules can handle every bitcast except those that
3469	// bitcast from a type to the same type. Ideally, these shouldn't occur
3470	// but we might not run an optimizer that deletes them. The other exception
3471	// is bitcasts involving pointer types, as SelectionDAG has no knowledge
3472	// of them.
3473	return selectCopy(I, TII, MRI, TRI, RBI);
3474
3475	case TargetOpcode::G_SELECT: {
3476	auto &Sel = cast<GSelect>(Val&: I);
3477	const Register CondReg = Sel.getCondReg();
3478	const Register TReg = Sel.getTrueReg();
3479	const Register FReg = Sel.getFalseReg();
3480
3481	if (tryOptSelect(Sel))
3482	return true;
3483
3484	// Make sure to use an unused vreg instead of wzr, so that the peephole
3485	// optimizations will be able to optimize these.
3486	Register DeadVReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3487	auto TstMI = MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {DeadVReg}, SrcOps: {CondReg})
3488	.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: `1`, regSize: `32`));
3489	constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
3490	if (!emitSelect(Dst: Sel.getReg(Idx: `0`), True: TReg, False: FReg, CC: AArch64CC::NE, MIB))
3491	return false;
3492	Sel.eraseFromParent();
3493	return true;
3494	}
3495	case TargetOpcode::G_ICMP: {
3496	if (Ty.isVector())
3497	return false;
3498
3499	if (Ty != LLT::scalar(SizeInBits: `32`)) {
3500	LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3501	<< ", expected: " << LLT::scalar(`32`) << `'\n'`);
3502	return false;
3503	}
3504
3505	auto &PredOp = I.getOperand(i: `1`);
3506	emitIntegerCompare(LHS&: I.getOperand(i: `2`), RHS&: I.getOperand(i: `3`), Predicate&: PredOp, MIRBuilder&: MIB);
3507	auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3508	const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3509	P: CmpInst::getInversePredicate(pred: Pred), RHS: I.getOperand(i: `3`).getReg(), MRI: &MRI);
3510	emitCSINC(/Dst=/I.getOperand(i: `0`).getReg(), /Src1=/AArch64::WZR,
3511	/Src2=/AArch64::WZR, Pred: InvCC, MIRBuilder&: MIB);
3512	I.eraseFromParent();
3513	return true;
3514	}
3515
3516	case TargetOpcode::G_FCMP: {
3517	CmpInst::Predicate Pred =
3518	static_cast<CmpInst::Predicate>(I.getOperand(i: `1`).getPredicate());
3519	if (!emitFPCompare(LHS: I.getOperand(i: `2`).getReg(), RHS: I.getOperand(i: `3`).getReg(), MIRBuilder&: MIB,
3520	Pred) \|\|
3521	!emitCSetForFCmp(Dst: I.getOperand(i: `0`).getReg(), Pred, MIRBuilder&: MIB))
3522	return false;
3523	I.eraseFromParent();
3524	return true;
3525	}
3526	case TargetOpcode::G_VASTART:
3527	return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3528	: selectVaStartAAPCS(I, MF, MRI);
3529	case TargetOpcode::G_INTRINSIC:
3530	return selectIntrinsic(I, MRI);
3531	case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3532	return selectIntrinsicWithSideEffects(I, MRI);
3533	case TargetOpcode::G_IMPLICIT_DEF: {
3534	I.setDesc(TII.get(Opcode: TargetOpcode::IMPLICIT_DEF));
3535	const LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
3536	const Register DstReg = I.getOperand(i: `0`).getReg();
3537	const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3538	const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3539	RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI);
3540	return true;
3541	}
3542	case TargetOpcode::G_BLOCK_ADDR: {
3543	Function *BAFn = I.getOperand(i: `1`).getBlockAddress()->getFunction();
3544	if (std::optional<uint16_t> BADisc =
3545	STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: *BAFn)) {
3546	MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
3547	MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
3548	MIB.buildInstr(Opcode: AArch64::MOVaddrPAC)
3549	.addBlockAddress(BA: I.getOperand(i: `1`).getBlockAddress())
3550	.addImm(Val: AArch64PACKey::IA)
3551	.addReg(/AddrDisc=/RegNo: AArch64::XZR)
3552	.addImm(Val: *BADisc)
3553	.constrainAllUses(TII, TRI, RBI);
3554	MIB.buildCopy(Res: I.getOperand(i: `0`).getReg(), Op: Register (AArch64::X16));
3555	RBI.constrainGenericRegister(Reg: I.getOperand(i: `0`).getReg(),
3556	RC: AArch64::GPR64RegClass, MRI);
3557	I.eraseFromParent();
3558	return true;
3559	}
3560	if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3561	materializeLargeCMVal(I, V: I.getOperand(i: `1`).getBlockAddress(), OpFlags: `0`);
3562	I.eraseFromParent();
3563	return true;
3564	} else {
3565	I.setDesc(TII.get(Opcode: AArch64::MOVaddrBA));
3566	auto MovMI = BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVaddrBA),
3567	DestReg: I.getOperand(i: `0`).getReg())
3568	.addBlockAddress(BA: I.getOperand(i: `1`).getBlockAddress(),
3569	/ Offset / `0`, TargetFlags: AArch64II::MO_PAGE)
3570	.addBlockAddress(
3571	BA: I.getOperand(i: `1`).getBlockAddress(), / Offset / `0`,
3572	TargetFlags: AArch64II::MO_NC \| AArch64II::MO_PAGEOFF);
3573	I.eraseFromParent();
3574	constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3575	return true;
3576	}
3577	}
3578	case AArch64::G_DUP: {
3579	// When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3580	// imported patterns. Do it manually here. Avoiding generating s16 gpr is
3581	// difficult because at RBS we may end up pessimizing the fpr case if we
3582	// decided to add an anyextend to fix this. Manual selection is the most
3583	// robust solution for now.
3584	if (RBI.getRegBank(Reg: I.getOperand(i: `1`).getReg(), MRI, TRI)->getID() !=
3585	AArch64::GPRRegBankID)
3586	return false; // We expect the fpr regbank case to be imported.
3587	LLT VecTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
3588	if (VecTy == LLT::fixed_vector(NumElements: `8`, ScalarSizeInBits: `8`))
3589	I.setDesc(TII.get(Opcode: AArch64::DUPv8i8gpr));
3590	else if (VecTy == LLT::fixed_vector(NumElements: `16`, ScalarSizeInBits: `8`))
3591	I.setDesc(TII.get(Opcode: AArch64::DUPv16i8gpr));
3592	else if (VecTy == LLT::fixed_vector(NumElements: `4`, ScalarSizeInBits: `16`))
3593	I.setDesc(TII.get(Opcode: AArch64::DUPv4i16gpr));
3594	else if (VecTy == LLT::fixed_vector(NumElements: `8`, ScalarSizeInBits: `16`))
3595	I.setDesc(TII.get(Opcode: AArch64::DUPv8i16gpr));
3596	else
3597	return false;
3598	constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3599	return true;
3600	}
3601	case TargetOpcode::G_BUILD_VECTOR:
3602	return selectBuildVector(I, MRI);
3603	case TargetOpcode::G_MERGE_VALUES:
3604	return selectMergeValues(I, MRI);
3605	case TargetOpcode::G_UNMERGE_VALUES:
3606	return selectUnmergeValues(I, MRI);
3607	case TargetOpcode::G_SHUFFLE_VECTOR:
3608	return selectShuffleVector(I, MRI);
3609	case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3610	return selectExtractElt(I, MRI);
3611	case TargetOpcode::G_CONCAT_VECTORS:
3612	return selectConcatVectors(I, MRI);
3613	case TargetOpcode::G_JUMP_TABLE:
3614	return selectJumpTable(I, MRI);
3615	case TargetOpcode::G_MEMCPY:
3616	case TargetOpcode::G_MEMCPY_INLINE:
3617	case TargetOpcode::G_MEMMOVE:
3618	case TargetOpcode::G_MEMSET:
3619	case TargetOpcode::G_MEMSET_INLINE:
3620	assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3621	return selectMOPS(I, MRI);
3622	}
3623
3624	return false;
3625	}
3626
3627	bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3628	MachineIRBuilderState OldMIBState = MIB.getState();
3629	bool Success = select(I);
3630	MIB.setState(OldMIBState);
3631	return Success;
3632	}
3633
3634	bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3635	MachineRegisterInfo &MRI) {
3636	unsigned Mopcode;
3637	switch (GI.getOpcode()) {
3638	case TargetOpcode::G_MEMCPY:
3639	case TargetOpcode::G_MEMCPY_INLINE:
3640	Mopcode = AArch64::MOPSMemoryCopyPseudo;
3641	break;
3642	case TargetOpcode::G_MEMMOVE:
3643	Mopcode = AArch64::MOPSMemoryMovePseudo;
3644	break;
3645	case TargetOpcode::G_MEMSET:
3646	case TargetOpcode::G_MEMSET_INLINE:
3647	// For tagged memset see llvm.aarch64.mops.memset.tag
3648	Mopcode = AArch64::MOPSMemorySetPseudo;
3649	break;
3650	}
3651
3652	auto &DstPtr = GI.getOperand(i: `0`);
3653	auto &SrcOrVal = GI.getOperand(i: `1`);
3654	auto &Size = GI.getOperand(i: `2`);
3655
3656	// Create copies of the registers that can be clobbered.
3657	const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg());
3658	const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg());
3659	const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg());
3660
3661	const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3662	const auto &SrcValRegClass =
3663	IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3664
3665	// Constrain to specific registers
3666	RBI.constrainGenericRegister(Reg: DstPtrCopy, RC: AArch64::GPR64commonRegClass, MRI);
3667	RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI);
3668	RBI.constrainGenericRegister(Reg: SizeCopy, RC: AArch64::GPR64RegClass, MRI);
3669
3670	MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr);
3671	MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal);
3672	MIB.buildCopy(Res: SizeCopy, Op: Size);
3673
3674	// New instruction uses the copied registers because it must update them.
3675	// The defs are not used since they don't exist in G_MEM. They are still*
3676	// tied.
3677	// Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3678	Register DefDstPtr = MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass);
3679	Register DefSize = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3680	if (IsSet) {
3681	MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize},
3682	SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy});
3683	} else {
3684	Register DefSrcPtr = MRI.createVirtualRegister(RegClass: &SrcValRegClass);
3685	MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize},
3686	SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy});
3687	}
3688
3689	GI.eraseFromParent();
3690	return true;
3691	}
3692
3693	bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3694	MachineRegisterInfo &MRI) {
3695	assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3696	Register JTAddr = I.getOperand(i: `0`).getReg();
3697	unsigned JTI = I.getOperand(i: `1`).getIndex();
3698	Register Index = I.getOperand(i: `2`).getReg();
3699
3700	MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: `4`, PCRelSym: nullptr);
3701
3702	// With aarch64-jump-table-hardening, we only expand the jump table dispatch
3703	// sequence later, to guarantee the integrity of the intermediate values.
3704	if (MF->getFunction().hasFnAttribute(Kind: "aarch64-jump-table-hardening")) {
3705	CodeModel::Model CM = TM.getCodeModel();
3706	if (STI.isTargetMachO()) {
3707	if (CM != CodeModel::Small && CM != CodeModel::Large)
3708	report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
3709	} else {
3710	// Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3711	assert(STI.isTargetELF() &&
3712	"jump table hardening only supported on MachO/ELF");
3713	if (CM != CodeModel::Small)
3714	report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
3715	}
3716
3717	MIB.buildCopy(Res: {AArch64::X16}, Op: I.getOperand(i: `2`).getReg());
3718	MIB.buildInstr(Opcode: AArch64::BR_JumpTable)
3719	.addJumpTableIndex(Idx: I.getOperand(i: `1`).getIndex());
3720	I.eraseFromParent();
3721	return true;
3722	}
3723
3724	Register TargetReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3725	Register ScratchReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
3726
3727	auto JumpTableInst = MIB.buildInstr(Opc: AArch64::JumpTableDest32,
3728	DstOps: {TargetReg, ScratchReg}, SrcOps: {JTAddr, Index})
3729	.addJumpTableIndex(Idx: JTI);
3730	// Save the jump table info.
3731	MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {},
3732	SrcOps: {static_cast<int64_t>(JTI)});
3733	// Build the indirect branch.
3734	MIB.buildInstr(Opc: AArch64::BR, DstOps: {}, SrcOps: {TargetReg});
3735	I.eraseFromParent();
3736	constrainSelectedInstRegOperands(I&: *JumpTableInst, TII, TRI, RBI);
3737	return true;
3738	}
3739
3740	bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3741	MachineRegisterInfo &MRI) {
3742	assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3743	assert(I.getOperand(`1`).isJTI() && "Jump table op should have a JTI!");
3744
3745	Register DstReg = I.getOperand(i: `0`).getReg();
3746	unsigned JTI = I.getOperand(i: `1`).getIndex();
3747	// We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3748	auto MovMI =
3749	MIB.buildInstr(Opc: AArch64::MOVaddrJT, DstOps: {DstReg}, SrcOps: {})
3750	.addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_PAGE)
3751	.addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_NC \| AArch64II::MO_PAGEOFF);
3752	I.eraseFromParent();
3753	constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3754	return true;
3755	}
3756
3757	bool AArch64InstructionSelector::selectTLSGlobalValue(
3758	MachineInstr &I, MachineRegisterInfo &MRI) {
3759	if (!STI.isTargetMachO())
3760	return false;
3761	MachineFunction &MF = *I.getParent()->getParent();
3762	MF.getFrameInfo().setAdjustsStack(true);
3763
3764	const auto &GlobalOp = I.getOperand(i: `1`);
3765	assert(GlobalOp.getOffset() == `0` &&
3766	"Shouldn't have an offset on TLS globals!");
3767	const GlobalValue &GV = *GlobalOp.getGlobal();
3768
3769	auto LoadGOT =
3770	MIB.buildInstr(Opc: AArch64::LOADgot, DstOps: {&AArch64::GPR64commonRegClass}, SrcOps: {})
3771	.addGlobalAddress(GV: &GV, Offset: `0`, TargetFlags: AArch64II::MO_TLS);
3772
3773	auto Load = MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {&AArch64::GPR64commonRegClass},
3774	SrcOps: {LoadGOT.getReg(Idx: `0`)})
3775	.addImm(Val: `0`);
3776
3777	MIB.buildCopy(Res: Register (AArch64::X0), Op: LoadGOT.getReg(Idx: `0`));
3778	// TLS calls preserve all registers except those that absolutely must be
3779	// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3780	// silly).
3781	unsigned Opcode = getBLRCallOpcode(MF);
3782
3783	// With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3784	if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-calls")) {
3785	assert(Opcode == AArch64::BLR);
3786	Opcode = AArch64::BLRAAZ;
3787	}
3788
3789	MIB.buildInstr(Opc: Opcode, DstOps: {}, SrcOps: {Load})
3790	.addUse(RegNo: AArch64::X0, Flags: RegState::Implicit)
3791	.addDef(RegNo: AArch64::X0, Flags: RegState::Implicit)
3792	.addRegMask(Mask: TRI.getTLSCallPreservedMask());
3793
3794	MIB.buildCopy(Res: I.getOperand(i: `0`).getReg(), Op: Register (AArch64::X0));
3795	RBI.constrainGenericRegister(Reg: I.getOperand(i: `0`).getReg(), RC: AArch64::GPR64RegClass,
3796	MRI);
3797	I.eraseFromParent();
3798	return true;
3799	}
3800
3801	MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3802	unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3803	MachineIRBuilder &MIRBuilder) const {
3804	auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {});
3805
3806	auto BuildFn = [&](unsigned SubregIndex) {
3807	auto Ins =
3808	MIRBuilder
3809	.buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar})
3810	.addImm(Val: SubregIndex);
3811	constrainSelectedInstRegOperands(I&: *Undef, TII, TRI, RBI);
3812	constrainSelectedInstRegOperands(I&: *Ins, TII, TRI, RBI);
3813	return &*Ins;
3814	};
3815
3816	switch (EltSize) {
3817	case `8`:
3818	return BuildFn (AArch64::bsub);
3819	case `16`:
3820	return BuildFn (AArch64::hsub);
3821	case `32`:
3822	return BuildFn (AArch64::ssub);
3823	case `64`:
3824	return BuildFn (AArch64::dsub);
3825	default:
3826	return nullptr;
3827	}
3828	}
3829
3830	MachineInstr *
3831	AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3832	MachineIRBuilder &MIB,
3833	MachineRegisterInfo &MRI) const {
3834	LLT DstTy = MRI.getType(Reg: DstReg);
3835	const TargetRegisterClass *RC =
3836	getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
3837	if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3838	LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3839	return nullptr;
3840	}
3841	unsigned SubReg = `0`;
3842	if (!getSubRegForClass(RC, TRI, SubReg))
3843	return nullptr;
3844	if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3845	LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3846	<< DstTy.getSizeInBits() << "\n");
3847	return nullptr;
3848	}
3849	auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
3850	.addReg(RegNo: SrcReg, Flags: {}, SubReg);
3851	RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
3852	return Copy;
3853	}
3854
3855	bool AArch64InstructionSelector::selectMergeValues(
3856	MachineInstr &I, MachineRegisterInfo &MRI) {
3857	assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3858	const LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
3859	const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
3860	assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3861	const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: `1`).getReg(), MRI, TRI);
3862
3863	if (I.getNumOperands() != `3`)
3864	return false;
3865
3866	// Merging 2 s64s into an s128.
3867	if (DstTy == LLT::scalar(SizeInBits: `128`)) {
3868	if (SrcTy.getSizeInBits() != `64`)
3869	return false;
3870	Register DstReg = I.getOperand(i: `0`).getReg();
3871	Register Src1Reg = I.getOperand(i: `1`).getReg();
3872	Register Src2Reg = I.getOperand(i: `2`).getReg();
3873	auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {});
3874	MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: `0`), EltReg: Src1Reg,
3875	/ LaneIdx / `0`, RB, MIRBuilder&: MIB);
3876	if (!InsMI)
3877	return false;
3878	MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: `0`).getReg(),
3879	EltReg: Src2Reg, / LaneIdx / `1`, RB, MIRBuilder&: MIB);
3880	if (!Ins2MI)
3881	return false;
3882	constrainSelectedInstRegOperands(I&: *InsMI, TII, TRI, RBI);
3883	constrainSelectedInstRegOperands(I&: *Ins2MI, TII, TRI, RBI);
3884	I.eraseFromParent();
3885	return true;
3886	}
3887
3888	if (RB.getID() != AArch64::GPRRegBankID)
3889	return false;
3890
3891	if (DstTy.getSizeInBits() != `64` \|\| SrcTy.getSizeInBits() != `32`)
3892	return false;
3893
3894	auto *DstRC = &AArch64::GPR64RegClass;
3895	Register SubToRegDef = MRI.createVirtualRegister(RegClass: DstRC);
3896	MachineInstr &SubRegMI = BuildMI(BB&: I.getParent(), I, MIMD: I.getDebugLoc(),
3897	MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
3898	.addDef(RegNo: SubToRegDef)
3899	.addUse(RegNo: I.getOperand(i: `1`).getReg())
3900	.addImm(Val: AArch64::sub_32);
3901	Register SubToRegDef2 = MRI.createVirtualRegister(RegClass: DstRC);
3902	// Need to anyext the second scalar before we can use bfm
3903	MachineInstr &SubRegMI2 = BuildMI(BB&: I.getParent(), I, MIMD: I.getDebugLoc(),
3904	MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
3905	.addDef(RegNo: SubToRegDef2)
3906	.addUse(RegNo: I.getOperand(i: `2`).getReg())
3907	.addImm(Val: AArch64::sub_32);
3908	MachineInstr &BFM =
3909	BuildMI(BB&: I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::BFMXri))
3910	.addDef(RegNo: I.getOperand(i: `0`).getReg())
3911	.addUse(RegNo: SubToRegDef)
3912	.addUse(RegNo: SubToRegDef2)
3913	.addImm(Val: `32`)
3914	.addImm(Val: `31`);
3915	constrainSelectedInstRegOperands(I&: SubRegMI, TII, TRI, RBI);
3916	constrainSelectedInstRegOperands(I&: SubRegMI2, TII, TRI, RBI);
3917	constrainSelectedInstRegOperands(I&: BFM, TII, TRI, RBI);
3918	I.eraseFromParent();
3919	return true;
3920	}
3921
3922	static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3923	const unsigned EltSize) {
3924	// Choose a lane copy opcode and subregister based off of the size of the
3925	// vector's elements.
3926	switch (EltSize) {
3927	case `8`:
3928	CopyOpc = AArch64::DUPi8;
3929	ExtractSubReg = AArch64::bsub;
3930	break;
3931	case `16`:
3932	CopyOpc = AArch64::DUPi16;
3933	ExtractSubReg = AArch64::hsub;
3934	break;
3935	case `32`:
3936	CopyOpc = AArch64::DUPi32;
3937	ExtractSubReg = AArch64::ssub;
3938	break;
3939	case `64`:
3940	CopyOpc = AArch64::DUPi64;
3941	ExtractSubReg = AArch64::dsub;
3942	break;
3943	default:
3944	// Unknown size, bail out.
3945	LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3946	return false;
3947	}
3948	return true;
3949	}
3950
3951	MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3952	std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3953	Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3954	MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3955	unsigned CopyOpc = `0`;
3956	unsigned ExtractSubReg = `0`;
3957	if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) {
3958	LLVM_DEBUG(
3959	dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3960	return nullptr;
3961	}
3962
3963	const TargetRegisterClass *DstRC =
3964	getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true);
3965	if (!DstRC) {
3966	LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3967	return nullptr;
3968	}
3969
3970	const RegisterBank &VecRB = *RBI.getRegBank(Reg: VecReg, MRI, TRI);
3971	const LLT &VecTy = MRI.getType(Reg: VecReg);
3972	const TargetRegisterClass *VecRC =
3973	getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true);
3974	if (!VecRC) {
3975	LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3976	return nullptr;
3977	}
3978
3979	// The register that we're going to copy into.
3980	Register InsertReg = VecReg;
3981	if (!DstReg)
3982	DstReg = MRI.createVirtualRegister(RegClass: DstRC);
3983	// If the lane index is 0, we just use a subregister COPY.
3984	if (LaneIdx == `0`) {
3985	auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {})
3986	.addReg(RegNo: VecReg, Flags: {}, SubReg: ExtractSubReg);
3987	RBI.constrainGenericRegister(Reg: DstReg, RC: DstRC, MRI);
3988	return &*Copy;
3989	}
3990
3991	// Lane copies require 128-bit wide registers. If we're dealing with an
3992	// unpacked vector, then we need to move up to that width. Insert an implicit
3993	// def and a subregister insert to get us there.
3994	if (VecTy.getSizeInBits() != `128`) {
3995	MachineInstr *ScalarToVector = emitScalarToVector(
3996	EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: VecReg, MIRBuilder);
3997	if (!ScalarToVector)
3998	return nullptr;
3999	InsertReg = ScalarToVector->getOperand(i: `0`).getReg();
4000	}
4001
4002	MachineInstr *LaneCopyMI =
4003	MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx);
4004	constrainSelectedInstRegOperands(I&: *LaneCopyMI, TII, TRI, RBI);
4005
4006	// Make sure that we actually constrain the initial copy.
4007	RBI.constrainGenericRegister(Reg: DstReg, RC: DstRC, MRI);
4008	return LaneCopyMI;
4009	}
4010
4011	bool AArch64InstructionSelector::selectExtractElt(
4012	MachineInstr &I, MachineRegisterInfo &MRI) {
4013	assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4014	"unexpected opcode!");
4015	Register DstReg = I.getOperand(i: `0`).getReg();
4016	const LLT NarrowTy = MRI.getType(Reg: DstReg);
4017	const Register SrcReg = I.getOperand(i: `1`).getReg();
4018	const LLT WideTy = MRI.getType(Reg: SrcReg);
4019	(void)WideTy;
4020	assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4021	"source register size too small!");
4022	assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4023
4024	// Need the lane index to determine the correct copy opcode.
4025	MachineOperand &LaneIdxOp = I.getOperand(i: `2`);
4026	assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4027
4028	if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4029	LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4030	return false;
4031	}
4032
4033	// Find the index to extract from.
4034	auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI);
4035	if (!VRegAndVal)
4036	return false;
4037	unsigned LaneIdx = VRegAndVal ->Value.getSExtValue();
4038
4039
4040	const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
4041	MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg,
4042	LaneIdx, MIRBuilder&: MIB);
4043	if (!Extract)
4044	return false;
4045
4046	I.eraseFromParent();
4047	return true;
4048	}
4049
4050	bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4051	MachineInstr &I, MachineRegisterInfo &MRI) {
4052	unsigned NumElts = I.getNumOperands() - `1`;
4053	Register SrcReg = I.getOperand(i: NumElts).getReg();
4054	const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
4055	const LLT SrcTy = MRI.getType(Reg: SrcReg);
4056
4057	assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4058	if (SrcTy.getSizeInBits() > `128`) {
4059	LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4060	return false;
4061	}
4062
4063	// We implement a split vector operation by treating the sub-vectors as
4064	// scalars and extracting them.
4065	const RegisterBank &DstRB =
4066	*RBI.getRegBank(Reg: I.getOperand(i: `0`).getReg(), MRI, TRI);
4067	for (unsigned OpIdx = `0`; OpIdx < NumElts; ++OpIdx) {
4068	Register Dst = I.getOperand(i: OpIdx).getReg();
4069	MachineInstr *Extract =
4070	emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB);
4071	if (!Extract)
4072	return false;
4073	}
4074	I.eraseFromParent();
4075	return true;
4076	}
4077
4078	bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4079	MachineRegisterInfo &MRI) {
4080	assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4081	"unexpected opcode");
4082
4083	// TODO: Handle unmerging into GPRs and from scalars to scalars.
4084	if (RBI.getRegBank(Reg: I.getOperand(i: `0`).getReg(), MRI, TRI)->getID() !=
4085	AArch64::FPRRegBankID \|\|
4086	RBI.getRegBank(Reg: I.getOperand(i: `1`).getReg(), MRI, TRI)->getID() !=
4087	AArch64::FPRRegBankID) {
4088	LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4089	"currently unsupported.\n");
4090	return false;
4091	}
4092
4093	// The last operand is the vector source register, and every other operand is
4094	// a register to unpack into.
4095	unsigned NumElts = I.getNumOperands() - `1`;
4096	Register SrcReg = I.getOperand(i: NumElts).getReg();
4097	const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
4098	const LLT WideTy = MRI.getType(Reg: SrcReg);
4099
4100	assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4101	"source register size too small!");
4102
4103	if (!NarrowTy.isScalar())
4104	return selectSplitVectorUnmerge(I, MRI);
4105
4106	// Choose a lane copy opcode and subregister based off of the size of the
4107	// vector's elements.
4108	unsigned CopyOpc = `0`;
4109	unsigned ExtractSubReg = `0`;
4110	if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits()))
4111	return false;
4112
4113	// Set up for the lane copies.
4114	MachineBasicBlock &MBB = *I.getParent();
4115
4116	// Stores the registers we'll be copying from.
4117	SmallVector<Register, `4`> InsertRegs;
4118
4119	// We'll use the first register twice, so we only need NumElts-1 registers.
4120	unsigned NumInsertRegs = NumElts - `1`;
4121
4122	// If our elements fit into exactly 128 bits, then we can copy from the source
4123	// directly. Otherwise, we need to do a bit of setup with some subregister
4124	// inserts.
4125	if (NarrowTy.getSizeInBits() * NumElts == `128`) {
4126	InsertRegs.assign(NumElts: NumInsertRegs, Elt: SrcReg);
4127	} else {
4128	// No. We have to perform subregister inserts. For each insert, create an
4129	// implicit def and a subregister insert, and save the register we create.
4130	// For scalar sources, treat as a pseudo-vector of NarrowTy elements.
4131	unsigned EltSize = WideTy.isVector() ? WideTy.getScalarSizeInBits()
4132	: NarrowTy.getSizeInBits();
4133	const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4134	Ty: LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: EltSize), RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
4135	unsigned SubReg = `0`;
4136	bool Found = getSubRegForClass(RC, TRI, SubReg);
4137	(void)Found;
4138	assert(Found && "expected to find last operand's subeg idx");
4139	for (unsigned Idx = `0`; Idx < NumInsertRegs; ++Idx) {
4140	Register ImpDefReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
4141	MachineInstr &ImpDefMI =
4142	*BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: TargetOpcode::IMPLICIT_DEF),
4143	DestReg: ImpDefReg);
4144
4145	// Now, create the subregister insert from SrcReg.
4146	Register InsertReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
4147	MachineInstr &InsMI =
4148	*BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(),
4149	MCID: TII.get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: InsertReg)
4150	.addUse(RegNo: ImpDefReg)
4151	.addUse(RegNo: SrcReg)
4152	.addImm(Val: SubReg);
4153
4154	constrainSelectedInstRegOperands(I&: ImpDefMI, TII, TRI, RBI);
4155	constrainSelectedInstRegOperands(I&: InsMI, TII, TRI, RBI);
4156
4157	// Save the register so that we can copy from it after.
4158	InsertRegs.push_back(Elt: InsertReg);
4159	}
4160	}
4161
4162	// Now that we've created any necessary subregister inserts, we can
4163	// create the copies.
4164	//
4165	// Perform the first copy separately as a subregister copy.
4166	Register CopyTo = I.getOperand(i: `0`).getReg();
4167	auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {})
4168	.addReg(RegNo: InsertRegs [`0`], Flags: {}, SubReg: ExtractSubReg);
4169	constrainSelectedInstRegOperands(I&: *FirstCopy, TII, TRI, RBI);
4170
4171	// Now, perform the remaining copies as vector lane copies.
4172	unsigned LaneIdx = `1`;
4173	for (Register InsReg : InsertRegs) {
4174	Register CopyTo = I.getOperand(i: LaneIdx).getReg();
4175	MachineInstr &CopyInst =
4176	*BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: CopyOpc), DestReg: CopyTo)
4177	.addUse(RegNo: InsReg)
4178	.addImm(Val: LaneIdx);
4179	constrainSelectedInstRegOperands(I&: CopyInst, TII, TRI, RBI);
4180	++LaneIdx;
4181	}
4182
4183	// Separately constrain the first copy's destination. Because of the
4184	// limitation in constrainOperandRegClass, we can't guarantee that this will
4185	// actually be constrained. So, do it ourselves using the second operand.
4186	const TargetRegisterClass *RC =
4187	MRI.getRegClassOrNull(Reg: I.getOperand(i: `1`).getReg());
4188	if (!RC) {
4189	LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4190	return false;
4191	}
4192
4193	RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI);
4194	I.eraseFromParent();
4195	return true;
4196	}
4197
4198	bool AArch64InstructionSelector::selectConcatVectors(
4199	MachineInstr &I, MachineRegisterInfo &MRI) {
4200	assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4201	"Unexpected opcode");
4202	Register Dst = I.getOperand(i: `0`).getReg();
4203	Register Op1 = I.getOperand(i: `1`).getReg();
4204	Register Op2 = I.getOperand(i: `2`).getReg();
4205	MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB);
4206	if (!ConcatMI)
4207	return false;
4208	I.eraseFromParent();
4209	return true;
4210	}
4211
4212	unsigned
4213	AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4214	MachineFunction &MF) const {
4215	Type *CPTy = CPVal->getType();
4216	Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy);
4217
4218	MachineConstantPool *MCP = MF.getConstantPool();
4219	return MCP->getConstantPoolIndex(C: CPVal, Alignment);
4220	}
4221
4222	MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4223	const Constant CPVal, MachineIRBuilder &MIRBuilder) const* {
4224	const TargetRegisterClass *RC;
4225	unsigned Opc;
4226	bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4227	unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType());
4228	switch (Size) {
4229	case `16`:
4230	RC = &AArch64::FPR128RegClass;
4231	Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4232	break;
4233	case `8`:
4234	RC = &AArch64::FPR64RegClass;
4235	Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4236	break;
4237	case `4`:
4238	RC = &AArch64::FPR32RegClass;
4239	Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4240	break;
4241	case `2`:
4242	RC = &AArch64::FPR16RegClass;
4243	Opc = AArch64::LDRHui;
4244	break;
4245	default:
4246	LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4247	<< *CPVal->getType());
4248	return nullptr;
4249	}
4250
4251	MachineInstr LoadMI = nullptr*;
4252	auto &MF = MIRBuilder.getMF();
4253	unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4254	if (IsTiny && (Size == `16` \|\| Size == `8` \|\| Size == `4`)) {
4255	// Use load(literal) for tiny code model.
4256	LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx);
4257	} else {
4258	auto Adrp =
4259	MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
4260	.addConstantPoolIndex(Idx: CPIdx, Offset: `0`, TargetFlags: AArch64II::MO_PAGE);
4261
4262	LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {Adrp})
4263	.addConstantPoolIndex(
4264	Idx: CPIdx, Offset: `0`, TargetFlags: AArch64II::MO_PAGEOFF \| AArch64II::MO_NC);
4265
4266	constrainSelectedInstRegOperands(I&: *Adrp, TII, TRI, RBI);
4267	}
4268
4269	MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4270	LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo,
4271	F: MachineMemOperand::MOLoad,
4272	Size, BaseAlignment: Align (Size)));
4273	constrainSelectedInstRegOperands(I&: *LoadMI, TII, TRI, RBI);
4274	return LoadMI;
4275	}
4276
4277	/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4278	/// size and RB.
4279	static std::pair<unsigned, unsigned>
4280	getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4281	unsigned Opc, SubregIdx;
4282	if (RB.getID() == AArch64::GPRRegBankID) {
4283	if (EltSize == `8`) {
4284	Opc = AArch64::INSvi8gpr;
4285	SubregIdx = AArch64::bsub;
4286	} else if (EltSize == `16`) {
4287	Opc = AArch64::INSvi16gpr;
4288	SubregIdx = AArch64::ssub;
4289	} else if (EltSize == `32`) {
4290	Opc = AArch64::INSvi32gpr;
4291	SubregIdx = AArch64::ssub;
4292	} else if (EltSize == `64`) {
4293	Opc = AArch64::INSvi64gpr;
4294	SubregIdx = AArch64::dsub;
4295	} else {
4296	llvm_unreachable("invalid elt size!");
4297	}
4298	} else {
4299	if (EltSize == `8`) {
4300	Opc = AArch64::INSvi8lane;
4301	SubregIdx = AArch64::bsub;
4302	} else if (EltSize == `16`) {
4303	Opc = AArch64::INSvi16lane;
4304	SubregIdx = AArch64::hsub;
4305	} else if (EltSize == `32`) {
4306	Opc = AArch64::INSvi32lane;
4307	SubregIdx = AArch64::ssub;
4308	} else if (EltSize == `64`) {
4309	Opc = AArch64::INSvi64lane;
4310	SubregIdx = AArch64::dsub;
4311	} else {
4312	llvm_unreachable("invalid elt size!");
4313	}
4314	}
4315	return std::make_pair(x&: Opc, y&: SubregIdx);
4316	}
4317
4318	MachineInstr *AArch64InstructionSelector::emitInstr(
4319	unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4320	std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4321	const ComplexRendererFns &RenderFns) const {
4322	assert(Opcode && "Expected an opcode?");
4323	assert(!isPreISelGenericOpcode(Opcode) &&
4324	"Function should only be used to produce selected instructions!");
4325	auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps);
4326	if (RenderFns)
4327	for (auto &Fn : *RenderFns)
4328	Fn (MI);
4329	constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
4330	return &*MI;
4331	}
4332
4333	MachineInstr *AArch64InstructionSelector::emitAddSub(
4334	const std::array<std::array<unsigned, `2`>, `5`> &AddrModeAndSizeToOpcode,
4335	Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4336	MachineIRBuilder &MIRBuilder) const {
4337	MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4338	assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4339	auto Ty = MRI.getType(Reg: LHS.getReg());
4340	assert(!Ty.isVector() && "Expected a scalar or pointer?");
4341	unsigned Size = Ty.getSizeInBits();
4342	assert((Size == `32` \|\| Size == `64`) && "Expected a 32-bit or 64-bit type only");
4343	bool Is32Bit = Size == `32`;
4344
4345	// INSTRri form with positive arithmetic immediate.
4346	if (auto Fns = selectArithImmed(Root&: RHS))
4347	return emitInstr(Opcode: AddrModeAndSizeToOpcode [`0`][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4348	MIRBuilder, RenderFns: Fns);
4349
4350	// INSTRri form with negative arithmetic immediate.
4351	if (auto Fns = selectNegArithImmed(Root&: RHS))
4352	return emitInstr(Opcode: AddrModeAndSizeToOpcode [`3`][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4353	MIRBuilder, RenderFns: Fns);
4354
4355	// INSTRrx form.
4356	if (auto Fns = selectArithExtendedRegister(Root&: RHS))
4357	return emitInstr(Opcode: AddrModeAndSizeToOpcode [`4`][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4358	MIRBuilder, RenderFns: Fns);
4359
4360	// INSTRrs form.
4361	if (auto Fns = selectShiftedRegister(Root&: RHS))
4362	return emitInstr(Opcode: AddrModeAndSizeToOpcode [`1`][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4363	MIRBuilder, RenderFns: Fns);
4364	return emitInstr(Opcode: AddrModeAndSizeToOpcode [`2`][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS},
4365	MIRBuilder);
4366	}
4367
4368	MachineInstr *
4369	AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4370	MachineOperand &RHS,
4371	MachineIRBuilder &MIRBuilder) const {
4372	const std::array<std::array<unsigned, `2`>, `5`> OpcTable{
4373	._M_elems: {{AArch64::ADDXri, AArch64::ADDWri},
4374	{AArch64::ADDXrs, AArch64::ADDWrs},
4375	{AArch64::ADDXrr, AArch64::ADDWrr},
4376	{AArch64::SUBXri, AArch64::SUBWri},
4377	{AArch64::ADDXrx, AArch64::ADDWrx}}};
4378	return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder);
4379	}
4380
4381	MachineInstr *
4382	AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4383	MachineOperand &RHS,
4384	MachineIRBuilder &MIRBuilder) const {
4385	const std::array<std::array<unsigned, `2`>, `5`> OpcTable{
4386	._M_elems: {{AArch64::ADDSXri, AArch64::ADDSWri},
4387	{AArch64::ADDSXrs, AArch64::ADDSWrs},
4388	{AArch64::ADDSXrr, AArch64::ADDSWrr},
4389	{AArch64::SUBSXri, AArch64::SUBSWri},
4390	{AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4391	return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4392	}
4393
4394	MachineInstr *
4395	AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4396	MachineOperand &RHS,
4397	MachineIRBuilder &MIRBuilder) const {
4398	const std::array<std::array<unsigned, `2`>, `5`> OpcTable{
4399	._M_elems: {{AArch64::SUBSXri, AArch64::SUBSWri},
4400	{AArch64::SUBSXrs, AArch64::SUBSWrs},
4401	{AArch64::SUBSXrr, AArch64::SUBSWrr},
4402	{AArch64::ADDSXri, AArch64::ADDSWri},
4403	{AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4404	return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4405	}
4406
4407	MachineInstr *
4408	AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4409	MachineOperand &RHS,
4410	MachineIRBuilder &MIRBuilder) const {
4411	assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4412	MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4413	bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == `32`);
4414	static const unsigned OpcTable[`2`] = {AArch64::ADCSXr, AArch64::ADCSWr};
4415	return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4416	}
4417
4418	MachineInstr *
4419	AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4420	MachineOperand &RHS,
4421	MachineIRBuilder &MIRBuilder) const {
4422	assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4423	MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4424	bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == `32`);
4425	static const unsigned OpcTable[`2`] = {AArch64::SBCSXr, AArch64::SBCSWr};
4426	return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4427	}
4428
4429	MachineInstr *
4430	AArch64InstructionSelector::emitCMP(MachineOperand &LHS, MachineOperand &RHS,
4431	MachineIRBuilder &MIRBuilder) const {
4432	MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4433	bool Is32Bit = MRI.getType(Reg: LHS.getReg()).getSizeInBits() == `32`;
4434	auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4435	return emitSUBS(Dst: MRI.createVirtualRegister(RegClass: RC), LHS, RHS, MIRBuilder);
4436	}
4437
4438	MachineInstr *
4439	AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4440	MachineIRBuilder &MIRBuilder) const {
4441	MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4442	bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == `32`);
4443	auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4444	return emitADDS(Dst: MRI.createVirtualRegister(RegClass: RC), LHS, RHS, MIRBuilder);
4445	}
4446
4447	MachineInstr *
4448	AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4449	MachineIRBuilder &MIRBuilder) const {
4450	assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4451	MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4452	LLT Ty = MRI.getType(Reg: LHS.getReg());
4453	unsigned RegSize = Ty.getSizeInBits();
4454	bool Is32Bit = (RegSize == `32`);
4455	const unsigned OpcTable[`3`][`2`] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4456	{AArch64::ANDSXrs, AArch64::ANDSWrs},
4457	{AArch64::ANDSXrr, AArch64::ANDSWrr}};
4458	// ANDS needs a logical immediate for its immediate form. Check if we can
4459	// fold one in.
4460	if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) {
4461	int64_t Imm = ValAndVReg ->Value.getSExtValue();
4462
4463	if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) {
4464	auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[`0`][Is32Bit], DstOps: {Ty}, SrcOps: {LHS});
4465	TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
4466	constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
4467	return &*TstMI;
4468	}
4469	}
4470
4471	if (auto Fns = selectLogicalShiftedRegister(Root&: RHS))
4472	return emitInstr(Opcode: OpcTable[`1`][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns);
4473	return emitInstr(Opcode: OpcTable[`2`][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder);
4474	}
4475
4476	MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4477	MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4478	MachineIRBuilder &MIRBuilder) const {
4479	assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4480	assert(Predicate.isPredicate() && "Expected predicate?");
4481	MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4482	LLT CmpTy = MRI.getType(Reg: LHS.getReg());
4483	assert(!CmpTy.isVector() && "Expected scalar or pointer");
4484	unsigned Size = CmpTy.getSizeInBits();
4485	(void)Size;
4486	assert((Size == `32` \|\| Size == `64`) && "Expected a 32-bit or 64-bit LHS/RHS?");
4487	// Fold the compare into a cmn or tst if possible.
4488	if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4489	return FoldCmp;
4490	return emitCMP(LHS, RHS, MIRBuilder);
4491	}
4492
4493	MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4494	Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4495	MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4496	#ifndef NDEBUG
4497	LLT Ty = MRI.getType(Dst);
4498	assert(!Ty.isVector() && Ty.getSizeInBits() == `32` &&
4499	"Expected a 32-bit scalar register?");
4500	#endif
4501	const Register ZReg = AArch64::WZR;
4502	AArch64CC::CondCode CC1, CC2;
4503	changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
4504	auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1);
4505	if (CC2 == AArch64CC::AL)
4506	return emitCSINC(/Dst=/Dst, /Src1=/ZReg, /Src2=/ZReg, Pred: InvCC1,
4507	MIRBuilder);
4508	const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4509	Register Def1Reg = MRI.createVirtualRegister(RegClass: RC);
4510	Register Def2Reg = MRI.createVirtualRegister(RegClass: RC);
4511	auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2);
4512	emitCSINC(/Dst=/Def1Reg, /Src1=/ZReg, /Src2=/ZReg, Pred: InvCC1, MIRBuilder);
4513	emitCSINC(/Dst=/Def2Reg, /Src1=/ZReg, /Src2=/ZReg, Pred: InvCC2, MIRBuilder);
4514	auto OrMI = MIRBuilder.buildInstr(Opc: AArch64::ORRWrr, DstOps: {Dst}, SrcOps: {Def1Reg, Def2Reg});
4515	constrainSelectedInstRegOperands(I&: *OrMI, TII, TRI, RBI);
4516	return &*OrMI;
4517	}
4518
4519	MachineInstr *AArch64InstructionSelector::emitFPCompare(
4520	Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4521	std::optional<CmpInst::Predicate> Pred) const {
4522	MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4523	LLT Ty = MRI.getType(Reg: LHS);
4524	if (Ty.isVector())
4525	return nullptr;
4526	unsigned OpSize = Ty.getSizeInBits();
4527	assert(OpSize == `16` \|\| OpSize == `32` \|\| OpSize == `64`);
4528
4529	// If this is a compare against +0.0, then we don't have
4530	// to explicitly materialize a constant.
4531	const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI);
4532	bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4533
4534	auto IsEqualityPred = [](CmpInst::Predicate P) {
4535	return P == CmpInst::FCMP_OEQ \|\| P == CmpInst::FCMP_ONE \|\|
4536	P == CmpInst::FCMP_UEQ \|\| P == CmpInst::FCMP_UNE;
4537	};
4538	if (!ShouldUseImm && Pred && IsEqualityPred (*Pred)) {
4539	// Try commuting the operands.
4540	const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI);
4541	if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4542	ShouldUseImm = true;
4543	std::swap(a&: LHS, b&: RHS);
4544	}
4545	}
4546	unsigned CmpOpcTbl[`2`][`3`] = {
4547	{AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4548	{AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4549	unsigned CmpOpc =
4550	CmpOpcTbl[ShouldUseImm][OpSize == `16` ? `0` : (OpSize == `32` ? `1` : `2`)];
4551
4552	// Partially build the compare. Decide if we need to add a use for the
4553	// third operand based off whether or not we're comparing against 0.0.
4554	auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS);
4555	CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4556	if (!ShouldUseImm)
4557	CmpMI.addUse(RegNo: RHS);
4558	constrainSelectedInstRegOperands(I&: *CmpMI, TII, TRI, RBI);
4559	return &*CmpMI;
4560	}
4561
4562	MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4563	std::optional<Register> Dst, Register Op1, Register Op2,
4564	MachineIRBuilder &MIRBuilder) const {
4565	// We implement a vector concat by:
4566	// 1. Use scalar_to_vector to insert the lower vector into the larger dest
4567	// 2. Insert the upper vector into the destination's upper element
4568	// TODO: some of this code is common with G_BUILD_VECTOR handling.
4569	MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4570
4571	const LLT Op1Ty = MRI.getType(Reg: Op1);
4572	const LLT Op2Ty = MRI.getType(Reg: Op2);
4573
4574	if (Op1Ty != Op2Ty) {
4575	LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4576	return nullptr;
4577	}
4578	assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4579
4580	if (Op1Ty.getSizeInBits() >= `128`) {
4581	LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4582	return nullptr;
4583	}
4584
4585	// At the moment we just support 64 bit vector concats.
4586	if (Op1Ty.getSizeInBits() != `64`) {
4587	LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4588	return nullptr;
4589	}
4590
4591	const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits());
4592	const RegisterBank &FPRBank = *RBI.getRegBank(Reg: Op1, MRI, TRI);
4593	const TargetRegisterClass *DstRC =
4594	getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: `2`), RB: FPRBank);
4595
4596	MachineInstr *WidenedOp1 =
4597	emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder);
4598	MachineInstr *WidenedOp2 =
4599	emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder);
4600	if (!WidenedOp1 \|\| !WidenedOp2) {
4601	LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4602	return nullptr;
4603	}
4604
4605	// Now do the insert of the upper element.
4606	unsigned InsertOpc, InsSubRegIdx;
4607	std::tie(args&: InsertOpc, args&: InsSubRegIdx) =
4608	getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits());
4609
4610	if (!Dst)
4611	Dst = MRI.createVirtualRegister(RegClass: DstRC);
4612	auto InsElt =
4613	MIRBuilder
4614	.buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: `0`).getReg()})
4615	.addImm(Val: `1`) / Lane index /
4616	.addUse(RegNo: WidenedOp2->getOperand(i: `0`).getReg())
4617	.addImm(Val: `0`);
4618	constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
4619	return &*InsElt;
4620	}
4621
4622	MachineInstr *
4623	AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4624	Register Src2, AArch64CC::CondCode Pred,
4625	MachineIRBuilder &MIRBuilder) const {
4626	auto &MRI = *MIRBuilder.getMRI();
4627	const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst);
4628	// If we used a register class, then this won't necessarily have an LLT.
4629	// Compute the size based off whether or not we have a class or bank.
4630	unsigned Size;
4631	if (const auto RC = dyn_cast<const* TargetRegisterClass *>(Val: RegClassOrBank))
4632	Size = TRI.getRegSizeInBits(RC: *RC);
4633	else
4634	Size = MRI.getType(Reg: Dst).getSizeInBits();
4635	// Some opcodes use s1.
4636	assert(Size <= `64` && "Expected 64 bits or less only!");
4637	static const unsigned OpcTable[`2`] = {AArch64::CSINCWr, AArch64::CSINCXr};
4638	unsigned Opc = OpcTable[Size == `64`];
4639	auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred);
4640	constrainSelectedInstRegOperands(I&: *CSINC, TII, TRI, RBI);
4641	return &*CSINC;
4642	}
4643
4644	MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4645	Register CarryReg) {
4646	MachineRegisterInfo *MRI = MIB.getMRI();
4647	unsigned Opcode = I.getOpcode();
4648
4649	// If the instruction is a SUB, we need to negate the carry,
4650	// because borrowing is indicated by carry-flag == 0.
4651	bool NeedsNegatedCarry =
4652	(Opcode == TargetOpcode::G_USUBE \|\| Opcode == TargetOpcode::G_SSUBE);
4653
4654	// If the previous instruction will already produce the correct carry, do not
4655	// emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4656	// generated during legalization of wide add/sub. This optimization depends on
4657	// these sequences not being interrupted by other instructions.
4658	// We have to select the previous instruction before the carry-using
4659	// instruction is deleted by the calling function, otherwise the previous
4660	// instruction might become dead and would get deleted.
4661	MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg);
4662	if (SrcMI == I.getPrevNode()) {
4663	if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) {
4664	bool ProducesNegatedCarry = CarrySrcMI->isSub();
4665	if (NeedsNegatedCarry == ProducesNegatedCarry &&
4666	CarrySrcMI->isUnsigned() &&
4667	CarrySrcMI->getCarryOutReg() == CarryReg &&
4668	selectAndRestoreState(I&: *SrcMI))
4669	return nullptr;
4670	}
4671	}
4672
4673	Register DeadReg = MRI->createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
4674
4675	if (NeedsNegatedCarry) {
4676	// (0 - Carry) sets !C in NZCV when Carry == 1
4677	Register ZReg = AArch64::WZR;
4678	return emitInstr(Opcode: AArch64::SUBSWrr, DstOps: {DeadReg}, SrcOps: {ZReg, CarryReg}, MIRBuilder&: MIB);
4679	}
4680
4681	// (Carry - 1) sets !C in NZCV when Carry == 0
4682	auto Fns = select12BitValueWithLeftShift(Immed: `1`);
4683	return emitInstr(Opcode: AArch64::SUBSWri, DstOps: {DeadReg}, SrcOps: {CarryReg}, MIRBuilder&: MIB, RenderFns: Fns);
4684	}
4685
4686	bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4687	MachineRegisterInfo &MRI) {
4688	auto &CarryMI = cast<GAddSubCarryOut>(Val&: I);
4689
4690	if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) {
4691	// Set NZCV carry according to carry-in VReg
4692	emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg());
4693	}
4694
4695	// Emit the operation and get the correct condition code.
4696	auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(),
4697	LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB);
4698
4699	Register CarryOutReg = CarryMI.getCarryOutReg();
4700
4701	// Don't convert carry-out to VReg if it is never used
4702	if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) {
4703	// Now, put the overflow result in the register given by the first operand
4704	// to the overflow op. CSINC increments the result when the predicate is
4705	// false, so to get the increment when it's true, we need to use the
4706	// inverse. In this case, we want to increment when carry is set.
4707	Register ZReg = AArch64::WZR;
4708	emitCSINC(/Dst=/CarryOutReg, /Src1=/ZReg, /Src2=/ZReg,
4709	Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB);
4710	}
4711
4712	I.eraseFromParent();
4713	return true;
4714	}
4715
4716	std::pair<MachineInstr *, AArch64CC::CondCode>
4717	AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4718	MachineOperand &LHS,
4719	MachineOperand &RHS,
4720	MachineIRBuilder &MIRBuilder) const {
4721	switch (Opcode) {
4722	default:
4723	llvm_unreachable("Unexpected opcode!");
4724	case TargetOpcode::G_SADDO:
4725	return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4726	case TargetOpcode::G_UADDO:
4727	return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4728	case TargetOpcode::G_SSUBO:
4729	return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4730	case TargetOpcode::G_USUBO:
4731	return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4732	case TargetOpcode::G_SADDE:
4733	return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4734	case TargetOpcode::G_UADDE:
4735	return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4736	case TargetOpcode::G_SSUBE:
4737	return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4738	case TargetOpcode::G_USUBE:
4739	return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4740	}
4741	}
4742
4743	/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4744	/// expressed as a conjunction.
4745	/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4746	/// changing the conditions on the CMP tests.
4747	/// (this means we can call emitConjunctionRec() with
4748	/// Negate==true on this sub-tree)
4749	/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4750	/// cannot do the negation naturally. We are required to
4751	/// emit the subtree first in this case.
4752	/// \param WillNegate Is true if are called when the result of this
4753	/// subexpression must be negated. This happens when the
4754	/// outer expression is an OR. We can use this fact to know
4755	/// that we have a double negation (or (or ...) ...) that
4756	/// can be implemented for free.
4757	static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4758	bool WillNegate, MachineRegisterInfo &MRI,
4759	unsigned Depth = `0`) {
4760	if (!MRI.hasOneNonDBGUse(RegNo: Val))
4761	return false;
4762	MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4763	unsigned Opcode = ValDef->getOpcode();
4764	if (isa<GAnyCmp>(Val: ValDef)) {
4765	CanNegate = true;
4766	MustBeFirst = false;
4767	return true;
4768	}
4769	// Protect against exponential runtime and stack overflow.
4770	if (Depth > `6`)
4771	return false;
4772	if (Opcode == TargetOpcode::G_AND \|\| Opcode == TargetOpcode::G_OR) {
4773	bool IsOR = Opcode == TargetOpcode::G_OR;
4774	Register O0 = ValDef->getOperand(i: `1`).getReg();
4775	Register O1 = ValDef->getOperand(i: `2`).getReg();
4776	bool CanNegateL;
4777	bool MustBeFirstL;
4778	if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + `1`))
4779	return false;
4780	bool CanNegateR;
4781	bool MustBeFirstR;
4782	if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + `1`))
4783	return false;
4784
4785	if (MustBeFirstL && MustBeFirstR)
4786	return false;
4787
4788	if (IsOR) {
4789	// For an OR expression we need to be able to naturally negate at least
4790	// one side or we cannot do the transformation at all.
4791	if (!CanNegateL && !CanNegateR)
4792	return false;
4793	// If we the result of the OR will be negated and we can naturally negate
4794	// the leaves, then this sub-tree as a whole negates naturally.
4795	CanNegate = WillNegate && CanNegateL && CanNegateR;
4796	// If we cannot naturally negate the whole sub-tree, then this must be
4797	// emitted first.
4798	MustBeFirst = !CanNegate;
4799	} else {
4800	assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4801	// We cannot naturally negate an AND operation.
4802	CanNegate = false;
4803	MustBeFirst = MustBeFirstL \|\| MustBeFirstR;
4804	}
4805	return true;
4806	}
4807	return false;
4808	}
4809
4810	MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4811	Register LHS, Register RHS, CmpInst::Predicate CC,
4812	AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4813	MachineIRBuilder &MIB) const {
4814	auto &MRI = *MIB.getMRI();
4815	LLT OpTy = MRI.getType(Reg: LHS);
4816	unsigned CCmpOpc;
4817	std::optional<ValueAndVReg> C;
4818	if (CmpInst::isIntPredicate(P: CC)) {
4819	assert(OpTy.getSizeInBits() == `32` \|\| OpTy.getSizeInBits() == `64`);
4820	C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
4821	if (!C \|\| C ->Value.sgt(RHS: `31`) \|\| C ->Value.slt(RHS: -`31`))
4822	CCmpOpc = OpTy.getSizeInBits() == `32` ? AArch64::CCMPWr : AArch64::CCMPXr;
4823	else if (C ->Value.ule(RHS: `31`))
4824	CCmpOpc = OpTy.getSizeInBits() == `32` ? AArch64::CCMPWi : AArch64::CCMPXi;
4825	else
4826	CCmpOpc = OpTy.getSizeInBits() == `32` ? AArch64::CCMNWi : AArch64::CCMNXi;
4827	} else {
4828	assert(OpTy.getSizeInBits() == `16` \|\| OpTy.getSizeInBits() == `32` \|\|
4829	OpTy.getSizeInBits() == `64`);
4830	switch (OpTy.getSizeInBits()) {
4831	case `16`:
4832	assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4833	CCmpOpc = AArch64::FCCMPHrr;
4834	break;
4835	case `32`:
4836	CCmpOpc = AArch64::FCCMPSrr;
4837	break;
4838	case `64`:
4839	CCmpOpc = AArch64::FCCMPDrr;
4840	break;
4841	default:
4842	return nullptr;
4843	}
4844	}
4845	AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
4846	unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC);
4847	auto CCmp =
4848	MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS});
4849	if (CCmpOpc == AArch64::CCMPWi \|\| CCmpOpc == AArch64::CCMPXi)
4850	CCmp.addImm(Val: C ->Value.getZExtValue());
4851	else if (CCmpOpc == AArch64::CCMNWi \|\| CCmpOpc == AArch64::CCMNXi)
4852	CCmp.addImm(Val: C ->Value.abs().getZExtValue());
4853	else
4854	CCmp.addReg(RegNo: RHS);
4855	CCmp.addImm(Val: NZCV).addImm(Val: Predicate);
4856	constrainSelectedInstRegOperands(I&: *CCmp, TII, TRI, RBI);
4857	return &*CCmp;
4858	}
4859
4860	MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4861	Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4862	AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4863	// We're at a tree leaf, produce a conditional comparison operation.
4864	auto &MRI = *MIB.getMRI();
4865	MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4866	unsigned Opcode = ValDef->getOpcode();
4867	if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) {
4868	Register LHS = Cmp->getLHSReg();
4869	Register RHS = Cmp->getRHSReg();
4870	CmpInst::Predicate CC = Cmp->getCond();
4871	if (Negate)
4872	CC = CmpInst::getInversePredicate(pred: CC);
4873	if (isa<GICmp>(Val: Cmp)) {
4874	OutCC = changeICMPPredToAArch64CC(P: CC, RHS, MRI: MIB.getMRI());
4875	} else {
4876	// Handle special FP cases.
4877	AArch64CC::CondCode ExtraCC;
4878	changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC);
4879	// Some floating point conditions can't be tested with a single condition
4880	// code. Construct an additional comparison in this case.
4881	if (ExtraCC != AArch64CC::AL) {
4882	MachineInstr *ExtraCmp;
4883	if (!CCOp)
4884	ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC);
4885	else
4886	ExtraCmp =
4887	emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB);
4888	CCOp = ExtraCmp->getOperand(i: `0`).getReg();
4889	Predicate = ExtraCC;
4890	}
4891	}
4892
4893	// Produce a normal comparison if we are first in the chain
4894	if (!CCOp) {
4895	if (isa<GICmp>(Val: Cmp))
4896	return emitCMP(LHS&: Cmp->getOperand(i: `2`), RHS&: Cmp->getOperand(i: `3`), MIRBuilder&: MIB);
4897	return emitFPCompare(LHS: Cmp->getOperand(i: `2`).getReg(),
4898	RHS: Cmp->getOperand(i: `3`).getReg(), MIRBuilder&: MIB);
4899	}
4900	// Otherwise produce a ccmp.
4901	return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4902	}
4903	assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4904
4905	bool IsOR = Opcode == TargetOpcode::G_OR;
4906
4907	Register LHS = ValDef->getOperand(i: `1`).getReg();
4908	bool CanNegateL;
4909	bool MustBeFirstL;
4910	bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI);
4911	assert(ValidL && "Valid conjunction/disjunction tree");
4912	(void)ValidL;
4913
4914	Register RHS = ValDef->getOperand(i: `2`).getReg();
4915	bool CanNegateR;
4916	bool MustBeFirstR;
4917	bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI);
4918	assert(ValidR && "Valid conjunction/disjunction tree");
4919	(void)ValidR;
4920
4921	// Swap sub-tree that must come first to the right side.
4922	if (MustBeFirstL) {
4923	assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4924	std::swap(a&: LHS, b&: RHS);
4925	std::swap(a&: CanNegateL, b&: CanNegateR);
4926	std::swap(a&: MustBeFirstL, b&: MustBeFirstR);
4927	}
4928
4929	bool NegateR;
4930	bool NegateAfterR;
4931	bool NegateL;
4932	bool NegateAfterAll;
4933	if (Opcode == TargetOpcode::G_OR) {
4934	// Swap the sub-tree that we can negate naturally to the left.
4935	if (!CanNegateL) {
4936	assert(CanNegateR && "at least one side must be negatable");
4937	assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4938	assert(!Negate);
4939	std::swap(a&: LHS, b&: RHS);
4940	NegateR = false;
4941	NegateAfterR = true;
4942	} else {
4943	// Negate the left sub-tree if possible, otherwise negate the result.
4944	NegateR = CanNegateR;
4945	NegateAfterR = !CanNegateR;
4946	}
4947	NegateL = true;
4948	NegateAfterAll = !Negate;
4949	} else {
4950	assert(Opcode == TargetOpcode::G_AND &&
4951	"Valid conjunction/disjunction tree");
4952	assert(!Negate && "Valid conjunction/disjunction tree");
4953
4954	NegateL = false;
4955	NegateR = false;
4956	NegateAfterR = false;
4957	NegateAfterAll = false;
4958	}
4959
4960	// Emit sub-trees.
4961	AArch64CC::CondCode RHSCC;
4962	MachineInstr *CmpR =
4963	emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB);
4964	if (NegateAfterR)
4965	RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC);
4966	MachineInstr *CmpL = emitConjunctionRec(
4967	Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: `0`).getReg(), Predicate: RHSCC, MIB);
4968	if (NegateAfterAll)
4969	OutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
4970	return CmpL;
4971	}
4972
4973	MachineInstr *AArch64InstructionSelector::emitConjunction(
4974	Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4975	bool DummyCanNegate;
4976	bool DummyMustBeFirst;
4977	if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false,
4978	MRI&: *MIB.getMRI()))
4979	return nullptr;
4980	return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register (), Predicate: AArch64CC::AL, MIB);
4981	}
4982
4983	bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4984	MachineInstr &CondMI) {
4985	AArch64CC::CondCode AArch64CC;
4986	MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB);
4987	if (!ConjMI)
4988	return false;
4989
4990	emitSelect(Dst: SelI.getReg(Idx: `0`), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB);
4991	SelI.eraseFromParent();
4992	return true;
4993	}
4994
4995	bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4996	MachineRegisterInfo &MRI = *MIB.getMRI();
4997	// We want to recognize this pattern:
4998	//
4999	// $z = G_FCMP pred, $x, $y
5000	// ...
5001	// $w = G_SELECT $z, $a, $b
5002	//
5003	// Where the value of $z is only* ever used by the G_SELECT (possibly with*
5004	// some copies/truncs in between.)
5005	//
5006	// If we see this, then we can emit something like this:
5007	//
5008	// fcmp $x, $y
5009	// fcsel $w, $a, $b, pred
5010	//
5011	// Rather than emitting both of the rather long sequences in the standard
5012	// G_FCMP/G_SELECT select methods.
5013
5014	// First, check if the condition is defined by a compare.
5015	MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: `1`).getReg());
5016
5017	// We can only fold if all of the defs have one use.
5018	Register CondDefReg = CondDef->getOperand(i: `0`).getReg();
5019	if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) {
5020	// Unless it's another select.
5021	for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) {
5022	if (CondDef == &UI)
5023	continue;
5024	if (UI.getOpcode() != TargetOpcode::G_SELECT)
5025	return false;
5026	}
5027	}
5028
5029	// Is the condition defined by a compare?
5030	unsigned CondOpc = CondDef->getOpcode();
5031	if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5032	if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef))
5033	return true;
5034	return false;
5035	}
5036
5037	AArch64CC::CondCode CondCode;
5038	if (CondOpc == TargetOpcode::G_ICMP) {
5039	auto &PredOp = CondDef->getOperand(i: `1`);
5040	emitIntegerCompare(LHS&: CondDef->getOperand(i: `2`), RHS&: CondDef->getOperand(i: `3`), Predicate&: PredOp,
5041	MIRBuilder&: MIB);
5042	auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5043	CondCode =
5044	changeICMPPredToAArch64CC(P: Pred, RHS: CondDef->getOperand(i: `3`).getReg(), MRI: &MRI);
5045	} else {
5046	// Get the condition code for the select.
5047	auto Pred =
5048	static_cast<CmpInst::Predicate>(CondDef->getOperand(i: `1`).getPredicate());
5049	AArch64CC::CondCode CondCode2;
5050	changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2);
5051
5052	// changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5053	// instructions to emit the comparison.
5054	// TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5055	// unnecessary.
5056	if (CondCode2 != AArch64CC::AL)
5057	return false;
5058
5059	if (!emitFPCompare(LHS: CondDef->getOperand(i: `2`).getReg(),
5060	RHS: CondDef->getOperand(i: `3`).getReg(), MIRBuilder&: MIB)) {
5061	LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5062	return false;
5063	}
5064	}
5065
5066	// Emit the select.
5067	emitSelect(Dst: I.getOperand(i: `0`).getReg(), True: I.getOperand(i: `2`).getReg(),
5068	False: I.getOperand(i: `3`).getReg(), CC: CondCode, MIB);
5069	I.eraseFromParent();
5070	return true;
5071	}
5072
5073	MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5074	MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5075	MachineIRBuilder &MIRBuilder) const {
5076	assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5077	"Unexpected MachineOperand");
5078	MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5079	// We want to find this sort of thing:
5080	// x = G_SUB 0, y
5081	// G_ICMP z, x
5082	//
5083	// In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5084	// e.g:
5085	//
5086	// cmn z, y
5087
5088	// Check if the RHS or LHS of the G_ICMP is defined by a SUB
5089	MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
5090	MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
5091	auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5092
5093	// Given this:
5094	//
5095	// x = G_SUB 0, y
5096	// G_ICMP z, x
5097	//
5098	// Produce this:
5099	//
5100	// cmn z, y
5101	if (isCMN(MaybeSub: RHSDef, Pred: P, MRI))
5102	return emitCMN(LHS, RHS&: RHSDef->getOperand(i: `2`), MIRBuilder);
5103
5104	// Same idea here, but with the LHS of the compare instead:
5105	//
5106	// Given this:
5107	//
5108	// x = G_SUB 0, y
5109	// G_ICMP x, z
5110	//
5111	// Produce this:
5112	//
5113	// cmn y, z
5114	//
5115	// But be careful! We need to swap the predicate!
5116	if (isCMN(MaybeSub: LHSDef, Pred: P, MRI)) {
5117	if (!CmpInst::isEquality(pred: P)) {
5118	P = CmpInst::getSwappedPredicate(pred: P);
5119	Predicate = MachineOperand::CreatePredicate(Pred: P);
5120	}
5121	return emitCMN(LHS&: LHSDef->getOperand(i: `2`), RHS, MIRBuilder);
5122	}
5123
5124	// Given this:
5125	//
5126	// z = G_AND x, y
5127	// G_ICMP z, 0
5128	//
5129	// Produce this if the compare is signed:
5130	//
5131	// tst x, y
5132	if (!CmpInst::isUnsigned(Pred: P) && LHSDef &&
5133	LHSDef->getOpcode() == TargetOpcode::G_AND) {
5134	// Make sure that the RHS is 0.
5135	auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI);
5136	if (!ValAndVReg \|\| ValAndVReg ->Value != `0`)
5137	return nullptr;
5138
5139	return emitTST(LHS&: LHSDef->getOperand(i: `1`),
5140	RHS&: LHSDef->getOperand(i: `2`), MIRBuilder);
5141	}
5142
5143	return nullptr;
5144	}
5145
5146	bool AArch64InstructionSelector::selectShuffleVector(
5147	MachineInstr &I, MachineRegisterInfo &MRI) {
5148	const LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
5149	Register Src1Reg = I.getOperand(i: `1`).getReg();
5150	Register Src2Reg = I.getOperand(i: `2`).getReg();
5151	ArrayRef<int> Mask = I.getOperand(i: `3`).getShuffleMask();
5152	assert(DstTy == MRI.getType(Src1Reg) &&
5153	"Expected equal shuffle types during selection");
5154
5155	MachineBasicBlock &MBB = *I.getParent();
5156	MachineFunction &MF = *MBB.getParent();
5157	LLVMContext &Ctx = MF.getFunction().getContext();
5158
5159	unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / `8`;
5160	int NumElts = DstTy.getNumElements();
5161
5162	SmallVector<int> NewMask;
5163	bool FirstUsed = false;
5164	bool SecondUsed = false;
5165	for (int M : Mask) {
5166	// Map any undef or zero lanes to 255.
5167	if (M < `0` \|\| VT->getKnownBits(R: M < NumElts ? Src1Reg : Src2Reg,
5168	DemandedElts: APInt::getOneBitSet(numBits: NumElts, BitNo: M % NumElts))
5169	.isZero()) {
5170	for (unsigned Byte = `0`; Byte < BytesPerElt; ++Byte)
5171	NewMask.push_back(Elt: `255`);
5172	continue;
5173	}
5174
5175	FirstUsed \|= M < NumElts;
5176	SecondUsed \|= M >= NumElts;
5177	for (unsigned Byte = `0`; Byte < BytesPerElt; ++Byte) {
5178	unsigned Offset = Byte + M * BytesPerElt;
5179	NewMask.push_back(Elt: Offset);
5180	}
5181	}
5182
5183	// If the first is unused or all zeros, use the second src in a tbl1.
5184	if (!FirstUsed) {
5185	int ByteLanes = DstTy.getSizeInBits() == `128` ? `16` : `8`;
5186	for (int &M : NewMask) {
5187	if (M != `255`) {
5188	assert(M >= ByteLanes && M < `2` * ByteLanes);
5189	M -= ByteLanes;
5190	}
5191	}
5192	std::swap(a&: Src1Reg, b&: Src2Reg);
5193	std::swap(a&: FirstUsed, b&: SecondUsed);
5194	}
5195
5196	// Use a constant pool to load the index vector for TBL.
5197	SmallVector<Constant *> CstIdxs;
5198	transform(Range&: NewMask, d_first: std::back_inserter(x&: CstIdxs), F: [&Ctx](int M) {
5199	return ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: M);
5200	});
5201	Constant *CPVal = ConstantVector::get(V: CstIdxs);
5202	MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB);
5203	if (!IndexLoad) {
5204	LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5205	return false;
5206	}
5207
5208	if (DstTy.getSizeInBits() != `128`) {
5209	assert(DstTy.getSizeInBits() == `64` && "Unexpected shuffle result ty");
5210	// This case can be done with TBL1.
5211	MachineInstr *Concat =
5212	emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB);
5213	if (!Concat) {
5214	LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5215	return false;
5216	}
5217
5218	// The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5219	IndexLoad = emitScalarToVector(EltSize: `64`, DstRC: &AArch64::FPR128RegClass,
5220	Scalar: IndexLoad->getOperand(i: `0`).getReg(), MIRBuilder&: MIB);
5221
5222	auto TBL1 = MIB.buildInstr(
5223	Opc: AArch64::TBLv16i8One, DstOps: {&AArch64::FPR128RegClass},
5224	SrcOps: {Concat->getOperand(i: `0`).getReg(), IndexLoad->getOperand(i: `0`).getReg()});
5225	constrainSelectedInstRegOperands(I&: *TBL1, TII, TRI, RBI);
5226
5227	auto Copy =
5228	MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: `0`).getReg()}, SrcOps: {})
5229	.addReg(RegNo: TBL1.getReg(Idx: `0`), Flags: {}, SubReg: AArch64::dsub);
5230	RBI.constrainGenericRegister(Reg: Copy.getReg(Idx: `0`), RC: AArch64::FPR64RegClass, MRI);
5231	I.eraseFromParent();
5232	return true;
5233	}
5234
5235	if (!SecondUsed) {
5236	auto TBL1 = MIB.buildInstr(Opc: AArch64::TBLv16i8One, DstOps: {I.getOperand(i: `0`)},
5237	SrcOps: {Src1Reg, IndexLoad->getOperand(i: `0`)});
5238	constrainSelectedInstRegOperands(I&: *TBL1, TII, TRI, RBI);
5239	I.eraseFromParent();
5240	return true;
5241	}
5242
5243	// For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5244	// Q registers for regalloc.
5245	SmallVector<Register, `2`> Regs = {Src1Reg, Src2Reg};
5246	auto RegSeq = createQTuple(Regs, MIB);
5247	auto TBL2 = MIB.buildInstr(Opc: AArch64::TBLv16i8Two, DstOps: {I.getOperand(i: `0`)},
5248	SrcOps: {RegSeq, IndexLoad->getOperand(i: `0`)});
5249	constrainSelectedInstRegOperands(I&: *TBL2, TII, TRI, RBI);
5250	I.eraseFromParent();
5251	return true;
5252	}
5253
5254	MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5255	std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5256	unsigned LaneIdx, const RegisterBank &RB,
5257	MachineIRBuilder &MIRBuilder) const {
5258	MachineInstr InsElt = nullptr*;
5259	const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5260	MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5261
5262	// Create a register to define with the insert if one wasn't passed in.
5263	if (!DstReg)
5264	DstReg = MRI.createVirtualRegister(RegClass: DstRC);
5265
5266	unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits();
5267	unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5268
5269	if (RB.getID() == AArch64::FPRRegBankID) {
5270	auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder);
5271	InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5272	.addImm(Val: LaneIdx)
5273	.addUse(RegNo: InsSub->getOperand(i: `0`).getReg())
5274	.addImm(Val: `0`);
5275	} else {
5276	InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5277	.addImm(Val: LaneIdx)
5278	.addUse(RegNo: EltReg);
5279	}
5280
5281	constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
5282	return InsElt;
5283	}
5284
5285	bool AArch64InstructionSelector::selectUSMovFromExtend(
5286	MachineInstr &MI, MachineRegisterInfo &MRI) {
5287	if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5288	MI.getOpcode() != TargetOpcode::G_ZEXT &&
5289	MI.getOpcode() != TargetOpcode::G_ANYEXT)
5290	return false;
5291	bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5292	const Register DefReg = MI.getOperand(i: `0`).getReg();
5293	const LLT DstTy = MRI.getType(Reg: DefReg);
5294	unsigned DstSize = DstTy.getSizeInBits();
5295
5296	if (DstSize != `32` && DstSize != `64`)
5297	return false;
5298
5299	MachineInstr *Extract = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT,
5300	Reg: MI.getOperand(i: `1`).getReg(), MRI);
5301	int64_t Lane;
5302	if (!Extract \|\| !mi_match(R: Extract->getOperand(i: `2`).getReg(), MRI, P: m_ICst(Cst&: Lane)))
5303	return false;
5304	Register Src0 = Extract->getOperand(i: `1`).getReg();
5305
5306	const LLT VecTy = MRI.getType(Reg: Src0);
5307	if (VecTy.isScalableVector())
5308	return false;
5309
5310	if (VecTy.getSizeInBits() != `128`) {
5311	const MachineInstr *ScalarToVector = emitScalarToVector(
5312	EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: Src0, MIRBuilder&: MIB);
5313	assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5314	Src0 = ScalarToVector->getOperand(i: `0`).getReg();
5315	}
5316
5317	unsigned Opcode;
5318	if (DstSize == `64` && VecTy.getScalarSizeInBits() == `32`)
5319	Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5320	else if (DstSize == `64` && VecTy.getScalarSizeInBits() == `16`)
5321	Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5322	else if (DstSize == `64` && VecTy.getScalarSizeInBits() == `8`)
5323	Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5324	else if (DstSize == `32` && VecTy.getScalarSizeInBits() == `16`)
5325	Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5326	else if (DstSize == `32` && VecTy.getScalarSizeInBits() == `8`)
5327	Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5328	else
5329	llvm_unreachable("Unexpected type combo for S/UMov!");
5330
5331	// We may need to generate one of these, depending on the type and sign of the
5332	// input:
5333	// DstReg = SMOV Src0, Lane;
5334	// NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5335	MachineInstr ExtI = nullptr*;
5336	if (DstSize == `64` && !IsSigned) {
5337	Register NewReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
5338	MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane);
5339	ExtI = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
5340	.addUse(RegNo: NewReg)
5341	.addImm(Val: AArch64::sub_32);
5342	RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
5343	} else
5344	ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane);
5345
5346	constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
5347	MI.eraseFromParent();
5348	return true;
5349	}
5350
5351	MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5352	Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5353	unsigned int Op;
5354	if (DstSize == `128`) {
5355	if (Bits.getHiBits(numBits: `64`) != Bits.getLoBits(numBits: `64`))
5356	return nullptr;
5357	Op = AArch64::MOVIv16b_ns;
5358	} else {
5359	Op = AArch64::MOVIv8b_ns;
5360	}
5361
5362	uint64_t Val = Bits.zextOrTrunc(width: `64`).getZExtValue();
5363
5364	if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) {
5365	Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val);
5366	auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5367	constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5368	return &*Mov;
5369	}
5370	return nullptr;
5371	}
5372
5373	MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5374	Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5375	bool Inv) {
5376
5377	unsigned int Op;
5378	if (DstSize == `128`) {
5379	if (Bits.getHiBits(numBits: `64`) != Bits.getLoBits(numBits: `64`))
5380	return nullptr;
5381	Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5382	} else {
5383	Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5384	}
5385
5386	uint64_t Val = Bits.zextOrTrunc(width: `64`).getZExtValue();
5387	uint64_t Shift;
5388
5389	if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) {
5390	Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val);
5391	Shift = `0`;
5392	} else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) {
5393	Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val);
5394	Shift = `8`;
5395	} else
5396	return nullptr;
5397
5398	auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5399	constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5400	return &*Mov;
5401	}
5402
5403	MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5404	Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5405	bool Inv) {
5406
5407	unsigned int Op;
5408	if (DstSize == `128`) {
5409	if (Bits.getHiBits(numBits: `64`) != Bits.getLoBits(numBits: `64`))
5410	return nullptr;
5411	Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5412	} else {
5413	Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5414	}
5415
5416	uint64_t Val = Bits.zextOrTrunc(width: `64`).getZExtValue();
5417	uint64_t Shift;
5418
5419	if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) {
5420	Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val);
5421	Shift = `0`;
5422	} else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) {
5423	Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val);
5424	Shift = `8`;
5425	} else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) {
5426	Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val);
5427	Shift = `16`;
5428	} else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) {
5429	Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val);
5430	Shift = `24`;
5431	} else
5432	return nullptr;
5433
5434	auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5435	constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5436	return &*Mov;
5437	}
5438
5439	MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5440	Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5441
5442	unsigned int Op;
5443	if (DstSize == `128`) {
5444	if (Bits.getHiBits(numBits: `64`) != Bits.getLoBits(numBits: `64`))
5445	return nullptr;
5446	Op = AArch64::MOVIv2d_ns;
5447	} else {
5448	Op = AArch64::MOVID;
5449	}
5450
5451	uint64_t Val = Bits.zextOrTrunc(width: `64`).getZExtValue();
5452	if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) {
5453	Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val);
5454	auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5455	constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5456	return &*Mov;
5457	}
5458	return nullptr;
5459	}
5460
5461	MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5462	Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5463	bool Inv) {
5464
5465	unsigned int Op;
5466	if (DstSize == `128`) {
5467	if (Bits.getHiBits(numBits: `64`) != Bits.getLoBits(numBits: `64`))
5468	return nullptr;
5469	Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5470	} else {
5471	Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5472	}
5473
5474	uint64_t Val = Bits.zextOrTrunc(width: `64`).getZExtValue();
5475	uint64_t Shift;
5476
5477	if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) {
5478	Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val);
5479	Shift = `264`;
5480	} else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) {
5481	Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val);
5482	Shift = `272`;
5483	} else
5484	return nullptr;
5485
5486	auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5487	constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5488	return &*Mov;
5489	}
5490
5491	MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5492	Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5493
5494	unsigned int Op;
5495	bool IsWide = false;
5496	if (DstSize == `128`) {
5497	if (Bits.getHiBits(numBits: `64`) != Bits.getLoBits(numBits: `64`))
5498	return nullptr;
5499	Op = AArch64::FMOVv4f32_ns;
5500	IsWide = true;
5501	} else {
5502	Op = AArch64::FMOVv2f32_ns;
5503	}
5504
5505	uint64_t Val = Bits.zextOrTrunc(width: `64`).getZExtValue();
5506
5507	if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) {
5508	Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val);
5509	} else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) {
5510	Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val);
5511	Op = AArch64::FMOVv2f64_ns;
5512	} else
5513	return nullptr;
5514
5515	auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5516	constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5517	return &*Mov;
5518	}
5519
5520	bool AArch64InstructionSelector::selectIndexedExtLoad(
5521	MachineInstr &MI, MachineRegisterInfo &MRI) {
5522	auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI);
5523	Register Dst = ExtLd.getDstReg();
5524	Register WriteBack = ExtLd.getWritebackReg();
5525	Register Base = ExtLd.getBaseReg();
5526	Register Offset = ExtLd.getOffsetReg();
5527	LLT Ty = MRI.getType(Reg: Dst);
5528	assert(Ty.getSizeInBits() <= `64`); // Only for scalar GPRs.
5529	unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5530	bool IsPre = ExtLd.isPre();
5531	bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd);
5532	unsigned InsertIntoSubReg = `0`;
5533	bool IsDst64 = Ty.getSizeInBits() == `64`;
5534
5535	// ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
5536	// long as they are scalar.
5537	bool IsFPR = RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5538	if ((IsSExt && IsFPR) \|\| Ty.isVector())
5539	return false;
5540
5541	unsigned Opc = `0`;
5542	LLT NewLdDstTy;
5543	LLT s32 = LLT::scalar(SizeInBits: `32`);
5544	LLT s64 = LLT::scalar(SizeInBits: `64`);
5545
5546	if (MemSizeBits == `8`) {
5547	if (IsSExt) {
5548	if (IsDst64)
5549	Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5550	else
5551	Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5552	NewLdDstTy = IsDst64 ? s64 : s32;
5553	} else if (IsFPR) {
5554	Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5555	InsertIntoSubReg = AArch64::bsub;
5556	NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
5557	} else {
5558	Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5559	InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : `0`;
5560	NewLdDstTy = s32;
5561	}
5562	} else if (MemSizeBits == `16`) {
5563	if (IsSExt) {
5564	if (IsDst64)
5565	Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5566	else
5567	Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5568	NewLdDstTy = IsDst64 ? s64 : s32;
5569	} else if (IsFPR) {
5570	Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5571	InsertIntoSubReg = AArch64::hsub;
5572	NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
5573	} else {
5574	Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5575	InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : `0`;
5576	NewLdDstTy = s32;
5577	}
5578	} else if (MemSizeBits == `32`) {
5579	if (IsSExt) {
5580	Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5581	NewLdDstTy = s64;
5582	} else if (IsFPR) {
5583	Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5584	InsertIntoSubReg = AArch64::ssub;
5585	NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
5586	} else {
5587	Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5588	InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : `0`;
5589	NewLdDstTy = s32;
5590	}
5591	} else {
5592	llvm_unreachable("Unexpected size for indexed load");
5593	}
5594
5595	auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5596	if (!Cst)
5597	return false; // Shouldn't happen, but just in case.
5598
5599	auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base})
5600	.addImm(Val: Cst ->getSExtValue());
5601	LdMI.cloneMemRefs(OtherMI: ExtLd);
5602	constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
5603	// Make sure to select the load with the MemTy as the dest type, and then
5604	// insert into a larger reg if needed.
5605	if (InsertIntoSubReg) {
5606	// Generate a SUBREG_TO_REG.
5607	auto SubToReg = MIB.buildInstr(Opc: TargetOpcode::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
5608	.addUse(RegNo: LdMI.getReg(Idx: `1`))
5609	.addImm(Val: InsertIntoSubReg);
5610	RBI.constrainGenericRegister(
5611	Reg: SubToReg.getReg(Idx: `0`),
5612	RC: *getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst),
5613	RB: *RBI.getRegBank(Reg: Dst, MRI, TRI)),
5614	MRI);
5615	} else {
5616	auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: `1`));
5617	selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
5618	}
5619	MI.eraseFromParent();
5620
5621	return true;
5622	}
5623
5624	bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5625	MachineRegisterInfo &MRI) {
5626	auto &Ld = cast<GIndexedLoad>(Val&: MI);
5627	Register Dst = Ld.getDstReg();
5628	Register WriteBack = Ld.getWritebackReg();
5629	Register Base = Ld.getBaseReg();
5630	Register Offset = Ld.getOffsetReg();
5631	assert(MRI.getType(Dst).getSizeInBits() <= `128` &&
5632	"Unexpected type for indexed load");
5633	unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5634
5635	if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes())
5636	return selectIndexedExtLoad(MI, MRI);
5637
5638	unsigned Opc = `0`;
5639	if (Ld.isPre()) {
5640	static constexpr unsigned GPROpcodes[] = {
5641	AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5642	AArch64::LDRXpre};
5643	static constexpr unsigned FPROpcodes[] = {
5644	AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5645	AArch64::LDRQpre};
5646	Opc = (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5647	? FPROpcodes[Log2_32(Value: MemSize)]
5648	: GPROpcodes[Log2_32(Value: MemSize)];
5649	;
5650	} else {
5651	static constexpr unsigned GPROpcodes[] = {
5652	AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5653	AArch64::LDRXpost};
5654	static constexpr unsigned FPROpcodes[] = {
5655	AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5656	AArch64::LDRDpost, AArch64::LDRQpost};
5657	Opc = (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5658	? FPROpcodes[Log2_32(Value: MemSize)]
5659	: GPROpcodes[Log2_32(Value: MemSize)];
5660	;
5661	}
5662	auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5663	if (!Cst)
5664	return false; // Shouldn't happen, but just in case.
5665	auto LdMI =
5666	MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst ->getSExtValue());
5667	LdMI.cloneMemRefs(OtherMI: Ld);
5668	constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
5669	MI.eraseFromParent();
5670	return true;
5671	}
5672
5673	bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5674	MachineRegisterInfo &MRI) {
5675	Register Dst = I.getWritebackReg();
5676	Register Val = I.getValueReg();
5677	Register Base = I.getBaseReg();
5678	Register Offset = I.getOffsetReg();
5679	assert(MRI.getType(Val).getSizeInBits() <= `128` &&
5680	"Unexpected type for indexed store");
5681
5682	LocationSize MemSize = I.getMMO().getSize();
5683	unsigned MemSizeInBytes = MemSize.getValue();
5684
5685	assert(MemSizeInBytes && MemSizeInBytes <= `16` &&
5686	"Unexpected indexed store size");
5687	unsigned MemSizeLog2 = Log2_32(Value: MemSizeInBytes);
5688
5689	unsigned Opc = `0`;
5690	if (I.isPre()) {
5691	static constexpr unsigned GPROpcodes[] = {
5692	AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5693	AArch64::STRXpre};
5694	static constexpr unsigned FPROpcodes[] = {
5695	AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5696	AArch64::STRQpre};
5697
5698	if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5699	Opc = FPROpcodes[MemSizeLog2];
5700	else
5701	Opc = GPROpcodes[MemSizeLog2];
5702	} else {
5703	static constexpr unsigned GPROpcodes[] = {
5704	AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5705	AArch64::STRXpost};
5706	static constexpr unsigned FPROpcodes[] = {
5707	AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5708	AArch64::STRDpost, AArch64::STRQpost};
5709
5710	if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5711	Opc = FPROpcodes[MemSizeLog2];
5712	else
5713	Opc = GPROpcodes[MemSizeLog2];
5714	}
5715
5716	auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5717	if (!Cst)
5718	return false; // Shouldn't happen, but just in case.
5719	auto Str =
5720	MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst ->getSExtValue());
5721	Str.cloneMemRefs(OtherMI: I);
5722	constrainSelectedInstRegOperands(I&: *Str, TII, TRI, RBI);
5723	I.eraseFromParent();
5724	return true;
5725	}
5726
5727	MachineInstr *
5728	AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5729	MachineIRBuilder &MIRBuilder,
5730	MachineRegisterInfo &MRI) {
5731	LLT DstTy = MRI.getType(Reg: Dst);
5732	unsigned DstSize = DstTy.getSizeInBits();
5733	assert((DstSize == `64` \|\| DstSize == `128`) &&
5734	"Unexpected vector constant size");
5735
5736	if (CV->isNullValue()) {
5737	if (DstSize == `128`) {
5738	auto Mov =
5739	MIRBuilder.buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {Dst}, SrcOps: {}).addImm(Val: `0`);
5740	constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5741	return &*Mov;
5742	}
5743
5744	if (DstSize == `64`) {
5745	auto Mov =
5746	MIRBuilder
5747	.buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {&AArch64::FPR128RegClass}, SrcOps: {})
5748	.addImm(Val: `0`);
5749	auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {Dst}, SrcOps: {})
5750	.addReg(RegNo: Mov.getReg(Idx: `0`), Flags: {}, SubReg: AArch64::dsub);
5751	RBI.constrainGenericRegister(Reg: Dst, RC: AArch64::FPR64RegClass, MRI);
5752	return &*Copy;
5753	}
5754	}
5755
5756	if (Constant *SplatValue = CV->getSplatValue()) {
5757	APInt SplatValueAsInt =
5758	isa<ConstantFP>(Val: SplatValue)
5759	? cast<ConstantFP>(Val: SplatValue)->getValueAPF().bitcastToAPInt()
5760	: SplatValue->getUniqueInteger();
5761	APInt DefBits = APInt::getSplat(
5762	NewLen: DstSize, V: SplatValueAsInt.trunc(width: DstTy.getScalarSizeInBits()));
5763	auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5764	MachineInstr *NewOp;
5765	bool Inv = false;
5766	if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) \|\|
5767	(NewOp =
5768	tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) \|\|
5769	(NewOp =
5770	tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) \|\|
5771	(NewOp =
5772	tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) \|\|
5773	(NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) \|\|
5774	(NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)))
5775	return NewOp;
5776
5777	DefBits = ~DefBits;
5778	Inv = true;
5779	if ((NewOp =
5780	tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) \|\|
5781	(NewOp =
5782	tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) \|\|
5783	(NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)))
5784	return NewOp;
5785	return nullptr;
5786	};
5787
5788	if (auto *NewOp = TryMOVIWithBits (DefBits))
5789	return NewOp;
5790
5791	// See if a fneg of the constant can be materialized with a MOVI, etc
5792	auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5793	unsigned NegOpc) -> MachineInstr * {
5794	// FNegate each sub-element of the constant
5795	APInt Neg = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: `1`).zext(width: DstSize);
5796	APInt NegBits(DstSize, `0`);
5797	unsigned NumElts = DstSize / NumBits;
5798	for (unsigned i = `0`; i < NumElts; i++)
5799	NegBits \|= Neg << (NumBits * i);
5800	NegBits = DefBits ^ NegBits;
5801
5802	// Try to create the new constants with MOVI, and if so generate a fneg
5803	// for it.
5804	if (auto *NewOp = TryMOVIWithBits (NegBits)) {
5805	Register NewDst = MRI.createVirtualRegister(
5806	RegClass: DstSize == `64` ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
5807	NewOp->getOperand(i: `0`).setReg(NewDst);
5808	return MIRBuilder.buildInstr(Opc: NegOpc, DstOps: {Dst}, SrcOps: {NewDst});
5809	}
5810	return nullptr;
5811	};
5812	MachineInstr *R;
5813	if ((R = TryWithFNeg (DefBits, `32`,
5814	DstSize == `64` ? AArch64::FNEGv2f32
5815	: AArch64::FNEGv4f32)) \|\|
5816	(R = TryWithFNeg (DefBits, `64`,
5817	DstSize == `64` ? AArch64::FNEGDr
5818	: AArch64::FNEGv2f64)) \|\|
5819	(STI.hasFullFP16() &&
5820	(R = TryWithFNeg (DefBits, `16`,
5821	DstSize == `64` ? AArch64::FNEGv4f16
5822	: AArch64::FNEGv8f16))))
5823	return R;
5824	}
5825
5826	auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder);
5827	if (!CPLoad) {
5828	LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5829	return nullptr;
5830	}
5831
5832	auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: `0`));
5833	RBI.constrainGenericRegister(
5834	Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: `0`).getReg()), MRI);
5835	return &*Copy;
5836	}
5837
5838	bool AArch64InstructionSelector::tryOptConstantBuildVec(
5839	MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5840	assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5841	unsigned DstSize = DstTy.getSizeInBits();
5842	assert(DstSize <= `128` && "Unexpected build_vec type!");
5843	if (DstSize < `32`)
5844	return false;
5845	// Check if we're building a constant vector, in which case we want to
5846	// generate a constant pool load instead of a vector insert sequence.
5847	SmallVector<Constant *, `16`> Csts;
5848	for (unsigned Idx = `1`; Idx < I.getNumOperands(); ++Idx) {
5849	Register OpReg = I.getOperand(i: Idx).getReg();
5850	if (auto AnyConst = getAnyConstantVRegValWithLookThrough(
5851	VReg: OpReg, MRI, /LookThroughInstrs=/true,
5852	/LookThroughAnyExt=/true)) {
5853	MachineInstr *DefMI = MRI.getVRegDef(Reg: AnyConst ->VReg);
5854
5855	if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
5856	Csts.emplace_back(
5857	Args: ConstantInt::get(Context&: MIB.getMF().getFunction().getContext(),
5858	V: std::move(AnyConst ->Value)));
5859	continue;
5860	}
5861
5862	if (DefMI->getOpcode() == TargetOpcode::G_FCONSTANT) {
5863	Csts.emplace_back(
5864	Args: const_cast<ConstantFP *>(DefMI->getOperand(i: `1`).getFPImm()));
5865	continue;
5866	}
5867	}
5868	return false;
5869	}
5870	Constant *CV = ConstantVector::get(V: Csts);
5871	if (!emitConstantVector(Dst: I.getOperand(i: `0`).getReg(), CV, MIRBuilder&: MIB, MRI))
5872	return false;
5873	I.eraseFromParent();
5874	return true;
5875	}
5876
5877	bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5878	MachineInstr &I, MachineRegisterInfo &MRI) {
5879	// Given:
5880	// %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5881	//
5882	// Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5883	Register Dst = I.getOperand(i: `0`).getReg();
5884	Register EltReg = I.getOperand(i: `1`).getReg();
5885	LLT EltTy = MRI.getType(Reg: EltReg);
5886	// If the index isn't on the same bank as its elements, then this can't be a
5887	// SUBREG_TO_REG.
5888	const RegisterBank &EltRB = *RBI.getRegBank(Reg: EltReg, MRI, TRI);
5889	const RegisterBank &DstRB = *RBI.getRegBank(Reg: Dst, MRI, TRI);
5890	if (EltRB != DstRB)
5891	return false;
5892	if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: `2`), P: [&MRI](const MachineOperand &Op) {
5893	return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI);
5894	}))
5895	return false;
5896	unsigned SubReg;
5897	const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB);
5898	if (!EltRC)
5899	return false;
5900	const TargetRegisterClass *DstRC =
5901	getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB);
5902	if (!DstRC)
5903	return false;
5904	if (!getSubRegForClass(RC: EltRC, TRI, SubReg))
5905	return false;
5906	auto SubregToReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
5907	.addUse(RegNo: EltReg)
5908	.addImm(Val: SubReg);
5909	I.eraseFromParent();
5910	constrainSelectedInstRegOperands(I&: *SubregToReg, TII, TRI, RBI);
5911	return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI);
5912	}
5913
5914	bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5915	MachineRegisterInfo &MRI) {
5916	assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5917	// Until we port more of the optimized selections, for now just use a vector
5918	// insert sequence.
5919	const LLT DstTy = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
5920	const LLT EltTy = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
5921	unsigned EltSize = EltTy.getSizeInBits();
5922
5923	if (tryOptConstantBuildVec(I, DstTy, MRI))
5924	return true;
5925	if (tryOptBuildVecToSubregToReg(I, MRI))
5926	return true;
5927
5928	if (EltSize != `8` && EltSize != `16` && EltSize != `32` && EltSize != `64`)
5929	return false; // Don't support all element types yet.
5930	const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: `1`).getReg(), MRI, TRI);
5931
5932	const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5933	MachineInstr *ScalarToVec =
5934	emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC,
5935	Scalar: I.getOperand(i: `1`).getReg(), MIRBuilder&: MIB);
5936	if (!ScalarToVec)
5937	return false;
5938
5939	Register DstVec = ScalarToVec->getOperand(i: `0`).getReg();
5940	unsigned DstSize = DstTy.getSizeInBits();
5941
5942	// Keep track of the last MI we inserted. Later on, we might be able to save
5943	// a copy using it.
5944	MachineInstr *PrevMI = ScalarToVec;
5945	for (unsigned i = `2`, e = DstSize / EltSize + `1`; i < e; ++i) {
5946	// Note that if we don't do a subregister copy, we can end up making an
5947	// extra register.
5948	Register OpReg = I.getOperand(i).getReg();
5949	// Do not emit inserts for undefs
5950	if (!getOpcodeDef<GImplicitDef>(Reg: OpReg, MRI)) {
5951	PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: OpReg, LaneIdx: i - `1`, RB, MIRBuilder&: MIB);
5952	DstVec = PrevMI->getOperand(i: `0`).getReg();
5953	}
5954	}
5955
5956	// If DstTy's size in bits is less than 128, then emit a subregister copy
5957	// from DstVec to the last register we've defined.
5958	if (DstSize < `128`) {
5959	// Force this to be FPR using the destination vector.
5960	const TargetRegisterClass *RC =
5961	getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
5962	if (!RC)
5963	return false;
5964	if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5965	LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5966	return false;
5967	}
5968
5969	unsigned SubReg = `0`;
5970	if (!getSubRegForClass(RC, TRI, SubReg))
5971	return false;
5972	if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5973	LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5974	<< "\n");
5975	return false;
5976	}
5977
5978	Register Reg = MRI.createVirtualRegister(RegClass: RC);
5979	Register DstReg = I.getOperand(i: `0`).getReg();
5980
5981	MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, Flags: {}, SubReg);
5982	MachineOperand &RegOp = I.getOperand(i: `1`);
5983	RegOp.setReg(Reg);
5984	RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
5985	} else {
5986	// We either have a vector with all elements (except the first one) undef or
5987	// at least one non-undef non-first element. In the first case, we need to
5988	// constrain the output register ourselves as we may have generated an
5989	// INSERT_SUBREG operation which is a generic operation for which the
5990	// output regclass cannot be automatically chosen.
5991	//
5992	// In the second case, there is no need to do this as it may generate an
5993	// instruction like INSvi32gpr where the regclass can be automatically
5994	// chosen.
5995	//
5996	// Also, we save a copy by re-using the destination register on the final
5997	// insert.
5998	PrevMI->getOperand(i: `0`).setReg(I.getOperand(i: `0`).getReg());
5999	constrainSelectedInstRegOperands(I&: *PrevMI, TII, TRI, RBI);
6000
6001	Register DstReg = PrevMI->getOperand(i: `0`).getReg();
6002	if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
6003	const TargetRegisterClass *RC =
6004	getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
6005	RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
6006	}
6007	}
6008
6009	I.eraseFromParent();
6010	return true;
6011	}
6012
6013	bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
6014	unsigned NumVecs,
6015	MachineInstr &I) {
6016	assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6017	assert(Opc && "Expected an opcode?");
6018	assert(NumVecs > `1` && NumVecs < `5` && "Only support 2, 3, or 4 vectors");
6019	auto &MRI = *MIB.getMRI();
6020	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6021	unsigned Size = Ty.getSizeInBits();
6022	assert((Size == `64` \|\| Size == `128`) &&
6023	"Destination must be 64 bits or 128 bits?");
6024	unsigned SubReg = Size == `64` ? AArch64::dsub0 : AArch64::qsub0;
6025	auto Ptr = I.getOperand(i: I.getNumOperands() - `1`).getReg();
6026	assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
6027	auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr});
6028	Load.cloneMemRefs(OtherMI: I);
6029	constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
6030	Register SelectedLoadDst = Load ->getOperand(i: `0`).getReg();
6031	for (unsigned Idx = `0`; Idx < NumVecs; ++Idx) {
6032	auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {})
6033	.addReg(RegNo: SelectedLoadDst, Flags: {}, SubReg: SubReg + Idx);
6034	// Emit the subreg copies and immediately select them.
6035	// FIXME: We should refactor our copy code into an emitCopy helper and
6036	// clean up uses of this pattern elsewhere in the selector.
6037	selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
6038	}
6039	return true;
6040	}
6041
6042	bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6043	unsigned Opc, unsigned NumVecs, MachineInstr &I) {
6044	assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6045	assert(Opc && "Expected an opcode?");
6046	assert(NumVecs > `1` && NumVecs < `5` && "Only support 2, 3, or 4 vectors");
6047	auto &MRI = *MIB.getMRI();
6048	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6049	bool Narrow = Ty.getSizeInBits() == `64`;
6050
6051	auto FirstSrcRegIt = I.operands_begin() + NumVecs + `1`;
6052	SmallVector<Register, `4`> Regs(NumVecs);
6053	std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(),
6054	unary_op: [](auto MO) { return MO.getReg(); });
6055
6056	if (Narrow) {
6057	transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
6058	return emitScalarToVector(EltSize: `64`, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
6059	->getOperand(i: `0`)
6060	.getReg();
6061	});
6062	Ty = Ty.multiplyElements(Factor: `2`);
6063	}
6064
6065	Register Tuple = createQTuple(Regs, MIB);
6066	auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI);
6067	if (!LaneNo)
6068	return false;
6069
6070	Register Ptr = (FirstSrcRegIt + NumVecs + `1`)->getReg();
6071	auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {})
6072	.addReg(RegNo: Tuple)
6073	.addImm(Val: LaneNo ->getZExtValue())
6074	.addReg(RegNo: Ptr);
6075	Load.cloneMemRefs(OtherMI: I);
6076	constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
6077	Register SelectedLoadDst = Load ->getOperand(i: `0`).getReg();
6078	unsigned SubReg = AArch64::qsub0;
6079	for (unsigned Idx = `0`; Idx < NumVecs; ++Idx) {
6080	auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY,
6081	DstOps: {Narrow ? DstOp (&AArch64::FPR128RegClass)
6082	: DstOp (I.getOperand(i: Idx).getReg())},
6083	SrcOps: {})
6084	.addReg(RegNo: SelectedLoadDst, Flags: {}, SubReg: SubReg + Idx);
6085	Register WideReg = Vec.getReg(Idx: `0`);
6086	// Emit the subreg copies and immediately select them.
6087	selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
6088	if (Narrow &&
6089	!emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI))
6090	return false;
6091	}
6092	return true;
6093	}
6094
6095	void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6096	unsigned NumVecs,
6097	unsigned Opc) {
6098	MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6099	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6100	Register Ptr = I.getOperand(i: `1` + NumVecs).getReg();
6101
6102	SmallVector<Register, `2`> Regs(NumVecs);
6103	std::transform(first: I.operands_begin() + `1`, last: I.operands_begin() + `1` + NumVecs,
6104	result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6105
6106	Register Tuple = Ty.getSizeInBits() == `128` ? createQTuple(Regs, MIB)
6107	: createDTuple(Regs, MIB);
6108	auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr});
6109	Store.cloneMemRefs(OtherMI: I);
6110	constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
6111	}
6112
6113	bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6114	MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6115	MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6116	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6117	bool Narrow = Ty.getSizeInBits() == `64`;
6118
6119	SmallVector<Register, `2`> Regs(NumVecs);
6120	std::transform(first: I.operands_begin() + `1`, last: I.operands_begin() + `1` + NumVecs,
6121	result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6122
6123	if (Narrow)
6124	transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
6125	return emitScalarToVector(EltSize: `64`, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
6126	->getOperand(i: `0`)
6127	.getReg();
6128	});
6129
6130	Register Tuple = createQTuple(Regs, MIB);
6131
6132	auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: `1` + NumVecs).getReg(), MRI);
6133	if (!LaneNo)
6134	return false;
6135	Register Ptr = I.getOperand(i: `1` + NumVecs + `1`).getReg();
6136	auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {})
6137	.addReg(RegNo: Tuple)
6138	.addImm(Val: LaneNo ->getZExtValue())
6139	.addReg(RegNo: Ptr);
6140	Store.cloneMemRefs(OtherMI: I);
6141	constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
6142	return true;
6143	}
6144
6145	bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6146	MachineInstr &I, MachineRegisterInfo &MRI) {
6147	// Find the intrinsic ID.
6148	unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6149
6150	const LLT S8 = LLT::scalar(SizeInBits: `8`);
6151	const LLT S16 = LLT::scalar(SizeInBits: `16`);
6152	const LLT S32 = LLT::scalar(SizeInBits: `32`);
6153	const LLT S64 = LLT::scalar(SizeInBits: `64`);
6154	const LLT P0 = LLT::pointer(AddressSpace: `0`, SizeInBits: `64`);
6155	// Select the instruction.
6156	switch (IntrinID) {
6157	default:
6158	return false;
6159	case Intrinsic::aarch64_ldxp:
6160	case Intrinsic::aarch64_ldaxp: {
6161	auto NewI = MIB.buildInstr(
6162	Opc: IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6163	DstOps: {I.getOperand(i: `0`).getReg(), I.getOperand(i: `1`).getReg()},
6164	SrcOps: {I.getOperand(i: `3`)});
6165	NewI.cloneMemRefs(OtherMI: I);
6166	constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
6167	break;
6168	}
6169	case Intrinsic::aarch64_neon_ld1x2: {
6170	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6171	unsigned Opc = `0`;
6172	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6173	Opc = AArch64::LD1Twov8b;
6174	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6175	Opc = AArch64::LD1Twov16b;
6176	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6177	Opc = AArch64::LD1Twov4h;
6178	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6179	Opc = AArch64::LD1Twov8h;
6180	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6181	Opc = AArch64::LD1Twov2s;
6182	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6183	Opc = AArch64::LD1Twov4s;
6184	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6185	Opc = AArch64::LD1Twov2d;
6186	else if (Ty == S64 \|\| Ty == P0)
6187	Opc = AArch64::LD1Twov1d;
6188	else
6189	llvm_unreachable("Unexpected type for ld1x2!");
6190	selectVectorLoadIntrinsic(Opc, NumVecs: `2`, I);
6191	break;
6192	}
6193	case Intrinsic::aarch64_neon_ld1x3: {
6194	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6195	unsigned Opc = `0`;
6196	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6197	Opc = AArch64::LD1Threev8b;
6198	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6199	Opc = AArch64::LD1Threev16b;
6200	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6201	Opc = AArch64::LD1Threev4h;
6202	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6203	Opc = AArch64::LD1Threev8h;
6204	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6205	Opc = AArch64::LD1Threev2s;
6206	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6207	Opc = AArch64::LD1Threev4s;
6208	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6209	Opc = AArch64::LD1Threev2d;
6210	else if (Ty == S64 \|\| Ty == P0)
6211	Opc = AArch64::LD1Threev1d;
6212	else
6213	llvm_unreachable("Unexpected type for ld1x3!");
6214	selectVectorLoadIntrinsic(Opc, NumVecs: `3`, I);
6215	break;
6216	}
6217	case Intrinsic::aarch64_neon_ld1x4: {
6218	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6219	unsigned Opc = `0`;
6220	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6221	Opc = AArch64::LD1Fourv8b;
6222	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6223	Opc = AArch64::LD1Fourv16b;
6224	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6225	Opc = AArch64::LD1Fourv4h;
6226	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6227	Opc = AArch64::LD1Fourv8h;
6228	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6229	Opc = AArch64::LD1Fourv2s;
6230	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6231	Opc = AArch64::LD1Fourv4s;
6232	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6233	Opc = AArch64::LD1Fourv2d;
6234	else if (Ty == S64 \|\| Ty == P0)
6235	Opc = AArch64::LD1Fourv1d;
6236	else
6237	llvm_unreachable("Unexpected type for ld1x4!");
6238	selectVectorLoadIntrinsic(Opc, NumVecs: `4`, I);
6239	break;
6240	}
6241	case Intrinsic::aarch64_neon_ld2: {
6242	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6243	unsigned Opc = `0`;
6244	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6245	Opc = AArch64::LD2Twov8b;
6246	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6247	Opc = AArch64::LD2Twov16b;
6248	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6249	Opc = AArch64::LD2Twov4h;
6250	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6251	Opc = AArch64::LD2Twov8h;
6252	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6253	Opc = AArch64::LD2Twov2s;
6254	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6255	Opc = AArch64::LD2Twov4s;
6256	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6257	Opc = AArch64::LD2Twov2d;
6258	else if (Ty == S64 \|\| Ty == P0)
6259	Opc = AArch64::LD1Twov1d;
6260	else
6261	llvm_unreachable("Unexpected type for ld2!");
6262	selectVectorLoadIntrinsic(Opc, NumVecs: `2`, I);
6263	break;
6264	}
6265	case Intrinsic::aarch64_neon_ld2lane: {
6266	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6267	unsigned Opc;
6268	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8) \|\| Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6269	Opc = AArch64::LD2i8;
6270	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16) \|\| Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6271	Opc = AArch64::LD2i16;
6272	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32) \|\| Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6273	Opc = AArch64::LD2i32;
6274	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\|
6275	Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0) \|\| Ty == S64 \|\| Ty == P0)
6276	Opc = AArch64::LD2i64;
6277	else
6278	llvm_unreachable("Unexpected type for st2lane!");
6279	if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: `2`, I))
6280	return false;
6281	break;
6282	}
6283	case Intrinsic::aarch64_neon_ld2r: {
6284	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6285	unsigned Opc = `0`;
6286	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6287	Opc = AArch64::LD2Rv8b;
6288	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6289	Opc = AArch64::LD2Rv16b;
6290	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6291	Opc = AArch64::LD2Rv4h;
6292	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6293	Opc = AArch64::LD2Rv8h;
6294	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6295	Opc = AArch64::LD2Rv2s;
6296	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6297	Opc = AArch64::LD2Rv4s;
6298	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6299	Opc = AArch64::LD2Rv2d;
6300	else if (Ty == S64 \|\| Ty == P0)
6301	Opc = AArch64::LD2Rv1d;
6302	else
6303	llvm_unreachable("Unexpected type for ld2r!");
6304	selectVectorLoadIntrinsic(Opc, NumVecs: `2`, I);
6305	break;
6306	}
6307	case Intrinsic::aarch64_neon_ld3: {
6308	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6309	unsigned Opc = `0`;
6310	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6311	Opc = AArch64::LD3Threev8b;
6312	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6313	Opc = AArch64::LD3Threev16b;
6314	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6315	Opc = AArch64::LD3Threev4h;
6316	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6317	Opc = AArch64::LD3Threev8h;
6318	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6319	Opc = AArch64::LD3Threev2s;
6320	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6321	Opc = AArch64::LD3Threev4s;
6322	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6323	Opc = AArch64::LD3Threev2d;
6324	else if (Ty == S64 \|\| Ty == P0)
6325	Opc = AArch64::LD1Threev1d;
6326	else
6327	llvm_unreachable("Unexpected type for ld3!");
6328	selectVectorLoadIntrinsic(Opc, NumVecs: `3`, I);
6329	break;
6330	}
6331	case Intrinsic::aarch64_neon_ld3lane: {
6332	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6333	unsigned Opc;
6334	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8) \|\| Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6335	Opc = AArch64::LD3i8;
6336	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16) \|\| Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6337	Opc = AArch64::LD3i16;
6338	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32) \|\| Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6339	Opc = AArch64::LD3i32;
6340	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\|
6341	Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0) \|\| Ty == S64 \|\| Ty == P0)
6342	Opc = AArch64::LD3i64;
6343	else
6344	llvm_unreachable("Unexpected type for st3lane!");
6345	if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: `3`, I))
6346	return false;
6347	break;
6348	}
6349	case Intrinsic::aarch64_neon_ld3r: {
6350	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6351	unsigned Opc = `0`;
6352	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6353	Opc = AArch64::LD3Rv8b;
6354	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6355	Opc = AArch64::LD3Rv16b;
6356	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6357	Opc = AArch64::LD3Rv4h;
6358	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6359	Opc = AArch64::LD3Rv8h;
6360	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6361	Opc = AArch64::LD3Rv2s;
6362	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6363	Opc = AArch64::LD3Rv4s;
6364	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6365	Opc = AArch64::LD3Rv2d;
6366	else if (Ty == S64 \|\| Ty == P0)
6367	Opc = AArch64::LD3Rv1d;
6368	else
6369	llvm_unreachable("Unexpected type for ld3r!");
6370	selectVectorLoadIntrinsic(Opc, NumVecs: `3`, I);
6371	break;
6372	}
6373	case Intrinsic::aarch64_neon_ld4: {
6374	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6375	unsigned Opc = `0`;
6376	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6377	Opc = AArch64::LD4Fourv8b;
6378	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6379	Opc = AArch64::LD4Fourv16b;
6380	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6381	Opc = AArch64::LD4Fourv4h;
6382	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6383	Opc = AArch64::LD4Fourv8h;
6384	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6385	Opc = AArch64::LD4Fourv2s;
6386	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6387	Opc = AArch64::LD4Fourv4s;
6388	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6389	Opc = AArch64::LD4Fourv2d;
6390	else if (Ty == S64 \|\| Ty == P0)
6391	Opc = AArch64::LD1Fourv1d;
6392	else
6393	llvm_unreachable("Unexpected type for ld4!");
6394	selectVectorLoadIntrinsic(Opc, NumVecs: `4`, I);
6395	break;
6396	}
6397	case Intrinsic::aarch64_neon_ld4lane: {
6398	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6399	unsigned Opc;
6400	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8) \|\| Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6401	Opc = AArch64::LD4i8;
6402	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16) \|\| Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6403	Opc = AArch64::LD4i16;
6404	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32) \|\| Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6405	Opc = AArch64::LD4i32;
6406	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\|
6407	Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0) \|\| Ty == S64 \|\| Ty == P0)
6408	Opc = AArch64::LD4i64;
6409	else
6410	llvm_unreachable("Unexpected type for st4lane!");
6411	if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: `4`, I))
6412	return false;
6413	break;
6414	}
6415	case Intrinsic::aarch64_neon_ld4r: {
6416	LLT Ty = MRI.getType(Reg: I.getOperand(i: `0`).getReg());
6417	unsigned Opc = `0`;
6418	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6419	Opc = AArch64::LD4Rv8b;
6420	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6421	Opc = AArch64::LD4Rv16b;
6422	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6423	Opc = AArch64::LD4Rv4h;
6424	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6425	Opc = AArch64::LD4Rv8h;
6426	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6427	Opc = AArch64::LD4Rv2s;
6428	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6429	Opc = AArch64::LD4Rv4s;
6430	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6431	Opc = AArch64::LD4Rv2d;
6432	else if (Ty == S64 \|\| Ty == P0)
6433	Opc = AArch64::LD4Rv1d;
6434	else
6435	llvm_unreachable("Unexpected type for ld4r!");
6436	selectVectorLoadIntrinsic(Opc, NumVecs: `4`, I);
6437	break;
6438	}
6439	case Intrinsic::aarch64_neon_st1x2: {
6440	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6441	unsigned Opc;
6442	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6443	Opc = AArch64::ST1Twov8b;
6444	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6445	Opc = AArch64::ST1Twov16b;
6446	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6447	Opc = AArch64::ST1Twov4h;
6448	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6449	Opc = AArch64::ST1Twov8h;
6450	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6451	Opc = AArch64::ST1Twov2s;
6452	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6453	Opc = AArch64::ST1Twov4s;
6454	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6455	Opc = AArch64::ST1Twov2d;
6456	else if (Ty == S64 \|\| Ty == P0)
6457	Opc = AArch64::ST1Twov1d;
6458	else
6459	llvm_unreachable("Unexpected type for st1x2!");
6460	selectVectorStoreIntrinsic(I, NumVecs: `2`, Opc);
6461	break;
6462	}
6463	case Intrinsic::aarch64_neon_st1x3: {
6464	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6465	unsigned Opc;
6466	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6467	Opc = AArch64::ST1Threev8b;
6468	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6469	Opc = AArch64::ST1Threev16b;
6470	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6471	Opc = AArch64::ST1Threev4h;
6472	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6473	Opc = AArch64::ST1Threev8h;
6474	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6475	Opc = AArch64::ST1Threev2s;
6476	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6477	Opc = AArch64::ST1Threev4s;
6478	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6479	Opc = AArch64::ST1Threev2d;
6480	else if (Ty == S64 \|\| Ty == P0)
6481	Opc = AArch64::ST1Threev1d;
6482	else
6483	llvm_unreachable("Unexpected type for st1x3!");
6484	selectVectorStoreIntrinsic(I, NumVecs: `3`, Opc);
6485	break;
6486	}
6487	case Intrinsic::aarch64_neon_st1x4: {
6488	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6489	unsigned Opc;
6490	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6491	Opc = AArch64::ST1Fourv8b;
6492	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6493	Opc = AArch64::ST1Fourv16b;
6494	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6495	Opc = AArch64::ST1Fourv4h;
6496	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6497	Opc = AArch64::ST1Fourv8h;
6498	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6499	Opc = AArch64::ST1Fourv2s;
6500	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6501	Opc = AArch64::ST1Fourv4s;
6502	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6503	Opc = AArch64::ST1Fourv2d;
6504	else if (Ty == S64 \|\| Ty == P0)
6505	Opc = AArch64::ST1Fourv1d;
6506	else
6507	llvm_unreachable("Unexpected type for st1x4!");
6508	selectVectorStoreIntrinsic(I, NumVecs: `4`, Opc);
6509	break;
6510	}
6511	case Intrinsic::aarch64_neon_st2: {
6512	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6513	unsigned Opc;
6514	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6515	Opc = AArch64::ST2Twov8b;
6516	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6517	Opc = AArch64::ST2Twov16b;
6518	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6519	Opc = AArch64::ST2Twov4h;
6520	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6521	Opc = AArch64::ST2Twov8h;
6522	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6523	Opc = AArch64::ST2Twov2s;
6524	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6525	Opc = AArch64::ST2Twov4s;
6526	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6527	Opc = AArch64::ST2Twov2d;
6528	else if (Ty == S64 \|\| Ty == P0)
6529	Opc = AArch64::ST1Twov1d;
6530	else
6531	llvm_unreachable("Unexpected type for st2!");
6532	selectVectorStoreIntrinsic(I, NumVecs: `2`, Opc);
6533	break;
6534	}
6535	case Intrinsic::aarch64_neon_st3: {
6536	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6537	unsigned Opc;
6538	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6539	Opc = AArch64::ST3Threev8b;
6540	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6541	Opc = AArch64::ST3Threev16b;
6542	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6543	Opc = AArch64::ST3Threev4h;
6544	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6545	Opc = AArch64::ST3Threev8h;
6546	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6547	Opc = AArch64::ST3Threev2s;
6548	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6549	Opc = AArch64::ST3Threev4s;
6550	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6551	Opc = AArch64::ST3Threev2d;
6552	else if (Ty == S64 \|\| Ty == P0)
6553	Opc = AArch64::ST1Threev1d;
6554	else
6555	llvm_unreachable("Unexpected type for st3!");
6556	selectVectorStoreIntrinsic(I, NumVecs: `3`, Opc);
6557	break;
6558	}
6559	case Intrinsic::aarch64_neon_st4: {
6560	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6561	unsigned Opc;
6562	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8))
6563	Opc = AArch64::ST4Fourv8b;
6564	else if (Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6565	Opc = AArch64::ST4Fourv16b;
6566	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16))
6567	Opc = AArch64::ST4Fourv4h;
6568	else if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6569	Opc = AArch64::ST4Fourv8h;
6570	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32))
6571	Opc = AArch64::ST4Fourv2s;
6572	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6573	Opc = AArch64::ST4Fourv4s;
6574	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\| Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0))
6575	Opc = AArch64::ST4Fourv2d;
6576	else if (Ty == S64 \|\| Ty == P0)
6577	Opc = AArch64::ST1Fourv1d;
6578	else
6579	llvm_unreachable("Unexpected type for st4!");
6580	selectVectorStoreIntrinsic(I, NumVecs: `4`, Opc);
6581	break;
6582	}
6583	case Intrinsic::aarch64_neon_st2lane: {
6584	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6585	unsigned Opc;
6586	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8) \|\| Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6587	Opc = AArch64::ST2i8;
6588	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16) \|\| Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6589	Opc = AArch64::ST2i16;
6590	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32) \|\| Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6591	Opc = AArch64::ST2i32;
6592	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\|
6593	Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0) \|\| Ty == S64 \|\| Ty == P0)
6594	Opc = AArch64::ST2i64;
6595	else
6596	llvm_unreachable("Unexpected type for st2lane!");
6597	if (!selectVectorStoreLaneIntrinsic(I, NumVecs: `2`, Opc))
6598	return false;
6599	break;
6600	}
6601	case Intrinsic::aarch64_neon_st3lane: {
6602	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6603	unsigned Opc;
6604	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8) \|\| Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6605	Opc = AArch64::ST3i8;
6606	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16) \|\| Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6607	Opc = AArch64::ST3i16;
6608	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32) \|\| Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6609	Opc = AArch64::ST3i32;
6610	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\|
6611	Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0) \|\| Ty == S64 \|\| Ty == P0)
6612	Opc = AArch64::ST3i64;
6613	else
6614	llvm_unreachable("Unexpected type for st3lane!");
6615	if (!selectVectorStoreLaneIntrinsic(I, NumVecs: `3`, Opc))
6616	return false;
6617	break;
6618	}
6619	case Intrinsic::aarch64_neon_st4lane: {
6620	LLT Ty = MRI.getType(Reg: I.getOperand(i: `1`).getReg());
6621	unsigned Opc;
6622	if (Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S8) \|\| Ty == LLT::fixed_vector(NumElements: `16`, ScalarTy: S8))
6623	Opc = AArch64::ST4i8;
6624	else if (Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S16) \|\| Ty == LLT::fixed_vector(NumElements: `8`, ScalarTy: S16))
6625	Opc = AArch64::ST4i16;
6626	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S32) \|\| Ty == LLT::fixed_vector(NumElements: `4`, ScalarTy: S32))
6627	Opc = AArch64::ST4i32;
6628	else if (Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: S64) \|\|
6629	Ty == LLT::fixed_vector(NumElements: `2`, ScalarTy: P0) \|\| Ty == S64 \|\| Ty == P0)
6630	Opc = AArch64::ST4i64;
6631	else
6632	llvm_unreachable("Unexpected type for st4lane!");
6633	if (!selectVectorStoreLaneIntrinsic(I, NumVecs: `4`, Opc))
6634	return false;
6635	break;
6636	}
6637	case Intrinsic::aarch64_mops_memset_tag: {
6638	// Transform
6639	// %dst:gpr(p0) = \
6640	// G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6641	// \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6642	// where %dst is updated, into
6643	// %Rd:GPR64common, %Rn:GPR64) = \
6644	// MOPSMemorySetTaggingPseudo \
6645	// %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6646	// where Rd and Rn are tied.
6647	// It is expected that %val has been extended to s64 in legalization.
6648	// Note that the order of the size/value operands are swapped.
6649
6650	Register DstDef = I.getOperand(i: `0`).getReg();
6651	// I.getOperand(1) is the intrinsic function
6652	Register DstUse = I.getOperand(i: `2`).getReg();
6653	Register ValUse = I.getOperand(i: `3`).getReg();
6654	Register SizeUse = I.getOperand(i: `4`).getReg();
6655
6656	// MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6657	// Therefore an additional virtual register is required for the updated size
6658	// operand. This value is not accessible via the semantics of the intrinsic.
6659	Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: `64`));
6660
6661	auto Memset = MIB.buildInstr(Opc: AArch64::MOPSMemorySetTaggingPseudo,
6662	DstOps: {DstDef, SizeDef}, SrcOps: {DstUse, SizeUse, ValUse});
6663	Memset.cloneMemRefs(OtherMI: I);
6664	constrainSelectedInstRegOperands(I&: *Memset, TII, TRI, RBI);
6665	break;
6666	}
6667	case Intrinsic::ptrauth_resign_load_relative: {
6668	Register DstReg = I.getOperand(i: `0`).getReg();
6669	Register ValReg = I.getOperand(i: `2`).getReg();
6670	uint64_t AUTKey = I.getOperand(i: `3`).getImm();
6671	Register AUTDisc = I.getOperand(i: `4`).getReg();
6672	uint64_t PACKey = I.getOperand(i: `5`).getImm();
6673	Register PACDisc = I.getOperand(i: `6`).getReg();
6674	int64_t Addend = I.getOperand(i: `7`).getImm();
6675
6676	Register AUTAddrDisc = AUTDisc;
6677	uint16_t AUTConstDiscC = `0`;
6678	std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6679	extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6680
6681	Register PACAddrDisc = PACDisc;
6682	uint16_t PACConstDiscC = `0`;
6683	std::tie(args&: PACConstDiscC, args&: PACAddrDisc) =
6684	extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI);
6685
6686	MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6687
6688	MIB.buildInstr(Opcode: AArch64::AUTRELLOADPAC)
6689	.addImm(Val: AUTKey)
6690	.addImm(Val: AUTConstDiscC)
6691	.addUse(RegNo: AUTAddrDisc)
6692	.addImm(Val: PACKey)
6693	.addImm(Val: PACConstDiscC)
6694	.addUse(RegNo: PACAddrDisc)
6695	.addImm(Val: Addend)
6696	.constrainAllUses(TII, TRI, RBI);
6697	MIB.buildCopy(Res: {DstReg}, Op: Register (AArch64::X16));
6698
6699	RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6700	I.eraseFromParent();
6701	return true;
6702	}
6703	}
6704
6705	I.eraseFromParent();
6706	return true;
6707	}
6708
6709	bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6710	MachineRegisterInfo &MRI) {
6711	unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6712
6713	switch (IntrinID) {
6714	default:
6715	break;
6716	case Intrinsic::ptrauth_resign: {
6717	Register DstReg = I.getOperand(i: `0`).getReg();
6718	Register ValReg = I.getOperand(i: `2`).getReg();
6719	uint64_t AUTKey = I.getOperand(i: `3`).getImm();
6720	Register AUTDisc = I.getOperand(i: `4`).getReg();
6721	uint64_t PACKey = I.getOperand(i: `5`).getImm();
6722	Register PACDisc = I.getOperand(i: `6`).getReg();
6723
6724	Register AUTAddrDisc = AUTDisc;
6725	uint16_t AUTConstDiscC = `0`;
6726	std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6727	extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6728
6729	Register PACAddrDisc = PACDisc;
6730	uint16_t PACConstDiscC = `0`;
6731	std::tie(args&: PACConstDiscC, args&: PACAddrDisc) =
6732	extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI);
6733
6734	MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6735	MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6736	MIB.buildInstr(Opcode: AArch64::AUTPAC)
6737	.addImm(Val: AUTKey)
6738	.addImm(Val: AUTConstDiscC)
6739	.addUse(RegNo: AUTAddrDisc)
6740	.addImm(Val: PACKey)
6741	.addImm(Val: PACConstDiscC)
6742	.addUse(RegNo: PACAddrDisc)
6743	.constrainAllUses(TII, TRI, RBI);
6744	MIB.buildCopy(Res: {DstReg}, Op: Register (AArch64::X16));
6745
6746	RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6747	I.eraseFromParent();
6748	return true;
6749	}
6750	case Intrinsic::ptrauth_auth_with_pc_and_resign: {
6751	Register DstReg = I.getOperand(i: `0`).getReg();
6752	Register ValReg = I.getOperand(i: `2`).getReg();
6753	uint64_t AUTKey = I.getOperand(i: `3`).getImm();
6754	Register AUTDisc = I.getOperand(i: `4`).getReg();
6755	Register AUTPC = I.getOperand(i: `5`).getReg();
6756	uint64_t PACKey = I.getOperand(i: `6`).getImm();
6757	Register PACDisc = I.getOperand(i: `7`).getReg();
6758
6759	assert((AUTKey == AArch64PACKey::IA \|\| AUTKey == AArch64PACKey::IB) &&
6760	"auth_with_pc_and_resign only supports IA and IB keys");
6761
6762	uint16_t PACConstDiscC = `0`;
6763	Register PACAddrDisc;
6764	std::tie(args&: PACConstDiscC, args&: PACAddrDisc) =
6765	extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI);
6766
6767	if (PACAddrDisc == AArch64::NoRegister)
6768	PACAddrDisc = AArch64::XZR;
6769
6770	MIB.buildCopy(Res: {AArch64::X17}, Op: {ValReg});
6771	MIB.buildCopy(Res: {AArch64::X16}, Op: {AUTDisc});
6772	MIB.buildCopy(Res: {AArch64::X15}, Op: {AUTPC});
6773
6774	MIB.buildInstr(Opcode: AArch64::AUTPCPAC)
6775	.addImm(Val: AUTKey)
6776	.addImm(Val: PACKey)
6777	.addImm(Val: PACConstDiscC)
6778	.addUse(RegNo: PACAddrDisc)
6779	.constrainAllUses(TII, TRI, RBI);
6780
6781	MIB.buildCopy(Res: {DstReg}, Op: Register (AArch64::X17));
6782	RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6783	I.eraseFromParent();
6784	return true;
6785	}
6786	case Intrinsic::ptrauth_auth: {
6787	Register DstReg = I.getOperand(i: `0`).getReg();
6788	Register ValReg = I.getOperand(i: `2`).getReg();
6789	uint64_t AUTKey = I.getOperand(i: `3`).getImm();
6790	Register AUTDisc = I.getOperand(i: `4`).getReg();
6791
6792	Register AUTAddrDisc = AUTDisc;
6793	uint16_t AUTConstDiscC = `0`;
6794	std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6795	extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6796
6797	if (STI.isX16X17Safer()) {
6798	MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6799	MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6800	MIB.buildInstr(Opcode: AArch64::AUTx16x17)
6801	.addImm(Val: AUTKey)
6802	.addImm(Val: AUTConstDiscC)
6803	.addUse(RegNo: AUTAddrDisc)
6804	.constrainAllUses(TII, TRI, RBI);
6805	MIB.buildCopy(Res: {DstReg}, Op: Register (AArch64::X16));
6806	} else {
6807	Register ScratchReg =
6808	MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass);
6809	MIB.buildInstr(Opcode: AArch64::AUTxMxN)
6810	.addDef(RegNo: DstReg)
6811	.addDef(RegNo: ScratchReg)
6812	.addUse(RegNo: ValReg)
6813	.addImm(Val: AUTKey)
6814	.addImm(Val: AUTConstDiscC)
6815	.addUse(RegNo: AUTAddrDisc)
6816	.constrainAllUses(TII, TRI, RBI);
6817	}
6818
6819	RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6820	I.eraseFromParent();
6821	return true;
6822	}
6823	case Intrinsic::frameaddress:
6824	case Intrinsic::returnaddress: {
6825	MachineFunction &MF = *I.getParent()->getParent();
6826	MachineFrameInfo &MFI = MF.getFrameInfo();
6827
6828	unsigned Depth = I.getOperand(i: `2`).getImm();
6829	Register DstReg = I.getOperand(i: `0`).getReg();
6830	RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6831
6832	if (Depth == `0` && IntrinID == Intrinsic::returnaddress) {
6833	if (!MFReturnAddr) {
6834	// Insert the copy from LR/X30 into the entry block, before it can be
6835	// clobbered by anything.
6836	MFI.setReturnAddressIsTaken(true);
6837	MFReturnAddr = getFunctionLiveInPhysReg(
6838	MF, TII, PhysReg: AArch64::LR, RC: AArch64::GPR64RegClass, DL: I.getDebugLoc());
6839	}
6840
6841	if (STI.hasPAuth()) {
6842	MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {MFReturnAddr});
6843	} else {
6844	MIB.buildCopy(Res: {Register (AArch64::LR)}, Op: {MFReturnAddr});
6845	MIB.buildInstr(Opcode: AArch64::XPACLRI);
6846	MIB.buildCopy(Res: {DstReg}, Op: {Register (AArch64::LR)});
6847	}
6848
6849	I.eraseFromParent();
6850	return true;
6851	}
6852
6853	MFI.setFrameAddressIsTaken(true);
6854	Register FrameAddr(AArch64::FP);
6855	while (Depth--) {
6856	Register NextFrame = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
6857	auto Ldr =
6858	MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {NextFrame}, SrcOps: {FrameAddr}).addImm(Val: `0`);
6859	constrainSelectedInstRegOperands(I&: *Ldr, TII, TRI, RBI);
6860	FrameAddr = NextFrame;
6861	}
6862
6863	if (IntrinID == Intrinsic::frameaddress)
6864	MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr});
6865	else {
6866	MFI.setReturnAddressIsTaken(true);
6867
6868	if (STI.hasPAuth()) {
6869	Register TmpReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
6870	MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {TmpReg}, SrcOps: {FrameAddr}).addImm(Val: `1`);
6871	MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {TmpReg});
6872	} else {
6873	MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {Register (AArch64::LR)}, SrcOps: {FrameAddr})
6874	.addImm(Val: `1`);
6875	MIB.buildInstr(Opcode: AArch64::XPACLRI);
6876	MIB.buildCopy(Res: {DstReg}, Op: {Register (AArch64::LR)});
6877	}
6878	}
6879
6880	I.eraseFromParent();
6881	return true;
6882	}
6883	case Intrinsic::aarch64_neon_tbl2:
6884	SelectTable(I, MRI, NumVecs: `2`, Opc1: AArch64::TBLv8i8Two, Opc2: AArch64::TBLv16i8Two, isExt: false);
6885	return true;
6886	case Intrinsic::aarch64_neon_tbl3:
6887	SelectTable(I, MRI, NumVecs: `3`, Opc1: AArch64::TBLv8i8Three, Opc2: AArch64::TBLv16i8Three,
6888	isExt: false);
6889	return true;
6890	case Intrinsic::aarch64_neon_tbl4:
6891	SelectTable(I, MRI, NumVecs: `4`, Opc1: AArch64::TBLv8i8Four, Opc2: AArch64::TBLv16i8Four, isExt: false);
6892	return true;
6893	case Intrinsic::aarch64_neon_tbx2:
6894	SelectTable(I, MRI, NumVecs: `2`, Opc1: AArch64::TBXv8i8Two, Opc2: AArch64::TBXv16i8Two, isExt: true);
6895	return true;
6896	case Intrinsic::aarch64_neon_tbx3:
6897	SelectTable(I, MRI, NumVecs: `3`, Opc1: AArch64::TBXv8i8Three, Opc2: AArch64::TBXv16i8Three, isExt: true);
6898	return true;
6899	case Intrinsic::aarch64_neon_tbx4:
6900	SelectTable(I, MRI, NumVecs: `4`, Opc1: AArch64::TBXv8i8Four, Opc2: AArch64::TBXv16i8Four, isExt: true);
6901	return true;
6902	case Intrinsic::swift_async_context_addr:
6903	auto Sub = MIB.buildInstr(Opc: AArch64::SUBXri, DstOps: {I.getOperand(i: `0`).getReg()},
6904	SrcOps: {Register (AArch64::FP)})
6905	.addImm(Val: `8`)
6906	.addImm(Val: `0`);
6907	constrainSelectedInstRegOperands(I&: *Sub, TII, TRI, RBI);
6908
6909	MF->getFrameInfo().setFrameAddressIsTaken(true);
6910	MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6911	I.eraseFromParent();
6912	return true;
6913	}
6914	return false;
6915	}
6916
// G_PTRAUTH_GLOBAL_VALUE lowering
//
// We have 3 lowering alternatives to choose from:
// - MOVaddrPAC: similar to MOVaddr, with added PAC.
//   If the GV doesn't need a GOT load (i.e., is locally defined)
//   materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
//
// - LOADgotPAC: similar to LOADgot, with added PAC.
//   If the GV needs a GOT load, materialize the pointer using the usual
//   GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
//   section is assumed to be read-only (for example, via relro mechanism). See
//   LowerMOVaddrPAC.
//
// - LOADauthptrstatic: similar to LOADgot, but use a
//   special stub slot instead of a GOT slot.
//   Load a signed pointer for symbol 'sym' from a stub slot named
//   'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
//   resolving. This usually lowers to adrp+ldr, but also emits an entry into
//   .data with an
//   @AUTH relocation. See LowerLOADauthptrstatic.
//
// All 3 are pseudos that are expanded late into longer sequences: this lets
// us provide integrity guarantees on the to-be-signed intermediate values.
//
// LOADauthptrstatic is undesirable because it requires a large section filled
// with often similarly-signed pointers, making it a good harvesting target.
// Thus, it's only used for ptrauth references to extern_weak to avoid null
// checks.
6945
6946	bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6947	MachineInstr &I, MachineRegisterInfo &MRI) const {
6948	Register DefReg = I.getOperand(i: `0`).getReg();
6949	Register Addr = I.getOperand(i: `1`).getReg();
6950	uint64_t Key = I.getOperand(i: `2`).getImm();
6951	Register AddrDisc = I.getOperand(i: `3`).getReg();
6952	uint64_t Disc = I.getOperand(i: `4`).getImm();
6953	int64_t Offset = `0`;
6954
6955	if (Key > AArch64PACKey::LAST)
6956	report_fatal_error(reason: "key in ptrauth global out of range [0, " +
6957	Twine ((int)AArch64PACKey::LAST) + "]");
6958
6959	// Blend only works if the integer discriminator is 16-bit wide.
6960	if (!isUInt<`16`>(x: Disc))
6961	report_fatal_error(
6962	reason: "constant discriminator in ptrauth global out of range [0, 0xffff]");
6963
6964	// Choosing between 3 lowering alternatives is target-specific.
6965	if (!STI.isTargetELF() && !STI.isTargetMachO())
6966	report_fatal_error(reason: "ptrauth global lowering only supported on MachO/ELF");
6967
6968	if (!MRI.hasOneDef(RegNo: Addr))
6969	return false;
6970
6971	// First match any offset we take from the real global.
6972	const MachineInstr DefMI = &MRI.def_instr_begin(RegNo: Addr);
6973	if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6974	Register OffsetReg = DefMI->getOperand(i: `2`).getReg();
6975	if (!MRI.hasOneDef(RegNo: OffsetReg))
6976	return false;
6977	const MachineInstr &OffsetMI = *MRI.def_instr_begin(RegNo: OffsetReg);
6978	if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6979	return false;
6980
6981	Addr = DefMI->getOperand(i: `1`).getReg();
6982	if (!MRI.hasOneDef(RegNo: Addr))
6983	return false;
6984
6985	DefMI = &*MRI.def_instr_begin(RegNo: Addr);
6986	Offset = OffsetMI.getOperand(i: `1`).getCImm()->getSExtValue();
6987	}
6988
6989	// We should be left with a genuine unauthenticated GlobalValue.
6990	const GlobalValue *GV;
6991	if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6992	GV = DefMI->getOperand(i: `1`).getGlobal();
6993	Offset += DefMI->getOperand(i: `1`).getOffset();
6994	} else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6995	GV = DefMI->getOperand(i: `2`).getGlobal();
6996	Offset += DefMI->getOperand(i: `2`).getOffset();
6997	} else {
6998	return false;
6999	}
7000
7001	MachineIRBuilder MIB(I);
7002
7003	// Classify the reference to determine whether it needs a GOT load.
7004	unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
7005	const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != `0`);
7006	assert(((OpFlags & (~AArch64II::MO_GOT)) == `0`) &&
7007	"unsupported non-GOT op flags on ptrauth global reference");
7008	assert((!GV->hasExternalWeakLinkage() \|\| NeedsGOTLoad) &&
7009	"unsupported non-GOT reference to weak ptrauth global");
7010
7011	std::optional<APInt> AddrDiscVal = getIConstantVRegVal(VReg: AddrDisc, MRI);
7012	bool HasAddrDisc = !AddrDiscVal \|\| *AddrDiscVal != `0`;
7013
7014	// Non-extern_weak:
7015	// - No GOT load needed -> MOVaddrPAC
7016	// - GOT load for non-extern_weak -> LOADgotPAC
7017	// Note that we disallow extern_weak refs to avoid null checks later.
7018	if (!GV->hasExternalWeakLinkage()) {
7019	MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
7020	MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
7021	MIB.buildInstr(Opcode: NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
7022	.addGlobalAddress(GV, Offset)
7023	.addImm(Val: Key)
7024	.addReg(RegNo: HasAddrDisc ? AddrDisc : AArch64::XZR)
7025	.addImm(Val: Disc)
7026	.constrainAllUses(TII, TRI, RBI);
7027	MIB.buildCopy(Res: DefReg, Op: Register (AArch64::X16));
7028	RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
7029	I.eraseFromParent();
7030	return true;
7031	}
7032
7033	// extern_weak -> LOADauthptrstatic
7034
7035	// Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
7036	// offset alone as a pointer if the symbol wasn't available, which would
7037	// probably break null checks in users. Ptrauth complicates things further:
7038	// error out.
7039	if (Offset != `0`)
7040	report_fatal_error(
7041	reason: "unsupported non-zero offset in weak ptrauth global reference");
7042
7043	if (HasAddrDisc)
7044	report_fatal_error(reason: "unsupported weak addr-div ptrauth global");
7045
7046	MIB.buildInstr(Opc: AArch64::LOADauthptrstatic, DstOps: {DefReg}, SrcOps: {})
7047	.addGlobalAddress(GV, Offset)
7048	.addImm(Val: Key)
7049	.addImm(Val: Disc);
7050	RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
7051
7052	I.eraseFromParent();
7053	return true;
7054	}
7055
7056	void AArch64InstructionSelector::SelectTable(MachineInstr &I,
7057	MachineRegisterInfo &MRI,
7058	unsigned NumVec, unsigned Opc1,
7059	unsigned Opc2, bool isExt) {
7060	Register DstReg = I.getOperand(i: `0`).getReg();
7061	unsigned Opc = MRI.getType(Reg: DstReg) == LLT::fixed_vector(NumElements: `8`, ScalarSizeInBits: `8`) ? Opc1 : Opc2;
7062
7063	// Create the REG_SEQUENCE
7064	SmallVector<Register, `4`> Regs;
7065	for (unsigned i = `0`; i < NumVec; i++)
7066	Regs.push_back(Elt: I.getOperand(i: i + `2` + isExt).getReg());
7067	Register RegSeq = createQTuple(Regs, MIB);
7068
7069	Register IdxReg = I.getOperand(i: `2` + NumVec + isExt).getReg();
7070	MachineInstrBuilder Instr;
7071	if (isExt) {
7072	Register Reg = I.getOperand(i: `2`).getReg();
7073	Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Reg, RegSeq, IdxReg});
7074	} else
7075	Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {RegSeq, IdxReg});
7076	constrainSelectedInstRegOperands(I&: *Instr, TII, TRI, RBI);
7077	I.eraseFromParent();
7078	}
7079
7080	InstructionSelector::ComplexRendererFns
7081	AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
7082	auto MaybeImmed = getImmedFromMO(Root);
7083	if (MaybeImmed == std::nullopt \|\| *MaybeImmed > `31`)
7084	return std::nullopt;
7085	uint64_t Enc = (`32` - *MaybeImmed) & `0x1f`;
7086	return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7087	}
7088
7089	InstructionSelector::ComplexRendererFns
7090	AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
7091	auto MaybeImmed = getImmedFromMO(Root);
7092	if (MaybeImmed == std::nullopt \|\| *MaybeImmed > `31`)
7093	return std::nullopt;
7094	uint64_t Enc = `31` - *MaybeImmed;
7095	return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7096	}
7097
7098	InstructionSelector::ComplexRendererFns
7099	AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
7100	auto MaybeImmed = getImmedFromMO(Root);
7101	if (MaybeImmed == std::nullopt \|\| *MaybeImmed > `63`)
7102	return std::nullopt;
7103	uint64_t Enc = (`64` - *MaybeImmed) & `0x3f`;
7104	return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7105	}
7106
7107	InstructionSelector::ComplexRendererFns
7108	AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7109	auto MaybeImmed = getImmedFromMO(Root);
7110	if (MaybeImmed == std::nullopt \|\| *MaybeImmed > `63`)
7111	return std::nullopt;
7112	uint64_t Enc = `63` - *MaybeImmed;
7113	return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7114	}
7115
7116	/// Helper to select an immediate value that can be represented as a 12-bit
7117	/// value shifted left by either 0 or 12. If it is possible to do so, return
7118	/// the immediate and shift value. If not, return std::nullopt.
7119	///
7120	/// Used by selectArithImmed and selectNegArithImmed.
7121	InstructionSelector::ComplexRendererFns
7122	AArch64InstructionSelector::select12BitValueWithLeftShift(
7123	uint64_t Immed) const {
7124	unsigned ShiftAmt;
7125	if (Immed >> `12` == `0`) {
7126	ShiftAmt = `0`;
7127	} else if ((Immed & `0xfff`) == `0` && Immed >> `24` == `0`) {
7128	ShiftAmt = `12`;
7129	Immed = Immed >> `12`;
7130	} else
7131	return std::nullopt;
7132
7133	unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
7134	return {{
7135	[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); },
7136	[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); },
7137	}};
7138	}
7139
7140	/// SelectArithImmed - Select an immediate value that can be represented as
7141	/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7142	/// Val set to the 12-bit value and Shift set to the shifter operand.
7143	InstructionSelector::ComplexRendererFns
7144	AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7145	// This function is called from the addsub_shifted_imm ComplexPattern,
7146	// which lists [imm] as the list of opcode it's interested in, however
7147	// we still need to check whether the operand is actually an immediate
7148	// here because the ComplexPattern opcode list is only used in
7149	// root-level opcode matching.
7150	auto MaybeImmed = getImmedFromMO(Root);
7151	if (MaybeImmed == std::nullopt)
7152	return std::nullopt;
7153	return select12BitValueWithLeftShift(Immed: *MaybeImmed);
7154	}
7155
7156	/// SelectNegArithImmed - As above, but negates the value before trying to
7157	/// select it.
7158	InstructionSelector::ComplexRendererFns
7159	AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7160	// We need a register here, because we need to know if we have a 64 or 32
7161	// bit immediate.
7162	if (!Root.isReg())
7163	return std::nullopt;
7164	auto MaybeImmed = getImmedFromMO(Root);
7165	if (MaybeImmed == std::nullopt)
7166	return std::nullopt;
7167	uint64_t Immed = *MaybeImmed;
7168
7169	// This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7170	// have the opposite effect on the C flag, so this pattern mustn't match under
7171	// those circumstances.
7172	if (Immed == `0`)
7173	return std::nullopt;
7174
7175	// Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7176	// the root.
7177	MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7178	if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == `32`)
7179	Immed = ~((uint32_t)Immed) + `1`;
7180	else
7181	Immed = ~Immed + `1ULL`;
7182
7183	if (Immed & `0xFFFFFFFFFF000000ULL`)
7184	return std::nullopt;
7185
7186	Immed &= `0xFFFFFFULL`;
7187	return select12BitValueWithLeftShift(Immed);
7188	}
7189
7190	/// Checks if we are sure that folding MI into load/store addressing mode is
7191	/// beneficial or not.
7192	///
7193	/// Returns:
7194	/// - true if folding MI would be beneficial.
7195	/// - false if folding MI would be bad.
7196	/// - std::nullopt if it is not sure whether folding MI is beneficial.
7197	///
7198	/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7199	///
7200	/// %13:gpr(s64) = G_CONSTANT i64 1
7201	/// %8:gpr(s64) = G_SHL %6, %13(s64)
7202	/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7203	/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7204	std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7205	const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7206	if (MI.getOpcode() == AArch64::G_SHL) {
7207	// Address operands with shifts are free, except for running on subtargets
7208	// with AddrLSLSlow14.
7209	if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7210	VReg: MI.getOperand(i: `2`).getReg(), MRI)) {
7211	const APInt ShiftVal = ValAndVeg ->Value;
7212
7213	// Don't fold if we know this will be slow.
7214	return !(STI.hasAddrLSLSlow14() && (ShiftVal == `1` \|\| ShiftVal == `4`));
7215	}
7216	}
7217	return std::nullopt;
7218	}
7219
7220	/// Return true if it is worth folding MI into an extended register. That is,
7221	/// if it's safe to pull it into the addressing mode of a load or store as a
7222	/// shift.
7223	/// \p IsAddrOperand whether the def of MI is used as an address operand
7224	/// (e.g. feeding into an LDR/STR).
7225	bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7226	const MachineInstr &MI, const MachineRegisterInfo &MRI,
7227	bool IsAddrOperand) const {
7228
7229	// Always fold if there is one use, or if we're optimizing for size.
7230	Register DefReg = MI.getOperand(i: `0`).getReg();
7231	if (MRI.hasOneNonDBGUse(RegNo: DefReg) \|\|
7232	MI.getParent()->getParent()->getFunction().hasOptSize())
7233	return true;
7234
7235	if (IsAddrOperand) {
7236	// If we are already sure that folding MI is good or bad, return the result.
7237	if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7238	return *Worth;
7239
7240	// Fold G_PTR_ADD if its offset operand can be folded
7241	if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7242	MachineInstr *OffsetInst =
7243	getDefIgnoringCopies(Reg: MI.getOperand(i: `2`).getReg(), MRI);
7244
7245	// Note, we already know G_PTR_ADD is used by at least two instructions.
7246	// If we are also sure about whether folding is beneficial or not,
7247	// return the result.
7248	if (const auto Worth = isWorthFoldingIntoAddrMode(MI: *OffsetInst, MRI))
7249	return *Worth;
7250	}
7251	}
7252
7253	// FIXME: Consider checking HasALULSLFast as appropriate.
7254
7255	// We have a fastpath, so folding a shift in and potentially computing it
7256	// many times may be beneficial. Check if this is only used in memory ops.
7257	// If it is, then we should fold.
7258	return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg),
7259	P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7260	}
7261
7262	InstructionSelector::ComplexRendererFns
7263	AArch64InstructionSelector::selectExtendedSHL(
7264	MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7265	unsigned SizeInBytes, bool WantsExt) const {
7266	assert(Base.isReg() && "Expected base to be a register operand");
7267	assert(Offset.isReg() && "Expected offset to be a register operand");
7268
7269	MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7270	MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg());
7271
7272	unsigned OffsetOpc = OffsetInst->getOpcode();
7273	bool LookedThroughZExt = false;
7274	if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7275	// Try to look through a ZEXT.
7276	if (OffsetOpc != TargetOpcode::G_ZEXT \|\| !WantsExt)
7277	return std::nullopt;
7278
7279	OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: `1`).getReg());
7280	OffsetOpc = OffsetInst->getOpcode();
7281	LookedThroughZExt = true;
7282
7283	if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7284	return std::nullopt;
7285	}
7286	// Make sure that the memory op is a valid size.
7287	int64_t LegalShiftVal = Log2_32(Value: SizeInBytes);
7288	if (LegalShiftVal == `0`)
7289	return std::nullopt;
7290	if (!isWorthFoldingIntoExtendedReg(MI: OffsetInst, MRI, IsAddrOperand: true*))
7291	return std::nullopt;
7292
7293	// Now, try to find the specific G_CONSTANT. Start by assuming that the
7294	// register we will offset is the LHS, and the register containing the
7295	// constant is the RHS.
7296	Register OffsetReg = OffsetInst->getOperand(i: `1`).getReg();
7297	Register ConstantReg = OffsetInst->getOperand(i: `2`).getReg();
7298	auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
7299	if (!ValAndVReg) {
7300	// We didn't get a constant on the RHS. If the opcode is a shift, then
7301	// we're done.
7302	if (OffsetOpc == TargetOpcode::G_SHL)
7303	return std::nullopt;
7304
7305	// If we have a G_MUL, we can use either register. Try looking at the RHS.
7306	std::swap(a&: OffsetReg, b&: ConstantReg);
7307	ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
7308	if (!ValAndVReg)
7309	return std::nullopt;
7310	}
7311
7312	// The value must fit into 3 bits, and must be positive. Make sure that is
7313	// true.
7314	int64_t ImmVal = ValAndVReg ->Value.getSExtValue();
7315
7316	// Since we're going to pull this into a shift, the constant value must be
7317	// a power of 2. If we got a multiply, then we need to check this.
7318	if (OffsetOpc == TargetOpcode::G_MUL) {
7319	if (!llvm::has_single_bit<uint32_t>(Value: ImmVal))
7320	return std::nullopt;
7321
7322	// Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7323	ImmVal = Log2_32(Value: ImmVal);
7324	}
7325
7326	if ((ImmVal & `0x7`) != ImmVal)
7327	return std::nullopt;
7328
7329	// We are only allowed to shift by LegalShiftVal. This shift value is built
7330	// into the instruction, so we can't just use whatever we want.
7331	if (ImmVal != LegalShiftVal)
7332	return std::nullopt;
7333
7334	unsigned SignExtend = `0`;
7335	if (WantsExt) {
7336	// Check if the offset is defined by an extend, unless we looked through a
7337	// G_ZEXT earlier.
7338	if (!LookedThroughZExt) {
7339	MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI);
7340	auto Ext = getExtendTypeForInst(MI&: ExtInst, MRI, IsLoadStore: true*);
7341	if (Ext == AArch64_AM::InvalidShiftExtend)
7342	return std::nullopt;
7343
7344	SignExtend = AArch64_AM::isSignExtendShiftType(Type: Ext) ? `1` : `0`;
7345	// We only support SXTW for signed extension here.
7346	if (SignExtend && Ext != AArch64_AM::SXTW)
7347	return std::nullopt;
7348	OffsetReg = ExtInst->getOperand(i: `1`).getReg();
7349	}
7350
7351	// Need a 32-bit wide register here.
7352	MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg()));
7353	OffsetReg = moveScalarRegClass(Reg: OffsetReg, RC: AArch64::GPR32RegClass, MIB);
7354	}
7355
7356	// We can use the LHS of the GEP as the base, and the LHS of the shift as an
7357	// offset. Signify that we are shifting by setting the shift flag to 1.
7358	return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); },
7359	[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); },
7360	[=](MachineInstrBuilder &MIB) {
7361	// Need to add both immediates here to make sure that they are both
7362	// added to the instruction.
7363	MIB.addImm(Val: SignExtend);
7364	MIB.addImm(Val: `1`);
7365	}}};
7366	}
7367
7368	/// This is used for computing addresses like this:
7369	///
7370	/// ldr x1, [x2, x3, lsl #3]
7371	///
7372	/// Where x2 is the base register, and x3 is an offset register. The shift-left
7373	/// is a constant value specific to this load instruction. That is, we'll never
7374	/// see anything other than a 3 here (which corresponds to the size of the
7375	/// element being loaded.)
7376	InstructionSelector::ComplexRendererFns
7377	AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7378	MachineOperand &Root, unsigned SizeInBytes) const {
7379	if (!Root.isReg())
7380	return std::nullopt;
7381	MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7382
7383	// We want to find something like this:
7384	//
7385	// val = G_CONSTANT LegalShiftVal
7386	// shift = G_SHL off_reg val
7387	// ptr = G_PTR_ADD base_reg shift
7388	// x = G_LOAD ptr
7389	//
7390	// And fold it into this addressing mode:
7391	//
7392	// ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7393
7394	// Check if we can find the G_PTR_ADD.
7395	MachineInstr *PtrAdd =
7396	getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7397	if (!PtrAdd \|\| !isWorthFoldingIntoExtendedReg(MI: PtrAdd, MRI, IsAddrOperand: true*))
7398	return std::nullopt;
7399
7400	// Now, try to match an opcode which will match our specific offset.
7401	// We want a G_SHL or a G_MUL.
7402	MachineInstr *OffsetInst =
7403	getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: `2`).getReg(), MRI);
7404	return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: `1`),
7405	Offset&: OffsetInst->getOperand(i: `0`), SizeInBytes,
7406	/WantsExt=/false);
7407	}
7408
7409	/// This is used for computing addresses like this:
7410	///
7411	/// ldr x1, [x2, x3]
7412	///
7413	/// Where x2 is the base register, and x3 is an offset register.
7414	///
7415	/// When possible (or profitable) to fold a G_PTR_ADD into the address
7416	/// calculation, this will do so. Otherwise, it will return std::nullopt.
7417	InstructionSelector::ComplexRendererFns
7418	AArch64InstructionSelector::selectAddrModeRegisterOffset(
7419	MachineOperand &Root) const {
7420	MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7421
7422	// We need a GEP.
7423	MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg());
7424	if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7425	return std::nullopt;
7426
7427	// If this is used more than once, let's not bother folding.
7428	// TODO: Check if they are memory ops. If they are, then we can still fold
7429	// without having to recompute anything.
7430	if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: `0`).getReg()))
7431	return std::nullopt;
7432
7433	// Base is the GEP's LHS, offset is its RHS.
7434	return {{[=](MachineInstrBuilder &MIB) {
7435	MIB.addUse(RegNo: Gep->getOperand(i: `1`).getReg());
7436	},
7437	[=](MachineInstrBuilder &MIB) {
7438	MIB.addUse(RegNo: Gep->getOperand(i: `2`).getReg());
7439	},
7440	[=](MachineInstrBuilder &MIB) {
7441	// Need to add both immediates here to make sure that they are both
7442	// added to the instruction.
7443	MIB.addImm(Val: `0`);
7444	MIB.addImm(Val: `0`);
7445	}}};
7446	}
7447
7448	/// This is intended to be equivalent to selectAddrModeXRO in
7449	/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
7450	InstructionSelector::ComplexRendererFns
7451	AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7452	unsigned SizeInBytes) const {
7453	MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7454	if (!Root.isReg())
7455	return std::nullopt;
7456	MachineInstr *PtrAdd =
7457	getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7458	if (!PtrAdd)
7459	return std::nullopt;
7460
7461	// Check for an immediates which cannot be encoded in the [base + imm]
7462	// addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7463	// end up with code like:
7464	//
7465	// mov x0, wide
7466	// add x1 base, x0
7467	// ldr x2, [x1, x0]
7468	//
7469	// In this situation, we can use the [base, xreg] addressing mode to save an
7470	// add/sub:
7471	//
7472	// mov x0, wide
7473	// ldr x2, [base, x0]
7474	auto ValAndVReg =
7475	getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: `2`).getReg(), MRI);
7476	if (ValAndVReg) {
7477	unsigned Scale = Log2_32(Value: SizeInBytes);
7478	int64_t ImmOff = ValAndVReg ->Value.getSExtValue();
7479
7480	// Skip immediates that can be selected in the load/store addressing
7481	// mode.
7482	if (ImmOff % SizeInBytes == `0` && ImmOff >= `0` &&
7483	ImmOff < (`0x1000` << Scale))
7484	return std::nullopt;
7485
7486	// Helper lambda to decide whether or not it is preferable to emit an add.
7487	auto isPreferredADD = [](int64_t ImmOff) {
7488	// Constants in [0x0, 0xfff] can be encoded in an add.
7489	if ((ImmOff & `0xfffffffffffff000LL`) == `0x0LL`)
7490	return true;
7491
7492	// Can it be encoded in an add lsl #12?
7493	if ((ImmOff & `0xffffffffff000fffLL`) != `0x0LL`)
7494	return false;
7495
7496	// It can be encoded in an add lsl #12, but we may not want to. If it is
7497	// possible to select this as a single movz, then prefer that. A single
7498	// movz is faster than an add with a shift.
7499	return (ImmOff & `0xffffffffff00ffffLL`) != `0x0LL` &&
7500	(ImmOff & `0xffffffffffff0fffLL`) != `0x0LL`;
7501	};
7502
7503	// If the immediate can be encoded in a single add/sub, then bail out.
7504	if (isPreferredADD (ImmOff) \|\| isPreferredADD (-ImmOff))
7505	return std::nullopt;
7506	}
7507
7508	// Try to fold shifts into the addressing mode.
7509	auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7510	if (AddrModeFns)
7511	return AddrModeFns;
7512
7513	// If that doesn't work, see if it's possible to fold in registers from
7514	// a GEP.
7515	return selectAddrModeRegisterOffset(Root);
7516	}
7517
7518	/// This is used for computing addresses like this:
7519	///
7520	/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7521	///
7522	/// Where we have a 64-bit base register, a 32-bit offset register, and an
7523	/// extend (which may or may not be signed).
7524	InstructionSelector::ComplexRendererFns
7525	AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7526	unsigned SizeInBytes) const {
7527	MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7528
7529	MachineInstr *PtrAdd =
7530	getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7531	if (!PtrAdd \|\| !isWorthFoldingIntoExtendedReg(MI: PtrAdd, MRI, IsAddrOperand: true*))
7532	return std::nullopt;
7533
7534	MachineOperand &LHS = PtrAdd->getOperand(i: `1`);
7535	MachineOperand &RHS = PtrAdd->getOperand(i: `2`);
7536	MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
7537
7538	// The first case is the same as selectAddrModeXRO, except we need an extend.
7539	// In this case, we try to find a shift and extend, and fold them into the
7540	// addressing mode.
7541	//
7542	// E.g.
7543	//
7544	// off_reg = G_Z/S/ANYEXT ext_reg
7545	// val = G_CONSTANT LegalShiftVal
7546	// shift = G_SHL off_reg val
7547	// ptr = G_PTR_ADD base_reg shift
7548	// x = G_LOAD ptr
7549	//
7550	// In this case we can get a load like this:
7551	//
7552	// ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7553	auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: `0`),
7554	SizeInBytes, /WantsExt=/true);
7555	if (ExtendedShl)
7556	return ExtendedShl;
7557
7558	// There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7559	//
7560	// e.g.
7561	// ldr something, [base_reg, ext_reg, sxtw]
7562	if (!isWorthFoldingIntoExtendedReg(MI: OffsetInst, MRI, IsAddrOperand: true*))
7563	return std::nullopt;
7564
7565	// Check if this is an extend. We'll get an extend type if it is.
7566	AArch64_AM::ShiftExtendType Ext =
7567	getExtendTypeForInst(MI&: OffsetInst, MRI, /IsLoadStore=/*true);
7568	if (Ext == AArch64_AM::InvalidShiftExtend)
7569	return std::nullopt;
7570
7571	// Need a 32-bit wide register.
7572	MachineIRBuilder MIB(*PtrAdd);
7573	Register ExtReg = moveScalarRegClass(Reg: OffsetInst->getOperand(i: `1`).getReg(),
7574	RC: AArch64::GPR32RegClass, MIB);
7575	unsigned SignExtend = Ext == AArch64_AM::SXTW;
7576
7577	// Base is LHS, offset is ExtReg.
7578	return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); },
7579	[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7580	[=](MachineInstrBuilder &MIB) {
7581	MIB.addImm(Val: SignExtend);
7582	MIB.addImm(Val: `0`);
7583	}}};
7584	}
7585
7586	/// Select a "register plus unscaled signed 9-bit immediate" address. This
7587	/// should only match when there is an offset that is not valid for a scaled
7588	/// immediate addressing mode. The "Size" argument is the size in bytes of the
7589	/// memory reference, which is needed here to know what is valid for a scaled
7590	/// immediate.
7591	InstructionSelector::ComplexRendererFns
7592	AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7593	unsigned Size) const {
7594	MachineRegisterInfo &MRI =
7595	Root.getParent()->getParent()->getParent()->getRegInfo();
7596
7597	if (!Root.isReg())
7598	return std::nullopt;
7599
7600	if (!isBaseWithConstantOffset(Root, MRI))
7601	return std::nullopt;
7602
7603	MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7604
7605	MachineOperand &OffImm = RootDef->getOperand(i: `2`);
7606	if (!OffImm.isReg())
7607	return std::nullopt;
7608	MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg());
7609	if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7610	return std::nullopt;
7611	int64_t RHSC;
7612	MachineOperand &RHSOp1 = RHS->getOperand(i: `1`);
7613	if (!RHSOp1.isCImm() \|\| RHSOp1.getCImm()->getBitWidth() > `64`)
7614	return std::nullopt;
7615	RHSC = RHSOp1.getCImm()->getSExtValue();
7616
7617	if (RHSC >= -`256` && RHSC < `256`) {
7618	MachineOperand &Base = RootDef->getOperand(i: `1`);
7619	return {{
7620	[=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); },
7621	[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); },
7622	}};
7623	}
7624	return std::nullopt;
7625	}
7626
7627	InstructionSelector::ComplexRendererFns
7628	AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7629	unsigned Size,
7630	MachineRegisterInfo &MRI) const {
7631	if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7632	return std::nullopt;
7633	MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: `1`).getReg());
7634	if (Adrp.getOpcode() != AArch64::ADRP)
7635	return std::nullopt;
7636
7637	// TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7638	auto Offset = Adrp.getOperand(i: `1`).getOffset();
7639	if (Offset % Size != `0`)
7640	return std::nullopt;
7641
7642	auto GV = Adrp.getOperand(i: `1`).getGlobal();
7643	if (GV->isThreadLocal())
7644	return std::nullopt;
7645
7646	auto &MF = *RootDef.getParent()->getParent();
7647	if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size)
7648	return std::nullopt;
7649
7650	unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget());
7651	MachineIRBuilder MIRBuilder(RootDef);
7652	Register AdrpReg = Adrp.getOperand(i: `0`).getReg();
7653	return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); },
7654	[=](MachineInstrBuilder &MIB) {
7655	MIB.addGlobalAddress(GV, Offset,
7656	TargetFlags: OpFlags \| AArch64II::MO_PAGEOFF \|
7657	AArch64II::MO_NC);
7658	}}};
7659	}
7660
7661	/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7662	/// "Size" argument is the size in bytes of the memory reference, which
7663	/// determines the scale.
7664	InstructionSelector::ComplexRendererFns
7665	AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7666	unsigned Size) const {
7667	MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7668	MachineRegisterInfo &MRI = MF.getRegInfo();
7669
7670	if (!Root.isReg())
7671	return std::nullopt;
7672
7673	MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7674	if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7675	return {{
7676	[=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: `1`)); },
7677	[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: `0`); },
7678	}};
7679	}
7680
7681	CodeModel::Model CM = MF.getTarget().getCodeModel();
7682	// Check if we can fold in the ADD of small code model ADRP + ADD address.
7683	// HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7684	// globals into the offset.
7685	MachineInstr *RootParent = Root.getParent();
7686	if (CM == CodeModel::Small &&
7687	!(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7688	STI.isTargetDarwin())) {
7689	auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI);
7690	if (OpFns)
7691	return OpFns;
7692	}
7693
7694	if (isBaseWithConstantOffset(Root, MRI)) {
7695	MachineOperand &LHS = RootDef->getOperand(i: `1`);
7696	MachineOperand &RHS = RootDef->getOperand(i: `2`);
7697	MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg());
7698	MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg());
7699
7700	int64_t RHSC = (int64_t)RHSDef->getOperand(i: `1`).getCImm()->getZExtValue();
7701	unsigned Scale = Log2_32(Value: Size);
7702	if ((RHSC & (Size - `1`)) == `0` && RHSC >= `0` && RHSC < (`0x1000` << Scale)) {
7703	if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7704	return {{
7705	[=](MachineInstrBuilder &MIB) { MIB.add(MO: LHSDef->getOperand(i: `1`)); },
7706	[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7707	}};
7708
7709	return {{
7710	[=](MachineInstrBuilder &MIB) { MIB.add(MO: LHS); },
7711	[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7712	}};
7713	}
7714	}
7715
7716	// Before falling back to our general case, check if the unscaled
7717	// instructions can handle this. If so, that's preferable.
7718	if (selectAddrModeUnscaled(Root, Size))
7719	return std::nullopt;
7720
7721	return {{
7722	[=](MachineInstrBuilder &MIB) { MIB.add(MO: Root); },
7723	[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: `0`); },
7724	}};
7725	}
7726
7727	/// Given a shift instruction, return the correct shift type for that
7728	/// instruction.
7729	static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7730	switch (MI.getOpcode()) {
7731	default:
7732	return AArch64_AM::InvalidShiftExtend;
7733	case TargetOpcode::G_SHL:
7734	return AArch64_AM::LSL;
7735	case TargetOpcode::G_LSHR:
7736	return AArch64_AM::LSR;
7737	case TargetOpcode::G_ASHR:
7738	return AArch64_AM::ASR;
7739	case TargetOpcode::G_ROTR:
7740	return AArch64_AM::ROR;
7741	}
7742	}
7743
7744	/// Select a "shifted register" operand. If the value is not shifted, set the
7745	/// shift operand to a default value of "lsl 0".
7746	InstructionSelector::ComplexRendererFns
7747	AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7748	bool AllowROR) const {
7749	if (!Root.isReg())
7750	return std::nullopt;
7751	MachineRegisterInfo &MRI =
7752	Root.getParent()->getParent()->getParent()->getRegInfo();
7753
7754	// Check if the operand is defined by an instruction which corresponds to
7755	// a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7756	MachineInstr *ShiftInst = MRI.getVRegDef(Reg: Root.getReg());
7757	AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(MI&: *ShiftInst);
7758	if (ShType == AArch64_AM::InvalidShiftExtend)
7759	return std::nullopt;
7760	if (ShType == AArch64_AM::ROR && !AllowROR)
7761	return std::nullopt;
7762	if (!isWorthFoldingIntoExtendedReg(MI: ShiftInst, MRI, IsAddrOperand: false*))
7763	return std::nullopt;
7764
7765	// Need an immediate on the RHS.
7766	MachineOperand &ShiftRHS = ShiftInst->getOperand(i: `2`);
7767	auto Immed = getImmedFromMO(Root: ShiftRHS);
7768	if (!Immed)
7769	return std::nullopt;
7770
7771	// We have something that we can fold. Fold in the shift's LHS and RHS into
7772	// the instruction.
7773	MachineOperand &ShiftLHS = ShiftInst->getOperand(i: `1`);
7774	Register ShiftReg = ShiftLHS.getReg();
7775
7776	unsigned NumBits = MRI.getType(Reg: ShiftReg).getSizeInBits();
7777	unsigned Val = *Immed & (NumBits - `1`);
7778	unsigned ShiftVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
7779
7780	return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ShiftReg); },
7781	[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShiftVal); }}};
7782	}
7783
7784	AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7785	MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7786	unsigned Opc = MI.getOpcode();
7787
7788	// Handle explicit extend instructions first.
7789	if (Opc == TargetOpcode::G_SEXT \|\| Opc == TargetOpcode::G_SEXT_INREG) {
7790	unsigned Size;
7791	if (Opc == TargetOpcode::G_SEXT)
7792	Size = MRI.getType(Reg: MI.getOperand(i: `1`).getReg()).getSizeInBits();
7793	else
7794	Size = MI.getOperand(i: `2`).getImm();
7795	assert(Size != `64` && "Extend from 64 bits?");
7796	switch (Size) {
7797	case `8`:
7798	return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7799	case `16`:
7800	return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7801	case `32`:
7802	return AArch64_AM::SXTW;
7803	default:
7804	return AArch64_AM::InvalidShiftExtend;
7805	}
7806	}
7807
7808	if (Opc == TargetOpcode::G_ZEXT \|\| Opc == TargetOpcode::G_ANYEXT) {
7809	unsigned Size = MRI.getType(Reg: MI.getOperand(i: `1`).getReg()).getSizeInBits();
7810	assert(Size != `64` && "Extend from 64 bits?");
7811	switch (Size) {
7812	case `8`:
7813	return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7814	case `16`:
7815	return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7816	case `32`:
7817	return AArch64_AM::UXTW;
7818	default:
7819	return AArch64_AM::InvalidShiftExtend;
7820	}
7821	}
7822
7823	// Don't have an explicit extend. Try to handle a G_AND with a constant mask
7824	// on the RHS.
7825	if (Opc != TargetOpcode::G_AND)
7826	return AArch64_AM::InvalidShiftExtend;
7827
7828	std::optional<uint64_t> MaybeAndMask = getImmedFromMO(Root: MI.getOperand(i: `2`));
7829	if (!MaybeAndMask)
7830	return AArch64_AM::InvalidShiftExtend;
7831	uint64_t AndMask = *MaybeAndMask;
7832	switch (AndMask) {
7833	default:
7834	return AArch64_AM::InvalidShiftExtend;
7835	case `0xFF`:
7836	return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7837	case `0xFFFF`:
7838	return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7839	case `0xFFFFFFFF`:
7840	return AArch64_AM::UXTW;
7841	}
7842	}
7843
7844	Register AArch64InstructionSelector::moveScalarRegClass(
7845	Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7846	MachineRegisterInfo &MRI = *MIB.getMRI();
7847	auto Ty = MRI.getType(Reg);
7848	assert(!Ty.isVector() && "Expected scalars only!");
7849	if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7850	return Reg;
7851
7852	// Create a copy and immediately select it.
7853	// FIXME: We should have an emitCopy function?
7854	auto Copy = MIB.buildCopy(Res: {&RC}, Op: {Reg});
7855	selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
7856	return Copy.getReg(Idx: `0`);
7857	}
7858
7859	/// Select an "extended register" operand. This operand folds in an extend
7860	/// followed by an optional left shift.
7861	InstructionSelector::ComplexRendererFns
7862	AArch64InstructionSelector::selectArithExtendedRegister(
7863	MachineOperand &Root) const {
7864	if (!Root.isReg())
7865	return std::nullopt;
7866	MachineRegisterInfo &MRI =
7867	Root.getParent()->getParent()->getParent()->getRegInfo();
7868
7869	uint64_t ShiftVal = `0`;
7870	Register ExtReg;
7871	AArch64_AM::ShiftExtendType Ext;
7872	MachineInstr *RootDef = getDefIgnoringCopies(Reg: Root.getReg(), MRI);
7873	if (!RootDef)
7874	return std::nullopt;
7875
7876	if (!isWorthFoldingIntoExtendedReg(MI: RootDef, MRI, IsAddrOperand: false*))
7877	return std::nullopt;
7878
7879	// Check if we can fold a shift and an extend.
7880	if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7881	// Look for a constant on the RHS of the shift.
7882	MachineOperand &RHS = RootDef->getOperand(i: `2`);
7883	std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(Root: RHS);
7884	if (!MaybeShiftVal)
7885	return std::nullopt;
7886	ShiftVal = *MaybeShiftVal;
7887	if (ShiftVal > `4`)
7888	return std::nullopt;
7889	// Look for a valid extend instruction on the LHS of the shift.
7890	MachineOperand &LHS = RootDef->getOperand(i: `1`);
7891	MachineInstr *ExtDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
7892	if (!ExtDef)
7893	return std::nullopt;
7894	Ext = getExtendTypeForInst(MI&: *ExtDef, MRI);
7895	if (Ext == AArch64_AM::InvalidShiftExtend)
7896	return std::nullopt;
7897	ExtReg = ExtDef->getOperand(i: `1`).getReg();
7898	} else {
7899	// Didn't get a shift. Try just folding an extend.
7900	Ext = getExtendTypeForInst(MI&: *RootDef, MRI);
7901	if (Ext == AArch64_AM::InvalidShiftExtend)
7902	return std::nullopt;
7903	ExtReg = RootDef->getOperand(i: `1`).getReg();
7904
7905	// If we have a 32 bit instruction which zeroes out the high half of a
7906	// register, we get an implicit zero extend for free. Check if we have one.
7907	// FIXME: We actually emit the extend right now even though we don't have
7908	// to.
7909	if (Ext == AArch64_AM::UXTW && MRI.getType(Reg: ExtReg).getSizeInBits() == `32`) {
7910	MachineInstr *ExtInst = MRI.getVRegDef(Reg: ExtReg);
7911	if (isDef32(MI: *ExtInst))
7912	return std::nullopt;
7913	}
7914	}
7915
7916	// We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7917	// copy.
7918	MachineIRBuilder MIB(*RootDef);
7919	ExtReg = moveScalarRegClass(Reg: ExtReg, RC: AArch64::GPR32RegClass, MIB);
7920
7921	return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7922	[=](MachineInstrBuilder &MIB) {
7923	MIB.addImm(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal));
7924	}}};
7925	}
7926
7927	InstructionSelector::ComplexRendererFns
7928	AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7929	if (!Root.isReg())
7930	return std::nullopt;
7931	MachineRegisterInfo &MRI =
7932	Root.getParent()->getParent()->getParent()->getRegInfo();
7933
7934	auto Extract = getDefSrcRegIgnoringCopies(Reg: Root.getReg(), MRI);
7935	while (Extract && Extract ->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7936	STI.isLittleEndian())
7937	Extract =
7938	getDefSrcRegIgnoringCopies(Reg: Extract ->MI->getOperand(i: `1`).getReg(), MRI);
7939	if (!Extract)
7940	return std::nullopt;
7941
7942	if (auto *Unmerge = dyn_cast<GUnmerge>(Val: Extract ->MI)) {
7943	if (Unmerge->getNumDefs() == `2` &&
7944	Extract ->Reg == Unmerge->getOperand(i: `1`).getReg()) {
7945	Register ExtReg = Unmerge->getSourceReg();
7946	return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7947	}
7948	}
7949	if (auto *ExtElt = dyn_cast<GExtractVectorElement>(Val: Extract ->MI)) {
7950	LLT SrcTy = MRI.getType(Reg: ExtElt->getVectorReg());
7951	auto LaneIdx =
7952	getIConstantVRegValWithLookThrough(VReg: ExtElt->getIndexReg(), MRI);
7953	if (LaneIdx && SrcTy == LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `64`) &&
7954	LaneIdx ->Value.getSExtValue() == `1`) {
7955	Register ExtReg = ExtElt->getVectorReg();
7956	return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7957	}
7958	}
7959	if (auto *Subvec = dyn_cast<GExtractSubvector>(Val: Extract ->MI)) {
7960	LLT SrcTy = MRI.getType(Reg: Subvec->getSrcVec());
7961	auto LaneIdx = Subvec->getIndexImm();
7962	if (LaneIdx == SrcTy.getNumElements() / `2`) {
7963	Register ExtReg = Subvec->getSrcVec();
7964	return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7965	}
7966	}
7967
7968	return std::nullopt;
7969	}
7970
7971	InstructionSelector::ComplexRendererFns
7972	AArch64InstructionSelector::selectCVTFixedPointVecBase(
7973	const MachineOperand &Root, bool isReciprocal) const {
7974	if (!Root.isReg())
7975	return std::nullopt;
7976	const MachineRegisterInfo &MRI =
7977	Root.getParent()->getParent()->getParent()->getRegInfo();
7978
7979	MachineInstr *Dup = getDefIgnoringCopies(Reg: Root.getReg(), MRI);
7980	if (Dup->getOpcode() != AArch64::G_DUP)
7981	return std::nullopt;
7982	std::optional<ValueAndVReg> CstVal =
7983	getAnyConstantVRegValWithLookThrough(VReg: Dup->getOperand(i: `1`).getReg(), MRI);
7984	if (!CstVal)
7985	return std::nullopt;
7986
7987	unsigned RegWidth = MRI.getType(Reg: Root.getReg()).getScalarSizeInBits();
7988	APFloat FVal(`0.0`);
7989	switch (RegWidth) {
7990	case `16`:
7991	FVal = APFloat (APFloat::IEEEhalf(), CstVal ->Value);
7992	break;
7993	case `32`:
7994	FVal = APFloat (APFloat::IEEEsingle(), CstVal ->Value);
7995	break;
7996	case `64`:
7997	FVal = APFloat (APFloat::IEEEdouble(), CstVal ->Value);
7998	break;
7999	default:
8000	return std::nullopt;
8001	};
8002	if (unsigned FBits =
8003	CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal))
8004	return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: FBits); }}};
8005
8006	return std::nullopt;
8007	}
8008
8009	InstructionSelector::ComplexRendererFns
8010	AArch64InstructionSelector::selectCVTFixedPointVec(MachineOperand &Root) const {
8011	return selectCVTFixedPointVecBase(Root, /isReciprocal/ false);
8012	}
8013
8014	InstructionSelector::ComplexRendererFns
8015	AArch64InstructionSelector::selectCVTFixedPosRecipOperandVec(
8016	MachineOperand &Root) const {
8017	return selectCVTFixedPointVecBase(Root, /isReciprocal/ true);
8018	}
8019
8020	void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
8021	const MachineInstr &MI,
8022	int OpIdx) const {
8023	// FIXME: This is only needed to satisfy the type checking in tablegen, and
8024	// should be able to reuse the Renderers already calculated by
8025	// selectCVTFixedPointVecBase.
8026	InstructionSelector::ComplexRendererFns Renderer =
8027	selectCVTFixedPointVecBase(Root: MI.getOperand(i: OpIdx), /isReciprocal/ false);
8028	assert((Renderer && Renderer->size() == `1`) &&
8029	"Expected selectCVTFixedPointVec to provide a function\n");
8030	(Renderer ->front())(MIB);
8031	}
8032
8033	void AArch64InstructionSelector::renderFixedPointRecipXForm(
8034	MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
8035	InstructionSelector::ComplexRendererFns Renderer =
8036	selectCVTFixedPointVecBase(Root: MI.getOperand(i: OpIdx), /isReciprocal/ true);
8037	assert((Renderer && Renderer->size() == `1`) &&
8038	"Expected selectCVTFixedPosRecipOperandVec to provide a function\n");
8039	(Renderer ->front())(MIB);
8040	}
8041
8042	void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
8043	const MachineInstr &MI,
8044	int OpIdx) const {
8045	const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8046	assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -`1` &&
8047	"Expected G_CONSTANT");
8048	std::optional<int64_t> CstVal =
8049	getIConstantVRegSExtVal(VReg: MI.getOperand(i: `0`).getReg(), MRI);
8050	assert(CstVal && "Expected constant value");
8051	MIB.addImm(Val: *CstVal);
8052	}
8053
8054	void AArch64InstructionSelector::renderLogicalImm32(
8055	MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
8056	assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -`1` &&
8057	"Expected G_CONSTANT");
8058	uint64_t CstVal = I.getOperand(i: `1`).getCImm()->getZExtValue();
8059	uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: `32`);
8060	MIB.addImm(Val: Enc);
8061	}
8062
8063	void AArch64InstructionSelector::renderLogicalImm64(
8064	MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
8065	assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -`1` &&
8066	"Expected G_CONSTANT");
8067	uint64_t CstVal = I.getOperand(i: `1`).getCImm()->getZExtValue();
8068	uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: `64`);
8069	MIB.addImm(Val: Enc);
8070	}
8071
8072	void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
8073	const MachineInstr &MI,
8074	int OpIdx) const {
8075	assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == `0` &&
8076	"Expected G_UBSANTRAP");
8077	MIB.addImm(Val: MI.getOperand(i: `0`).getImm() \| (`'U'` << `8`));
8078	}
8079
8080	void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
8081	const MachineInstr &MI,
8082	int OpIdx) const {
8083	assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -`1` &&
8084	"Expected G_FCONSTANT");
8085	MIB.addImm(
8086	Val: AArch64_AM::getFP16Imm(FPImm: MI.getOperand(i: `1`).getFPImm()->getValueAPF()));
8087	}
8088
8089	void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
8090	const MachineInstr &MI,
8091	int OpIdx) const {
8092	assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -`1` &&
8093	"Expected G_FCONSTANT");
8094	MIB.addImm(
8095	Val: AArch64_AM::getFP32Imm(FPImm: MI.getOperand(i: `1`).getFPImm()->getValueAPF()));
8096	}
8097
8098	void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
8099	const MachineInstr &MI,
8100	int OpIdx) const {
8101	assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -`1` &&
8102	"Expected G_FCONSTANT");
8103	MIB.addImm(
8104	Val: AArch64_AM::getFP64Imm(FPImm: MI.getOperand(i: `1`).getFPImm()->getValueAPF()));
8105	}
8106
8107	void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
8108	MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
8109	assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -`1` &&
8110	"Expected G_FCONSTANT");
8111	MIB.addImm(Val: AArch64_AM::encodeAdvSIMDModImmType4(Imm: MI.getOperand(i: `1`)
8112	.getFPImm()
8113	->getValueAPF()
8114	.bitcastToAPInt()
8115	.getZExtValue()));
8116	}
8117
8118	bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
8119	const MachineInstr &MI, unsigned NumBytes) const {
8120	if (!MI.mayLoadOrStore())
8121	return false;
8122	assert(MI.hasOneMemOperand() &&
8123	"Expected load/store to have only one mem op!");
8124	return (*MI.memoperands_begin())->getSize() == NumBytes;
8125	}
8126
8127	bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
8128	const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8129	if (MRI.getType(Reg: MI.getOperand(i: `0`).getReg()).getSizeInBits() != `32`)
8130	return false;
8131
8132	// Only return true if we know the operation will zero-out the high half of
8133	// the 64-bit register. Truncates can be subregister copies, which don't
8134	// zero out the high bits. Copies and other copy-like instructions can be
8135	// fed by truncates, or could be lowered as subregister copies.
8136	switch (MI.getOpcode()) {
8137	default:
8138	return true;
8139	case TargetOpcode::COPY:
8140	case TargetOpcode::G_BITCAST:
8141	case TargetOpcode::G_TRUNC:
8142	case TargetOpcode::G_PHI:
8143	return false;
8144	}
8145	}
8146
8147
8148	// Perform fixups on the given PHI instruction's operands to force them all
8149	// to be the same as the destination regbank.
8150	static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
8151	const AArch64RegisterBankInfo &RBI) {
8152	assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
8153	Register DstReg = MI.getOperand(i: `0`).getReg();
8154	const RegisterBank *DstRB = MRI.getRegBankOrNull(Reg: DstReg);
8155	assert(DstRB && "Expected PHI dst to have regbank assigned");
8156	MachineIRBuilder MIB(MI);
8157
8158	// Go through each operand and ensure it has the same regbank.
8159	for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
8160	if (!MO.isReg())
8161	continue;
8162	Register OpReg = MO.getReg();
8163	const RegisterBank *RB = MRI.getRegBankOrNull(Reg: OpReg);
8164	if (RB != DstRB) {
8165	// Insert a cross-bank copy.
8166	auto *OpDef = MRI.getVRegDef(Reg: OpReg);
8167	const LLT &Ty = MRI.getType(Reg: OpReg);
8168	MachineBasicBlock &OpDefBB = *OpDef->getParent();
8169
8170	// Any instruction we insert must appear after all PHIs in the block
8171	// for the block to be valid MIR.
8172	MachineBasicBlock::iterator InsertPt = std::next(x: OpDef->getIterator());
8173	if (InsertPt != OpDefBB.end() && InsertPt ->isPHI())
8174	InsertPt = OpDefBB.getFirstNonPHI();
8175	MIB.setInsertPt(MBB&: *OpDef->getParent(), II: InsertPt);
8176	auto Copy = MIB.buildCopy(Res: Ty, Op: OpReg);
8177	MRI.setRegBank(Reg: Copy.getReg(Idx: `0`), RegBank: *DstRB);
8178	MO.setReg(Copy.getReg(Idx: `0`));
8179	}
8180	}
8181	}
8182
8183	void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8184	// We're looking for PHIs, build a list so we don't invalidate iterators.
8185	MachineRegisterInfo &MRI = MF.getRegInfo();
8186	SmallVector<MachineInstr *, `32`> Phis;
8187	for (auto &BB : MF) {
8188	for (auto &MI : BB) {
8189	if (MI.getOpcode() == TargetOpcode::G_PHI)
8190	Phis.emplace_back(Args: &MI);
8191	}
8192	}
8193
8194	for (auto *MI : Phis) {
8195	// We need to do some work here if the operand types are < 16 bit and they
8196	// are split across fpr/gpr banks. Since all types <32b on gpr
8197	// end up being assigned gpr32 regclasses, we can end up with PHIs here
8198	// which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8199	// be selecting heterogenous regbanks for operands if possible, but we
8200	// still need to be able to deal with it here.
8201	//
8202	// To fix this, if we have a gpr-bank operand < 32b in size and at least
8203	// one other operand is on the fpr bank, then we add cross-bank copies
8204	// to homogenize the operand banks. For simplicity the bank that we choose
8205	// to settle on is whatever bank the def operand has. For example:
8206	//
8207	// %endbb:
8208	// %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8209	// =>
8210	// %bb2:
8211	// ...
8212	// %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8213	// ...
8214	// %endbb:
8215	// %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8216	bool HasGPROp = false, HasFPROp = false;
8217	for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
8218	if (!MO.isReg())
8219	continue;
8220	const LLT &Ty = MRI.getType(Reg: MO.getReg());
8221	if (!Ty.isValid() \|\| !Ty.isScalar())
8222	break;
8223	if (Ty.getSizeInBits() >= `32`)
8224	break;
8225	const RegisterBank *RB = MRI.getRegBankOrNull(Reg: MO.getReg());
8226	// If for some reason we don't have a regbank yet. Don't try anything.
8227	if (!RB)
8228	break;
8229
8230	if (RB->getID() == AArch64::GPRRegBankID)
8231	HasGPROp = true;
8232	else
8233	HasFPROp = true;
8234	}
8235	// We have heterogenous regbanks, need to fixup.
8236	if (HasGPROp && HasFPROp)
8237	fixupPHIOpBanks(MI&: *MI, MRI, RBI);
8238	}
8239	}
8240
8241	namespace llvm {
8242	InstructionSelector *
8243	createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8244	const AArch64Subtarget &Subtarget,
8245	const AArch64RegisterBankInfo &RBI) {
8246	return new AArch64InstructionSelector (TM, Subtarget, RBI);
8247	}
8248	}
8249

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp