//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization lowering for instructions.
///
/// This is used to offload pattern matching from the selector.
///
/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
/// a G_ZIP, G_UZP, etc.
///
/// General optimization combines should be handled by either the
/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64GlobalISelUtils.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64Subtarget.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include <optional>

#define GET_GICOMBINER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "aarch64-postlegalizer-lowering"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

namespace {

#define GET_GICOMBINER_TYPES
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_TYPES

/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
///
/// Used for matching target-supported shuffles before codegen.
struct ShuffleVectorPseudo {
  unsigned Opc;                 ///< Opcode for the instruction. (E.g. G_ZIP1)
  Register Dst;                 ///< Destination register.
  SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
  ShuffleVectorPseudo() = default;
};

/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
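///
/// For example, with NumElts = 4 the mask <1, 2, 3, 4> selects four
/// consecutive elements starting at index 1, so this returns {false, 1}:
/// no source reversal and element offset 1 (matchEXT later scales the offset
/// by the element size in bytes).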
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                    unsigned NumElts) {
  // Look for the first non-undef element.
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstRealElt == M.end())
    return std::nullopt;

  // Use APInt to handle overflow when calculating expected element.
  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);

  // The following shuffle indices must be the successive elements after the
  // first real element.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return std::nullopt;

  // The index of an EXT is the first element if it is not UNDEF.
  // Watch out for the beginning UNDEFs. The EXT index should be the expected
  // value of the first element. E.g.
  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  // ExpectedElt is the last mask index plus 1.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;

  // There are two different cases that require reversing the input vectors.
  // For example, for the vector <4 x i32> we have the following cases:
  //   Case 1: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, -1, 0>)
  //   Case 2: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, 7, 0>)
  // For both cases we ultimately use the mask <5, 6, 7, 0>, which requires
  // reversing the two input vectors.
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}

/// Helper function for matchINS.
///
/// \returns a value when \p M is an ins mask for \p NumInputElements.
///
/// First element of the returned pair is true when the produced
/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
///
/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
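///
/// For example, with 4 input elements, the mask <0, 1, 6, 3> leaves the LHS
/// intact except for lane 2, which takes element 6 (RHS lane 2), so this
/// returns {true, 2}.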
std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
                                              int NumInputElements) {
  if (M.size() != static_cast<size_t>(NumInputElements))
    return std::nullopt;
  int NumLHSMatch = 0, NumRHSMatch = 0;
  int LastLHSMismatch = -1, LastRHSMismatch = -1;
  for (int Idx = 0; Idx < NumInputElements; ++Idx) {
    if (M[Idx] == -1) {
      ++NumLHSMatch;
      ++NumRHSMatch;
      continue;
    }
    M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
    M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
  }
  const int NumNeededToMatch = NumInputElements - 1;
  if (NumLHSMatch == NumNeededToMatch)
    return std::make_pair(true, LastLHSMismatch);
  if (NumRHSMatch == NumNeededToMatch)
    return std::make_pair(false, LastRHSMismatch);
  return std::nullopt;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Dst);
  unsigned EltSize = Ty.getScalarSizeInBits();

  // Element size for a rev cannot be 64.
  if (EltSize == 64)
    return false;

  unsigned NumElts = Ty.getNumElements();

  // Try to produce a G_REV instruction
  for (unsigned LaneSize : {64U, 32U, 16U}) {
    if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
      unsigned Opcode;
      if (LaneSize == 64U)
        Opcode = AArch64::G_REV64;
      else if (LaneSize == 32U)
        Opcode = AArch64::G_REV32;
      else
        Opcode = AArch64::G_REV16;

      MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
      return true;
    }
  }

  return false;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_TRN1 or G_TRN2 instruction.
bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_UZP1 or G_UZP2 instruction.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

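/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_ZIP1 or G_ZIP2 instruction.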
bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// Helper function for matchDup.
bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                 MachineRegisterInfo &MRI,
                                 ShuffleVectorPseudo &MatchInfo) {
  if (Lane != 0)
    return false;

  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //
  //   %scalar:gpr(s64) = COPY $x0
  //   %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  //   %cst0:gpr(s32) = G_CONSTANT i32 0
  //   %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  //   %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  //   %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
  //                                            %zerovec(<2 x s32>)
  //
  // ...into:
  //   %splat = G_DUP %scalar

  // Begin matching the insert.
  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
                             MI.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
                    MRI))
    return false;

  // Match the index constant 0.
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
    return false;

  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
                                  {InsMI->getOperand(2).getReg()});
  return true;
}

/// Helper function for matchDup.
bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                             MachineRegisterInfo &MRI,
                             ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected positive lane?");
  int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
  // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
  // lane's definition directly.
  auto *BuildVecMI =
      getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
                   MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
  // If Lane >= NumElements, the lane points into the RHS, so rebase it to
  // index into the RHS vector.
  if (NumElements <= Lane)
    Lane -= NumElements;

  if (!BuildVecMI)
    return false;
  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
  MatchInfo =
      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  return true;
}

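/// \return true if a G_SHUFFLE_VECTOR instruction \p MI is a splat of a single
/// lane and can be replaced with a G_DUP of the corresponding scalar.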
bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto MaybeLane = getSplatIndex(MI);
  if (!MaybeLane)
    return false;
  int Lane = *MaybeLane;
  // If this is an undef splat, generate it via a plain vdup if possible.
  if (Lane < 0)
    Lane = 0;
  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
    return true;
  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
    return true;
  return false;
}

// Check if an EXT instruction can handle the shuffle mask when the vector
// sources of the shuffle are the same.
bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
  unsigned NumElts = Ty.getNumElements();

  // Assume that the first shuffle index is not UNDEF. Fail if it is.
  if (M[0] < 0)
    return false;

  // If this is a VEXT shuffle, the immediate value is the index of the first
  // element. The other shuffle indices must be the successive elements after
  // the first one.
  unsigned ExpectedElt = M[0];
  for (unsigned I = 1; I < NumElts; ++I) {
    // Increment the expected index. If it wraps around, just follow it
    // back to index zero and keep going.
    ++ExpectedElt;
    if (ExpectedElt == NumElts)
      ExpectedElt = 0;

    if (M[I] < 0)
      continue; // Ignore UNDEF indices.
    if (ExpectedElt != static_cast<unsigned>(M[I]))
      return false;
  }

  return true;
}

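/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_EXT instruction, i.e. a byte-wise extraction of a contiguous run of
/// elements from the concatenation of the two sources.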
bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  auto Mask = MI.getOperand(3).getShuffleMask();
  uint64_t Imm;
  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;

  if (!ExtInfo) {
    if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
        !isSingletonExtMask(Mask, DstTy))
      return false;

    Imm = Mask[0] * ExtFactor;
    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
    return true;
  }
  bool ReverseExt;
  std::tie(ReverseExt, Imm) = *ExtInfo;
  if (ReverseExt)
    std::swap(V1, V2);
  Imm *= ExtFactor;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  return true;
}

/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
void applyShuffleVectorPseudo(MachineInstr &MI,
                              ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
  MI.eraseFromParent();
}

/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
/// Special-cased because the constant operand must be emitted as a G_CONSTANT
/// for the imported tablegen patterns to work.
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  if (MatchInfo.SrcOps[2].getImm() == 0)
    MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
  else {
    // Tablegen patterns expect an i32 G_CONSTANT as the final op.
    auto Cst =
        MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                          {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  }
  MI.eraseFromParent();
}

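/// Lower a full reversal of a 128-bit vector: reverse the elements within each
/// 64-bit half with G_REV64, then swap the two halves with a G_EXT of 8 bytes.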
void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy.getSizeInBits() == 128 &&
         "Expected 128bit vector in applyFullRev");
  MachineIRBuilder MIRBuilder(MI);
  auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
  auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
  MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
  MI.eraseFromParent();
}

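/// \return true if the G_INSERT_VECTOR_ELT \p MI has a non-constant lane
/// index. Such inserts are lowered by applyNonConstInsert via a stack
/// temporary: spill the vector, store the element at the computed address,
/// then reload the whole vector.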
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
  return !ValAndVReg;
}

void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(Insert);

  Register Offset = Insert.getIndexReg();
  LLT VecTy = MRI.getType(Insert.getReg(0));
  LLT EltTy = MRI.getType(Insert.getElementReg());
  LLT IdxTy = MRI.getType(Insert.getIndexReg());

  if (VecTy.isScalableVector())
    return;

  // Create a stack slot and store the vector into it
  MachineFunction &MF = Builder.getMF();
  Align Alignment(
      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
  int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                                     Alignment, false);
  LLT FramePtrTy = LLT::pointer(0, 64);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);

  Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-2 vector size");
  auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
  auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
  Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
  Register EltPtr =
      Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
          .getReg(0);

  // Write the inserted element
  Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
  // Reload the whole vector.
  Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
  Insert.eraseFromParent();
}

/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
///
/// e.g.
///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
///
/// Can be represented as
///
///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
///
bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  int NumElts = MRI.getType(Dst).getNumElements();
  auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
  if (!DstIsLeftAndDstLane)
    return false;
  bool DstIsLeft;
  int DstLane;
  std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
  Register Left = MI.getOperand(1).getReg();
  Register Right = MI.getOperand(2).getReg();
  Register DstVec = DstIsLeft ? Left : Right;
  Register SrcVec = Left;

  int SrcLane = ShuffleMask[DstLane];
  if (SrcLane >= NumElts) {
    SrcVec = Right;
    SrcLane -= NumElts;
  }

  MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
  return true;
}

void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              MachineIRBuilder &Builder,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  auto ScalarTy = MRI.getType(Dst).getElementType();
  Register DstVec, SrcVec;
  int DstLane, SrcLane;
  std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
  auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
  auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
  auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
  Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
  MI.eraseFromParent();
}

/// isVShiftRImm - Check if this is a valid vector splat for the immediate
/// operand of a vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits for a right shift.
bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                  int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *MI = MRI.getVRegDef(Reg);
  auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
  if (!Cst)
    return false;
  Cnt = *Cst;
  int64_t ElementBits = Ty.getScalarSizeInBits();
  return Cnt >= 1 && Cnt <= ElementBits;
}

/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}

void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
  MI.eraseFromParent();
}

/// Determine if it is possible to modify the \p RHS and predicate \p P of a
/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
///
/// \returns A pair containing the updated immediate and predicate which may
/// be used to optimize the instruction.
///
/// \note This assumes that the comparison has been legalized.
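///
/// For example, (x ult 4097) cannot use 4097 directly (it is not a valid
/// 12-bit, optionally shifted, arithmetic immediate), but it can be rewritten
/// as (x ule 4096), and 4096 is encodable.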
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return std::nullopt;
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or the RHS is already a valid arithmetic
  // immediate, then there is nothing to change.
  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return std::nullopt;
  uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
  uint64_t C = OriginalC;
  if (isLegalArithImmed(C))
    return std::nullopt;

  // We have a non-arithmetic immediate. Check if adjusting the immediate and
  // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return std::nullopt;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero.
    if (C == 0)
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // When c is not the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }

  // Check if the new constant is valid, and return the updated constant and
  // predicate if it is.
  if (Size == 32)
    C = static_cast<uint32_t>(C);
  if (isLegalArithImmed(C))
    return {{C, P}};

  auto IsMaterializableInSingleInstruction = [=](uint64_t Imm) {
    SmallVector<AArch64_IMM::ImmInsnModel> Insn;
    AArch64_IMM::expandMOVImm(Imm, 32, Insn);
    return Insn.size() == 1;
  };

  if (!IsMaterializableInSingleInstruction(OriginalC) &&
      IsMaterializableInSingleInstruction(C))
    return {{C, P}};

  return std::nullopt;
}

/// Determine whether or not it is possible to update the RHS and predicate of
/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
/// immediate.
///
/// \p MI - The G_ICMP instruction
/// \p MatchInfo - The new RHS immediate and predicate on success
///
/// See tryAdjustICmpImmAndPred for valid transformations.
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  Register RHS = MI.getOperand(3).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
    MatchInfo = *MaybeNewImmAndPred;
    return true;
  }
  return false;
}

void applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineOperand &RHS = MI.getOperand(3);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
                               MatchInfo.first);
  Observer.changingInstr(MI);
  RHS.setReg(Cst->getOperand(0).getReg());
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
}

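/// \return true if a G_SHUFFLE_VECTOR \p MI that splats a single lane of its
/// first source can be replaced with a G_DUPLANE{8,16,32,64} pseudo, keyed by
/// the element size.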
bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
    return false;

  // The lane idx should be within the first source vector.
  if (*LaneIdx >= SrcTy.getNumElements())
    return false;

  if (DstTy != SrcTy)
    return false;

  LLT ScalarTy = SrcTy.getElementType();
  unsigned ScalarSize = ScalarTy.getSizeInBits();

  unsigned Opc = 0;
  switch (SrcTy.getNumElements()) {
  case 2:
    if (ScalarSize == 64)
      Opc = AArch64::G_DUPLANE64;
    else if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    break;
  case 4:
    if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    else if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 8:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    else if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 16:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    break;
  default:
    break;
  }
  if (!Opc)
    return false;

  MatchInfo.first = Opc;
  MatchInfo.second = *LaneIdx;
  return true;
}

void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);

  B.setInstrAndDebugLoc(MI);
  auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);

  Register DupSrc = MI.getOperand(1).getReg();
  // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
  // To do this, we can use a G_CONCAT_VECTORS to do the widening.
  if (SrcTy.getSizeInBits() == 64) {
    auto Undef = B.buildUndef(SrcTy);
    DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
                                  {Src1Reg, Undef.getReg(0)})
                 .getReg(0);
  }
  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
  MI.eraseFromParent();
}

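/// \return true if a G_UNMERGE_VALUES \p MI takes a 64- or 128-bit fixed-width
/// vector apart into all of its individual elements, so that it can instead be
/// rewritten as one G_EXTRACT_VECTOR_ELT per lane.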
bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);
  if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
    return false;
  return SrcTy.isVector() && !SrcTy.isScalable() &&
         Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
}

void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &B) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);
  assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
         "Expected a fixed length vector");

  for (int I = 0; I < SrcTy.getNumElements(); ++I)
    B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
  MI.eraseFromParent();
}

bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  // Later, during selection, we'll try to match imported patterns using
  // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
  // G_BUILD_VECTORs which could match those patterns.
  if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
    return false;

  return getAArch64VectorSplat(MI, MRI).has_value();
}

void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                           MachineIRBuilder &B) {
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
               {MI.getOperand(1).getReg()});
  MI.eraseFromParent();
}

/// \returns how many instructions would be saved by folding a G_ICMP's shift
/// and/or extension operations.
unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
  // No instructions to save if there's more than one use or no uses.
  if (!MRI.hasOneNonDBGUse(CmpOp))
    return 0;

  // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
  auto IsSupportedExtend = [&](const MachineInstr &MI) {
    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
      return true;
    if (MI.getOpcode() != TargetOpcode::G_AND)
      return false;
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    if (!ValAndVReg)
      return false;
    uint64_t Mask = ValAndVReg->Value.getZExtValue();
    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  };

  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*Def))
    return 1;

  unsigned Opc = Def->getOpcode();
  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
      Opc != TargetOpcode::G_LSHR)
    return 0;

  auto MaybeShiftAmt =
      getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
  if (!MaybeShiftAmt)
    return 0;
  uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
  MachineInstr *ShiftLHS =
      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);

  // Check if we can fold an extend and a shift.
  // FIXME: This is duplicated with the selector. (See:
  // selectArithExtendedRegister)
  if (IsSupportedExtend(*ShiftLHS))
    return (ShiftAmt <= 4) ? 2 : 1;

  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
  if (Ty.isVector())
    return 0;
  unsigned ShiftSize = Ty.getSizeInBits();
  if ((ShiftSize == 32 && ShiftAmt <= 31) ||
      (ShiftSize == 64 && ShiftAmt <= 63))
    return 1;
  return 0;
}

/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
/// instruction \p MI.
bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // Swap the operands if it would introduce a profitable folding opportunity.
  // (e.g. a shift + extend).
  //
  // For example:
  //   lsl w13, w11, #1
  //   cmp w13, w12
  // can be turned into:
  //   cmp w12, w11, lsl #1

  // Don't swap if there's a constant on the RHS, because we know we can fold
  // that.
  Register RHS = MI.getOperand(3).getReg();
  auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  auto GetRegForProfit = [&](Register Reg) {
    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
  };

  // Don't have a constant on the RHS. If we swap the LHS and RHS of the
  // compare, would we be able to fold more instructions?
  Register TheLHS = GetRegForProfit(LHS);
  Register TheRHS = GetRegForProfit(RHS);

  // If the LHS is more likely to give us a folding opportunity, then swap the
  // LHS and RHS.
  return (getCmpOperandFoldingProfit(TheLHS, MRI) >
          getCmpOperandFoldingProfit(TheRHS, MRI));
}

void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  Observer.changedInstr(MI);
  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
  MI.getOperand(2).setReg(RHS);
  MI.getOperand(3).setReg(LHS);
  Observer.changedInstr(MI);
}

/// \returns a function which builds a vector floating point compare instruction
/// for a condition code \p CC.
/// \param [in] NoNans - True if the target has NoNansFPMath.
std::function<Register(MachineIRBuilder &)>
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
              MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(LHS);
  assert(DstTy.isVector() && "Expected vector types only?");
  assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
  switch (CC) {
  default:
    llvm_unreachable("Unexpected condition code!");
  case AArch64CC::NE:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
      return MIB.buildNot(DstTy, FCmp).getReg(0);
    };
  case AArch64CC::EQ:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
    };
  case AArch64CC::GE:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
    };
  case AArch64CC::GT:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
    };
  case AArch64CC::LS:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
    };
  case AArch64CC::MI:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
    };
  }
}

/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  if (!DstTy.isVector() || !ST.hasNEON())
    return false;
  Register LHS = MI.getOperand(2).getReg();
  unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
  if (EltSize == 16 && !ST.hasFullFP16())
    return false;
  if (EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false;

  return true;
}

/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  const auto &CmpMI = cast<GFCmp>(MI);

  Register Dst = CmpMI.getReg(0);
  CmpInst::Predicate Pred = CmpMI.getCond();
  Register LHS = CmpMI.getLHSReg();
  Register RHS = CmpMI.getRHSReg();

  LLT DstTy = MRI.getType(Dst);

  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  if ((Pred == CmpInst::Predicate::FCMP_ORD ||
       Pred == CmpInst::Predicate::FCMP_UNO) &&
      isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
    // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
    // NaN, so equivalent to a == a and doesn't need the two comparisons an
    // "ord" normally would.
    // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
    // thus equivalent to a != a.
    RHS = LHS;
    CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
  } else
    changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);

  // Instead of having an apply function, just build here to simplify things.
  MIB.setInstrAndDebugLoc(MI);

  const bool NoNans =
      ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;

  auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
  Register CmpRes;
  if (CC2 == AArch64CC::AL)
    CmpRes = Cmp(MIB);
  else {
    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
    auto Cmp2Dst = Cmp2(MIB);
    auto Cmp1Dst = Cmp(MIB);
    CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
  }
  if (Invert)
    CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
  MRI.replaceRegWith(Dst, CmpRes);
  MI.eraseFromParent();
}

// Matches G_BUILD_VECTOR where at least one source operand is not a constant
bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto *GBuildVec = cast<GBuildVector>(&MI);

  // Check if the values are all constants
  for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
    auto ConstVal =
        getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);

    if (!ConstVal.has_value())
      return true;
  }

  return false;
}

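/// Lower a G_BUILD_VECTOR with non-constant sources into a chain of
/// G_INSERT_VECTOR_ELTs on an undef vector, skipping any undef sources.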
void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   MachineIRBuilder &B) {
  auto *GBuildVec = cast<GBuildVector>(&MI);
  LLT DstTy = MRI.getType(GBuildVec->getReg(0));
  Register DstReg = B.buildUndef(DstTy).getReg(0);

  for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
    Register SrcReg = GBuildVec->getSourceReg(I);
    if (mi_match(SrcReg, MRI, m_GImplicitDef()))
      continue;
    auto IdxReg = B.buildConstant(LLT::scalar(64), I);
    DstReg =
        B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
  }
  B.buildCopy(GBuildVec->getReg(0), DstReg);
  GBuildVec->eraseFromParent();
}

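/// Match a G_STORE of a G_TRUNC so that it can be rewritten as a truncating
/// store of the wider source value.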
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    return false;
  // Match a store of a truncate.
  if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
    return false;
  // Only form truncstores for value types of max 64b.
  return MRI.getType(SrcReg).getSizeInBits() <= 64;
}

void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, GISelChangeObserver &Observer,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Observer.changingInstr(MI);
  MI.getOperand(0).setReg(SrcReg);
  Observer.changedInstr(MI);
}

// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones at
// this stage need to be lowered back.
bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  return DstTy.isVector();
}

void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  B.setInstrAndDebugLoc(MI);
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  Helper.lower(MI, 0, /* Unused hint type */ LLT());
}

/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
///           => unused, <N x t> = unmerge v
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              Register &MatchInfo) {
  auto &Unmerge = cast<GUnmerge>(MI);
  if (Unmerge.getNumDefs() != 2)
    return false;
  if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
    return false;

  LLT DstTy = MRI.getType(Unmerge.getReg(0));
  if (!DstTy.isVector())
    return false;

  MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
  if (!Ext)
    return false;

  Register ExtSrc1 = Ext->getOperand(1).getReg();
  Register ExtSrc2 = Ext->getOperand(2).getReg();
  auto LowestVal =
      getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
  if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
    return false;

  if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
    return false;

  MatchInfo = ExtSrc1;
  return true;
}

void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
  Observer.changingInstr(MI);
  // Swap dst registers.
  Register Dst1 = MI.getOperand(0).getReg();
  MI.getOperand(0).setReg(MI.getOperand(1).getReg());
  MI.getOperand(1).setReg(Dst1);
  MI.getOperand(2).setReg(SrcReg);
  Observer.changedInstr(MI);
}

// Match mul({z/s}ext, {z/s}ext) => {u/s}mull, OR match v2s64 mul instructions,
// which will then be scalarised later on. These two matches are done in one
// function to ensure that the order of matching will always be the same:
// try lowering MUL to MULL before trying to scalarize if needed.
bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
  // Only match multiplies with a v2s64 destination.
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  return DstTy == LLT::fixed_vector(2, 64);
}

void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
                   MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL &&
         "Expected a G_MUL instruction");

  // Split the v2s64 multiply into two scalar multiplies.
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  assert(DstTy == LLT::fixed_vector(2, 64) && "Expected v2s64 Mul");
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  Helper.fewerElementsVector(
      MI, 0,
      DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2)));
}

class AArch64PostLegalizerLoweringImpl : public Combiner {
protected:
  const CombinerHelper Helper;
  const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
  const AArch64Subtarget &STI;

public:
  AArch64PostLegalizerLoweringImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI);

  static const char *getName() { return "AArch64PostLegalizerLowering"; }

  bool tryCombineAll(MachineInstr &I) const override;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};

#define GET_GICOMBINER_IMPL
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_IMPL

AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI)
    : Combiner(MF, CInfo, TPC, /*VT*/ nullptr, CSEInfo),
      Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
      STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLowering();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasFailedISel())
    return false;
  assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
                     F.hasOptSize(), F.hasMinSize());
  // Disable fixed-point iteration to reduce compile-time
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
  CInfo.EnableFullDCE = false;
  AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
                                        RuleConfig, ST);
  return Impl.combineMachineInstrs();
}

char AArch64PostLegalizerLowering::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
}
} // end namespace llvm