1 | //=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// Post-legalization lowering for instructions. |
11 | /// |
12 | /// This is used to offload pattern matching from the selector. |
13 | /// |
14 | /// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually |
15 | /// a G_ZIP, G_UZP, etc. |
16 | /// |
17 | /// General optimization combines should be handled by either the |
18 | /// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner. |
19 | /// |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #include "AArch64ExpandImm.h" |
23 | #include "AArch64GlobalISelUtils.h" |
24 | #include "AArch64PerfectShuffle.h" |
25 | #include "AArch64Subtarget.h" |
26 | #include "AArch64TargetMachine.h" |
27 | #include "GISel/AArch64LegalizerInfo.h" |
28 | #include "MCTargetDesc/AArch64MCTargetDesc.h" |
29 | #include "TargetInfo/AArch64TargetInfo.h" |
30 | #include "Utils/AArch64BaseInfo.h" |
31 | #include "llvm/CodeGen/GlobalISel/Combiner.h" |
32 | #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" |
33 | #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" |
34 | #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" |
35 | #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" |
36 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
37 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" |
38 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
39 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
40 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
41 | #include "llvm/CodeGen/MachineFrameInfo.h" |
42 | #include "llvm/CodeGen/MachineFunctionPass.h" |
43 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
44 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
45 | #include "llvm/CodeGen/TargetOpcodes.h" |
46 | #include "llvm/CodeGen/TargetPassConfig.h" |
47 | #include "llvm/IR/InstrTypes.h" |
48 | #include "llvm/InitializePasses.h" |
49 | #include "llvm/Support/Debug.h" |
50 | #include "llvm/Support/ErrorHandling.h" |
51 | #include <optional> |
52 | |
53 | #define GET_GICOMBINER_DEPS |
54 | #include "AArch64GenPostLegalizeGILowering.inc" |
55 | #undef GET_GICOMBINER_DEPS |
56 | |
57 | #define DEBUG_TYPE "aarch64-postlegalizer-lowering" |
58 | |
59 | using namespace llvm; |
60 | using namespace MIPatternMatch; |
61 | using namespace AArch64GISelUtils; |
62 | |
63 | namespace { |
64 | |
65 | #define GET_GICOMBINER_TYPES |
66 | #include "AArch64GenPostLegalizeGILowering.inc" |
67 | #undef GET_GICOMBINER_TYPES |
68 | |
69 | /// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR. |
70 | /// |
71 | /// Used for matching target-supported shuffles before codegen. |
72 | struct ShuffleVectorPseudo { |
73 | unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1) |
74 | Register Dst; ///< Destination register. |
75 | SmallVector<SrcOp, 2> SrcOps; ///< Source registers. |
76 | ShuffleVectorPseudo(unsigned Opc, Register Dst, |
77 | std::initializer_list<SrcOp> SrcOps) |
78 | : Opc(Opc), Dst(Dst), SrcOps(SrcOps){}; |
79 | ShuffleVectorPseudo() = default; |
80 | }; |
81 | |
82 | /// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector |
83 | /// sources of the shuffle are different. |
84 | std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M, |
85 | unsigned NumElts) { |
86 | // Look for the first non-undef element. |
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
88 | if (FirstRealElt == M.end()) |
89 | return std::nullopt; |
90 | |
91 | // Use APInt to handle overflow when calculating expected element. |
92 | unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); |
93 | APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); |
94 | |
95 | // The following shuffle indices must be the successive elements after the |
96 | // first real element. |
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
100 | return std::nullopt; |
101 | |
102 | // The index of an EXT is the first element if it is not UNDEF. |
103 | // Watch out for the beginning UNDEFs. The EXT index should be the expected |
104 | // value of the first element. E.g. |
105 | // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. |
106 | // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. |
107 | // ExpectedElt is the last mask index plus 1. |
108 | uint64_t Imm = ExpectedElt.getZExtValue(); |
109 | bool ReverseExt = false; |
110 | |
  // There are two different cases that require reversing the input vectors.
  // For example, for vector <4 x i32> we have the following cases:
  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
  // In both cases we end up using the mask <5, 6, 7, 0>, which requires
  // reversing the two input vectors.
117 | if (Imm < NumElts) |
118 | ReverseExt = true; |
119 | else |
120 | Imm -= NumElts; |
121 | return std::make_pair(x&: ReverseExt, y&: Imm); |
122 | } |
123 | |
124 | /// Helper function for matchINS. |
125 | /// |
126 | /// \returns a value when \p M is an ins mask for \p NumInputElements. |
127 | /// |
128 | /// First element of the returned pair is true when the produced |
129 | /// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR. |
130 | /// |
131 | /// Second element is the destination lane for the G_INSERT_VECTOR_ELT. |
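///
/// E.g. for \p NumInputElements == 4, the mask <0, 1, 6, 3> matches every LHS
/// lane except lane 2, so this returns (true, 2): insert into lane 2 of the
/// LHS, taking the source from mask element 6 (lane 2 of the RHS).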
132 | std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M, |
133 | int NumInputElements) { |
134 | if (M.size() != static_cast<size_t>(NumInputElements)) |
135 | return std::nullopt; |
136 | int NumLHSMatch = 0, NumRHSMatch = 0; |
137 | int LastLHSMismatch = -1, LastRHSMismatch = -1; |
138 | for (int Idx = 0; Idx < NumInputElements; ++Idx) { |
139 | if (M[Idx] == -1) { |
140 | ++NumLHSMatch; |
141 | ++NumRHSMatch; |
142 | continue; |
143 | } |
144 | M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx; |
145 | M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx; |
146 | } |
147 | const int NumNeededToMatch = NumInputElements - 1; |
148 | if (NumLHSMatch == NumNeededToMatch) |
149 | return std::make_pair(x: true, y&: LastLHSMismatch); |
150 | if (NumRHSMatch == NumNeededToMatch) |
151 | return std::make_pair(x: false, y&: LastRHSMismatch); |
152 | return std::nullopt; |
153 | } |
154 | |
/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Returns the appropriate G_REV opcode in \p MatchInfo.
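///
/// E.g. for <4 x s32>, the mask <1, 0, 3, 2> reverses the 32-bit elements
/// within each 64-bit chunk and is lowered to G_REV64.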
157 | bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, |
158 | ShuffleVectorPseudo &MatchInfo) { |
159 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
160 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
161 | Register Dst = MI.getOperand(i: 0).getReg(); |
162 | Register Src = MI.getOperand(i: 1).getReg(); |
163 | LLT Ty = MRI.getType(Reg: Dst); |
164 | unsigned EltSize = Ty.getScalarSizeInBits(); |
165 | |
166 | // Element size for a rev cannot be 64. |
167 | if (EltSize == 64) |
168 | return false; |
169 | |
170 | unsigned NumElts = Ty.getNumElements(); |
171 | |
172 | // Try to produce a G_REV instruction |
173 | for (unsigned LaneSize : {64U, 32U, 16U}) { |
174 | if (isREVMask(M: ShuffleMask, EltSize, NumElts, BlockSize: LaneSize)) { |
175 | unsigned Opcode; |
176 | if (LaneSize == 64U) |
177 | Opcode = AArch64::G_REV64; |
178 | else if (LaneSize == 32U) |
179 | Opcode = AArch64::G_REV32; |
180 | else |
181 | Opcode = AArch64::G_REV16; |
182 | |
183 | MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src}); |
184 | return true; |
185 | } |
186 | } |
187 | |
188 | return false; |
189 | } |
190 | |
191 | /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with |
192 | /// a G_TRN1 or G_TRN2 instruction. |
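///
/// E.g. for <4 x s32>, the mask <0, 4, 2, 6> interleaves the even-numbered
/// lanes of the two sources (G_TRN1), and <1, 5, 3, 7> the odd-numbered lanes
/// (G_TRN2).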
193 | bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, |
194 | ShuffleVectorPseudo &MatchInfo) { |
195 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
196 | unsigned WhichResult; |
197 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
198 | Register Dst = MI.getOperand(i: 0).getReg(); |
199 | unsigned NumElts = MRI.getType(Reg: Dst).getNumElements(); |
200 | if (!isTRNMask(M: ShuffleMask, NumElts, WhichResult)) |
201 | return false; |
202 | unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; |
203 | Register V1 = MI.getOperand(i: 1).getReg(); |
204 | Register V2 = MI.getOperand(i: 2).getReg(); |
205 | MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); |
206 | return true; |
207 | } |
208 | |
209 | /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with |
210 | /// a G_UZP1 or G_UZP2 instruction. |
211 | /// |
212 | /// \param [in] MI - The shuffle vector instruction. |
213 | /// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success. |
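///
/// E.g. for <4 x s32>, the mask <0, 2, 4, 6> takes the even-numbered elements
/// of the concatenated sources (G_UZP1), and <1, 3, 5, 7> the odd-numbered
/// ones (G_UZP2).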
214 | bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI, |
215 | ShuffleVectorPseudo &MatchInfo) { |
216 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
217 | unsigned WhichResult; |
218 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
219 | Register Dst = MI.getOperand(i: 0).getReg(); |
220 | unsigned NumElts = MRI.getType(Reg: Dst).getNumElements(); |
221 | if (!isUZPMask(M: ShuffleMask, NumElts, WhichResultOut&: WhichResult)) |
222 | return false; |
223 | unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2; |
224 | Register V1 = MI.getOperand(i: 1).getReg(); |
225 | Register V2 = MI.getOperand(i: 2).getReg(); |
226 | MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); |
227 | return true; |
228 | } |
229 | |
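/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_ZIP1 or G_ZIP2 instruction.
///
/// E.g. for <4 x s32>, the mask <0, 4, 1, 5> interleaves the low halves of the
/// two sources (G_ZIP1), and <2, 6, 3, 7> the high halves (G_ZIP2).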
230 | bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI, |
231 | ShuffleVectorPseudo &MatchInfo) { |
232 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
233 | unsigned WhichResult; |
234 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
235 | Register Dst = MI.getOperand(i: 0).getReg(); |
236 | unsigned NumElts = MRI.getType(Reg: Dst).getNumElements(); |
237 | if (!isZIPMask(M: ShuffleMask, NumElts, WhichResultOut&: WhichResult)) |
238 | return false; |
239 | unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2; |
240 | Register V1 = MI.getOperand(i: 1).getReg(); |
241 | Register V2 = MI.getOperand(i: 2).getReg(); |
242 | MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); |
243 | return true; |
244 | } |
245 | |
246 | /// Helper function for matchDup. |
247 | bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI, |
248 | MachineRegisterInfo &MRI, |
249 | ShuffleVectorPseudo &MatchInfo) { |
250 | if (Lane != 0) |
251 | return false; |
252 | |
253 | // Try to match a vector splat operation into a dup instruction. |
254 | // We're looking for this pattern: |
255 | // |
256 | // %scalar:gpr(s64) = COPY $x0 |
257 | // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF |
258 | // %cst0:gpr(s32) = G_CONSTANT i32 0 |
259 | // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32) |
260 | // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32) |
261 | // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, |
262 | // %zerovec(<2 x s32>) |
263 | // |
264 | // ...into: |
265 | // %splat = G_DUP %scalar |
266 | |
267 | // Begin matching the insert. |
268 | auto *InsMI = getOpcodeDef(Opcode: TargetOpcode::G_INSERT_VECTOR_ELT, |
269 | Reg: MI.getOperand(i: 1).getReg(), MRI); |
270 | if (!InsMI) |
271 | return false; |
272 | // Match the undef vector operand. |
273 | if (!getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: InsMI->getOperand(i: 1).getReg(), |
274 | MRI)) |
275 | return false; |
276 | |
277 | // Match the index constant 0. |
278 | if (!mi_match(R: InsMI->getOperand(i: 3).getReg(), MRI, P: m_ZeroInt())) |
279 | return false; |
280 | |
281 | MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(i: 0).getReg(), |
282 | {InsMI->getOperand(i: 2).getReg()}); |
283 | return true; |
284 | } |
285 | |
286 | /// Helper function for matchDup. |
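///
/// E.g. a splat of lane 1 of G_BUILD_VECTOR %a, %b, %c, %d can reference %b
/// directly and becomes G_DUP %b.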
287 | bool matchDupFromBuildVector(int Lane, MachineInstr &MI, |
288 | MachineRegisterInfo &MRI, |
289 | ShuffleVectorPseudo &MatchInfo) { |
  assert(Lane >= 0 && "Expected a non-negative lane?");
291 | // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the |
292 | // lane's definition directly. |
293 | auto *BuildVecMI = getOpcodeDef(Opcode: TargetOpcode::G_BUILD_VECTOR, |
294 | Reg: MI.getOperand(i: 1).getReg(), MRI); |
295 | if (!BuildVecMI) |
296 | return false; |
297 | Register Reg = BuildVecMI->getOperand(i: Lane + 1).getReg(); |
298 | MatchInfo = |
299 | ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(i: 0).getReg(), {Reg}); |
300 | return true; |
301 | } |
302 | |
303 | bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI, |
304 | ShuffleVectorPseudo &MatchInfo) { |
305 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
306 | auto MaybeLane = getSplatIndex(MI); |
307 | if (!MaybeLane) |
308 | return false; |
309 | int Lane = *MaybeLane; |
  // If the splat lane is undef, treat it as a splat of lane 0 and lower it as
  // a plain dup, if possible.
311 | if (Lane < 0) |
312 | Lane = 0; |
313 | if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo)) |
314 | return true; |
315 | if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo)) |
316 | return true; |
317 | return false; |
318 | } |
319 | |
320 | // Check if an EXT instruction can handle the shuffle mask when the vector |
321 | // sources of the shuffle are the same. |
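//
// E.g. for <8 x s8>, the mask <3, 4, 5, 6, 7, 0, 1, 2> rotates the single
// source by three elements, which matchEXT lowers to a G_EXT with an
// immediate of 3 (bytes).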
322 | bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) { |
323 | unsigned NumElts = Ty.getNumElements(); |
324 | |
325 | // Assume that the first shuffle index is not UNDEF. Fail if it is. |
326 | if (M[0] < 0) |
327 | return false; |
328 | |
329 | // If this is a VEXT shuffle, the immediate value is the index of the first |
330 | // element. The other shuffle indices must be the successive elements after |
331 | // the first one. |
332 | unsigned ExpectedElt = M[0]; |
333 | for (unsigned I = 1; I < NumElts; ++I) { |
334 | // Increment the expected index. If it wraps around, just follow it |
335 | // back to index zero and keep going. |
336 | ++ExpectedElt; |
337 | if (ExpectedElt == NumElts) |
338 | ExpectedElt = 0; |
339 | |
340 | if (M[I] < 0) |
341 | continue; // Ignore UNDEF indices. |
342 | if (ExpectedElt != static_cast<unsigned>(M[I])) |
343 | return false; |
344 | } |
345 | |
346 | return true; |
347 | } |
348 | |
349 | bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI, |
350 | ShuffleVectorPseudo &MatchInfo) { |
351 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
352 | Register Dst = MI.getOperand(i: 0).getReg(); |
353 | LLT DstTy = MRI.getType(Reg: Dst); |
354 | Register V1 = MI.getOperand(i: 1).getReg(); |
355 | Register V2 = MI.getOperand(i: 2).getReg(); |
356 | auto Mask = MI.getOperand(i: 3).getShuffleMask(); |
357 | uint64_t Imm; |
358 | auto ExtInfo = getExtMask(M: Mask, NumElts: DstTy.getNumElements()); |
359 | uint64_t ExtFactor = MRI.getType(Reg: V1).getScalarSizeInBits() / 8; |
360 | |
361 | if (!ExtInfo) { |
362 | if (!getOpcodeDef<GImplicitDef>(Reg: V2, MRI) || |
363 | !isSingletonExtMask(M: Mask, Ty: DstTy)) |
364 | return false; |
365 | |
366 | Imm = Mask[0] * ExtFactor; |
367 | MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm}); |
368 | return true; |
369 | } |
370 | bool ReverseExt; |
371 | std::tie(args&: ReverseExt, args&: Imm) = *ExtInfo; |
372 | if (ReverseExt) |
373 | std::swap(a&: V1, b&: V2); |
374 | Imm *= ExtFactor; |
375 | MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm}); |
376 | return true; |
377 | } |
378 | |
379 | /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo. |
380 | /// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR. |
381 | void applyShuffleVectorPseudo(MachineInstr &MI, |
382 | ShuffleVectorPseudo &MatchInfo) { |
383 | MachineIRBuilder MIRBuilder(MI); |
384 | MIRBuilder.buildInstr(Opc: MatchInfo.Opc, DstOps: {MatchInfo.Dst}, SrcOps: MatchInfo.SrcOps); |
385 | MI.eraseFromParent(); |
386 | } |
387 | |
388 | /// Replace a G_SHUFFLE_VECTOR instruction with G_EXT. |
389 | /// Special-cased because the constant operand must be emitted as a G_CONSTANT |
390 | /// for the imported tablegen patterns to work. |
391 | void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { |
392 | MachineIRBuilder MIRBuilder(MI); |
393 | if (MatchInfo.SrcOps[2].getImm() == 0) |
394 | MIRBuilder.buildCopy(Res: MatchInfo.Dst, Op: MatchInfo.SrcOps[0]); |
395 | else { |
396 | // Tablegen patterns expect an i32 G_CONSTANT as the final op. |
397 | auto Cst = |
398 | MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: MatchInfo.SrcOps[2].getImm()); |
399 | MIRBuilder.buildInstr(Opc: MatchInfo.Opc, DstOps: {MatchInfo.Dst}, |
400 | SrcOps: {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst}); |
401 | } |
402 | MI.eraseFromParent(); |
403 | } |
404 | |
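/// Match a G_INSERT_VECTOR_ELT whose lane index is not a compile-time
/// constant. applyNonConstInsert below lowers such inserts through a stack
/// temporary, since selecting a vector insert generally requires an immediate
/// lane index.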
405 | bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) { |
406 | assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); |
407 | |
408 | auto ValAndVReg = |
409 | getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 3).getReg(), MRI); |
410 | return !ValAndVReg; |
411 | } |
412 | |
413 | void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI, |
414 | MachineIRBuilder &Builder) { |
415 | auto &Insert = cast<GInsertVectorElement>(Val&: MI); |
416 | Builder.setInstrAndDebugLoc(Insert); |
417 | |
418 | Register Offset = Insert.getIndexReg(); |
419 | LLT VecTy = MRI.getType(Reg: Insert.getReg(Idx: 0)); |
420 | LLT EltTy = MRI.getType(Reg: Insert.getElementReg()); |
421 | LLT IdxTy = MRI.getType(Reg: Insert.getIndexReg()); |
422 | |
423 | // Create a stack slot and store the vector into it |
424 | MachineFunction &MF = Builder.getMF(); |
425 | Align Alignment( |
426 | std::min<uint64_t>(a: VecTy.getSizeInBytes().getKnownMinValue(), b: 16)); |
427 | int FrameIdx = MF.getFrameInfo().CreateStackObject(Size: VecTy.getSizeInBytes(), |
428 | Alignment, isSpillSlot: false); |
429 | LLT FramePtrTy = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
430 | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIdx); |
431 | auto StackTemp = Builder.buildFrameIndex(Res: FramePtrTy, Idx: FrameIdx); |
432 | |
433 | Builder.buildStore(Val: Insert.getOperand(i: 1), Addr: StackTemp, PtrInfo, Alignment: Align(8)); |
434 | |
435 | // Get the pointer to the element, and be sure not to hit undefined behavior |
436 | // if the index is out of bounds. |
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-of-2 vector size");
439 | auto Mask = Builder.buildConstant(Res: IdxTy, Val: VecTy.getNumElements() - 1); |
440 | Register And = Builder.buildAnd(Dst: IdxTy, Src0: Offset, Src1: Mask).getReg(Idx: 0); |
441 | auto EltSize = Builder.buildConstant(Res: IdxTy, Val: EltTy.getSizeInBytes()); |
442 | Register Mul = Builder.buildMul(Dst: IdxTy, Src0: And, Src1: EltSize).getReg(Idx: 0); |
443 | Register EltPtr = |
444 | Builder.buildPtrAdd(Res: MRI.getType(Reg: StackTemp.getReg(Idx: 0)), Op0: StackTemp, Op1: Mul) |
445 | .getReg(Idx: 0); |
446 | |
447 | // Write the inserted element |
448 | Builder.buildStore(Val: Insert.getElementReg(), Addr: EltPtr, PtrInfo, Alignment: Align(1)); |
449 | // Reload the whole vector. |
450 | Builder.buildLoad(Res: Insert.getReg(Idx: 0), Addr: StackTemp, PtrInfo, Alignment: Align(8)); |
451 | Insert.eraseFromParent(); |
452 | } |
453 | |
454 | /// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a |
455 | /// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair. |
456 | /// |
457 | /// e.g. |
458 | /// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0) |
459 | /// |
460 | /// Can be represented as |
461 | /// |
462 | /// %extract = G_EXTRACT_VECTOR_ELT %left, 0 |
463 | /// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1 |
464 | /// |
465 | bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI, |
466 | std::tuple<Register, int, Register, int> &MatchInfo) { |
467 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
468 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
469 | Register Dst = MI.getOperand(i: 0).getReg(); |
470 | int NumElts = MRI.getType(Reg: Dst).getNumElements(); |
471 | auto DstIsLeftAndDstLane = isINSMask(M: ShuffleMask, NumInputElements: NumElts); |
472 | if (!DstIsLeftAndDstLane) |
473 | return false; |
474 | bool DstIsLeft; |
475 | int DstLane; |
476 | std::tie(args&: DstIsLeft, args&: DstLane) = *DstIsLeftAndDstLane; |
477 | Register Left = MI.getOperand(i: 1).getReg(); |
478 | Register Right = MI.getOperand(i: 2).getReg(); |
479 | Register DstVec = DstIsLeft ? Left : Right; |
480 | Register SrcVec = Left; |
481 | |
482 | int SrcLane = ShuffleMask[DstLane]; |
483 | if (SrcLane >= NumElts) { |
484 | SrcVec = Right; |
485 | SrcLane -= NumElts; |
486 | } |
487 | |
488 | MatchInfo = std::make_tuple(args&: DstVec, args&: DstLane, args&: SrcVec, args&: SrcLane); |
489 | return true; |
490 | } |
491 | |
492 | void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI, |
493 | MachineIRBuilder &Builder, |
494 | std::tuple<Register, int, Register, int> &MatchInfo) { |
495 | Builder.setInstrAndDebugLoc(MI); |
496 | Register Dst = MI.getOperand(i: 0).getReg(); |
497 | auto ScalarTy = MRI.getType(Reg: Dst).getElementType(); |
498 | Register DstVec, SrcVec; |
499 | int DstLane, SrcLane; |
500 | std::tie(args&: DstVec, args&: DstLane, args&: SrcVec, args&: SrcLane) = MatchInfo; |
501 | auto SrcCst = Builder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: SrcLane); |
  auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
503 | auto DstCst = Builder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: DstLane); |
504 | Builder.buildInsertVectorElement(Res: Dst, Val: DstVec, Elt: Extract, Idx: DstCst); |
505 | MI.eraseFromParent(); |
506 | } |
507 | |
/// isVShiftRImm - Check if \p Reg is a valid splat-vector immediate for the
/// shift-amount operand of a vector shift-right operation. The value must be
/// in the range 1 <= Value <= ElementBits.
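///
/// E.g. a G_ASHR of a <4 x s32> vector by a splat of 3 passes this check and
/// is rebuilt by applyVAshrLshrImm as G_VASHR with a scalar immediate of 3.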
511 | bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty, |
512 | int64_t &Cnt) { |
513 | assert(Ty.isVector() && "vector shift count is not a vector type" ); |
514 | MachineInstr *MI = MRI.getVRegDef(Reg); |
515 | auto Cst = getAArch64VectorSplatScalar(MI: *MI, MRI); |
516 | if (!Cst) |
517 | return false; |
518 | Cnt = *Cst; |
519 | int64_t ElementBits = Ty.getScalarSizeInBits(); |
520 | return Cnt >= 1 && Cnt <= ElementBits; |
521 | } |
522 | |
523 | /// Match a vector G_ASHR or G_LSHR with a valid immediate shift. |
524 | bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, |
525 | int64_t &Imm) { |
526 | assert(MI.getOpcode() == TargetOpcode::G_ASHR || |
527 | MI.getOpcode() == TargetOpcode::G_LSHR); |
528 | LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg()); |
529 | if (!Ty.isVector()) |
530 | return false; |
531 | return isVShiftRImm(Reg: MI.getOperand(i: 2).getReg(), MRI, Ty, Cnt&: Imm); |
532 | } |
533 | |
534 | void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, |
535 | int64_t &Imm) { |
536 | unsigned Opc = MI.getOpcode(); |
537 | assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR); |
538 | unsigned NewOpc = |
539 | Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; |
540 | MachineIRBuilder MIB(MI); |
541 | auto ImmDef = MIB.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: Imm); |
542 | MIB.buildInstr(Opc: NewOpc, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 1), ImmDef}); |
543 | MI.eraseFromParent(); |
544 | } |
545 | |
546 | /// Determine if it is possible to modify the \p RHS and predicate \p P of a |
547 | /// G_ICMP instruction such that the right-hand side is an arithmetic immediate. |
548 | /// |
549 | /// \returns A pair containing the updated immediate and predicate which may |
550 | /// be used to optimize the instruction. |
551 | /// |
552 | /// \note This assumes that the comparison has been legalized. |
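///
/// E.g. "icmp slt %x, 4097" cannot encode 4097 as an arithmetic immediate,
/// but the equivalent "icmp sle %x, 4096" can, since 4096 is a shifted 12-bit
/// immediate.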
553 | std::optional<std::pair<uint64_t, CmpInst::Predicate>> |
554 | tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P, |
555 | const MachineRegisterInfo &MRI) { |
556 | const auto &Ty = MRI.getType(Reg: RHS); |
557 | if (Ty.isVector()) |
558 | return std::nullopt; |
559 | unsigned Size = Ty.getSizeInBits(); |
560 | assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?" ); |
561 | |
562 | // If the RHS is not a constant, or the RHS is already a valid arithmetic |
563 | // immediate, then there is nothing to change. |
564 | auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
565 | if (!ValAndVReg) |
566 | return std::nullopt; |
567 | uint64_t OriginalC = ValAndVReg->Value.getZExtValue(); |
568 | uint64_t C = OriginalC; |
569 | if (isLegalArithImmed(C)) |
570 | return std::nullopt; |
571 | |
572 | // We have a non-arithmetic immediate. Check if adjusting the immediate and |
573 | // adjusting the predicate will result in a legal arithmetic immediate. |
574 | switch (P) { |
575 | default: |
576 | return std::nullopt; |
577 | case CmpInst::ICMP_SLT: |
578 | case CmpInst::ICMP_SGE: |
579 | // Check for |
580 | // |
581 | // x slt c => x sle c - 1 |
582 | // x sge c => x sgt c - 1 |
583 | // |
584 | // When c is not the smallest possible negative number. |
585 | if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) || |
586 | (Size == 32 && static_cast<int32_t>(C) == INT32_MIN)) |
587 | return std::nullopt; |
588 | P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; |
589 | C -= 1; |
590 | break; |
591 | case CmpInst::ICMP_ULT: |
592 | case CmpInst::ICMP_UGE: |
593 | // Check for |
594 | // |
595 | // x ult c => x ule c - 1 |
596 | // x uge c => x ugt c - 1 |
597 | // |
598 | // When c is not zero. |
599 | if (C == 0) |
600 | return std::nullopt; |
601 | P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; |
602 | C -= 1; |
603 | break; |
604 | case CmpInst::ICMP_SLE: |
605 | case CmpInst::ICMP_SGT: |
606 | // Check for |
607 | // |
608 | // x sle c => x slt c + 1 |
609 | // x sgt c => s sge c + 1 |
610 | // |
611 | // When c is not the largest possible signed integer. |
612 | if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) || |
613 | (Size == 64 && static_cast<int64_t>(C) == INT64_MAX)) |
614 | return std::nullopt; |
615 | P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; |
616 | C += 1; |
617 | break; |
618 | case CmpInst::ICMP_ULE: |
619 | case CmpInst::ICMP_UGT: |
620 | // Check for |
621 | // |
622 | // x ule c => x ult c + 1 |
623 | // x ugt c => s uge c + 1 |
624 | // |
625 | // When c is not the largest possible unsigned integer. |
626 | if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) || |
627 | (Size == 64 && C == UINT64_MAX)) |
628 | return std::nullopt; |
629 | P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; |
630 | C += 1; |
631 | break; |
632 | } |
633 | |
634 | // Check if the new constant is valid, and return the updated constant and |
635 | // predicate if it is. |
636 | if (Size == 32) |
637 | C = static_cast<uint32_t>(C); |
638 | if (isLegalArithImmed(C)) |
639 | return {{C, P}}; |
640 | |
641 | auto IsMaterializableInSingleInstruction = [=](uint64_t Imm) { |
642 | SmallVector<AArch64_IMM::ImmInsnModel> Insn; |
643 | AArch64_IMM::expandMOVImm(Imm, BitSize: 32, Insn); |
644 | return Insn.size() == 1; |
645 | }; |
646 | |
647 | if (!IsMaterializableInSingleInstruction(OriginalC) && |
648 | IsMaterializableInSingleInstruction(C)) |
649 | return {{C, P}}; |
650 | |
651 | return std::nullopt; |
652 | } |
653 | |
654 | /// Determine whether or not it is possible to update the RHS and predicate of |
655 | /// a G_ICMP instruction such that the RHS will be selected as an arithmetic |
656 | /// immediate. |
657 | /// |
658 | /// \p MI - The G_ICMP instruction |
659 | /// \p MatchInfo - The new RHS immediate and predicate on success |
660 | /// |
661 | /// See tryAdjustICmpImmAndPred for valid transformations. |
662 | bool matchAdjustICmpImmAndPred( |
663 | MachineInstr &MI, const MachineRegisterInfo &MRI, |
664 | std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) { |
665 | assert(MI.getOpcode() == TargetOpcode::G_ICMP); |
666 | Register RHS = MI.getOperand(i: 3).getReg(); |
667 | auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate()); |
668 | if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, P: Pred, MRI)) { |
669 | MatchInfo = *MaybeNewImmAndPred; |
670 | return true; |
671 | } |
672 | return false; |
673 | } |
674 | |
675 | void applyAdjustICmpImmAndPred( |
676 | MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo, |
677 | MachineIRBuilder &MIB, GISelChangeObserver &Observer) { |
678 | MIB.setInstrAndDebugLoc(MI); |
679 | MachineOperand &RHS = MI.getOperand(i: 3); |
680 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
681 | auto Cst = MIB.buildConstant(Res: MRI.cloneVirtualRegister(VReg: RHS.getReg()), |
682 | Val: MatchInfo.first); |
683 | Observer.changingInstr(MI); |
684 | RHS.setReg(Cst->getOperand(i: 0).getReg()); |
685 | MI.getOperand(i: 1).setPredicate(MatchInfo.second); |
686 | Observer.changedInstr(MI); |
687 | } |
688 | |
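/// Match a G_SHUFFLE_VECTOR which splats a single lane of its first source so
/// that it can be selected as a lane-indexed duplicate (G_DUPLANE8/16/32/64).
///
/// E.g. a <4 x s16> shuffle splatting lane 2 of its first source becomes
/// G_DUPLANE16 with a constant lane index of 2.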
689 | bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, |
690 | std::pair<unsigned, int> &MatchInfo) { |
691 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
692 | Register Src1Reg = MI.getOperand(i: 1).getReg(); |
693 | const LLT SrcTy = MRI.getType(Reg: Src1Reg); |
694 | const LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg()); |
695 | |
696 | auto LaneIdx = getSplatIndex(MI); |
697 | if (!LaneIdx) |
698 | return false; |
699 | |
700 | // The lane idx should be within the first source vector. |
701 | if (*LaneIdx >= SrcTy.getNumElements()) |
702 | return false; |
703 | |
704 | if (DstTy != SrcTy) |
705 | return false; |
706 | |
707 | LLT ScalarTy = SrcTy.getElementType(); |
708 | unsigned ScalarSize = ScalarTy.getSizeInBits(); |
709 | |
710 | unsigned Opc = 0; |
711 | switch (SrcTy.getNumElements()) { |
712 | case 2: |
713 | if (ScalarSize == 64) |
714 | Opc = AArch64::G_DUPLANE64; |
715 | else if (ScalarSize == 32) |
716 | Opc = AArch64::G_DUPLANE32; |
717 | break; |
718 | case 4: |
719 | if (ScalarSize == 32) |
720 | Opc = AArch64::G_DUPLANE32; |
721 | else if (ScalarSize == 16) |
722 | Opc = AArch64::G_DUPLANE16; |
723 | break; |
724 | case 8: |
725 | if (ScalarSize == 8) |
726 | Opc = AArch64::G_DUPLANE8; |
727 | else if (ScalarSize == 16) |
728 | Opc = AArch64::G_DUPLANE16; |
729 | break; |
730 | case 16: |
731 | if (ScalarSize == 8) |
732 | Opc = AArch64::G_DUPLANE8; |
733 | break; |
734 | default: |
735 | break; |
736 | } |
737 | if (!Opc) |
738 | return false; |
739 | |
740 | MatchInfo.first = Opc; |
741 | MatchInfo.second = *LaneIdx; |
742 | return true; |
743 | } |
744 | |
745 | void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, |
746 | MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) { |
747 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
748 | Register Src1Reg = MI.getOperand(i: 1).getReg(); |
749 | const LLT SrcTy = MRI.getType(Reg: Src1Reg); |
750 | |
751 | B.setInstrAndDebugLoc(MI); |
752 | auto Lane = B.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: MatchInfo.second); |
753 | |
754 | Register DupSrc = MI.getOperand(i: 1).getReg(); |
755 | // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source. |
756 | // To do this, we can use a G_CONCAT_VECTORS to do the widening. |
757 | if (SrcTy.getSizeInBits() == 64) { |
758 | auto Undef = B.buildUndef(Res: SrcTy); |
759 | DupSrc = B.buildConcatVectors(Res: SrcTy.multiplyElements(Factor: 2), |
760 | Ops: {Src1Reg, Undef.getReg(Idx: 0)}) |
761 | .getReg(Idx: 0); |
762 | } |
763 | B.buildInstr(Opc: MatchInfo.first, DstOps: {MI.getOperand(i: 0).getReg()}, SrcOps: {DupSrc, Lane}); |
764 | MI.eraseFromParent(); |
765 | } |
766 | |
767 | bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) { |
768 | auto &Unmerge = cast<GUnmerge>(Val&: MI); |
769 | Register Src1Reg = Unmerge.getReg(Idx: Unmerge.getNumOperands() - 1); |
770 | const LLT SrcTy = MRI.getType(Reg: Src1Reg); |
771 | if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64) |
772 | return false; |
773 | return SrcTy.isVector() && !SrcTy.isScalable() && |
774 | Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1; |
775 | } |
776 | |
777 | void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, |
778 | MachineIRBuilder &B) { |
779 | auto &Unmerge = cast<GUnmerge>(Val&: MI); |
780 | Register Src1Reg = Unmerge.getReg(Idx: Unmerge.getNumOperands() - 1); |
781 | const LLT SrcTy = MRI.getType(Reg: Src1Reg); |
782 | assert((SrcTy.isVector() && !SrcTy.isScalable()) && |
783 | "Expected a fixed length vector" ); |
784 | |
785 | for (int I = 0; I < SrcTy.getNumElements(); ++I) |
786 | B.buildExtractVectorElementConstant(Res: Unmerge.getReg(Idx: I), Val: Src1Reg, Idx: I); |
787 | MI.eraseFromParent(); |
788 | } |
789 | |
790 | bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) { |
791 | assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); |
792 | auto Splat = getAArch64VectorSplat(MI, MRI); |
793 | if (!Splat) |
794 | return false; |
795 | if (Splat->isReg()) |
796 | return true; |
797 | // Later, during selection, we'll try to match imported patterns using |
798 | // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower |
799 | // G_BUILD_VECTORs which could match those patterns. |
800 | int64_t Cst = Splat->getCst(); |
801 | return (Cst != 0 && Cst != -1); |
802 | } |
803 | |
804 | void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI, |
805 | MachineIRBuilder &B) { |
806 | B.setInstrAndDebugLoc(MI); |
807 | B.buildInstr(Opc: AArch64::G_DUP, DstOps: {MI.getOperand(i: 0).getReg()}, |
808 | SrcOps: {MI.getOperand(i: 1).getReg()}); |
809 | MI.eraseFromParent(); |
810 | } |
811 | |
812 | /// \returns how many instructions would be saved by folding a G_ICMP's shift |
813 | /// and/or extension operations. |
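///
/// E.g. folding a supported extend into the compare saves one instruction,
/// and folding both an extend and a shift by at most 4 saves two.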
814 | unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) { |
815 | // No instructions to save if there's more than one use or no uses. |
816 | if (!MRI.hasOneNonDBGUse(RegNo: CmpOp)) |
817 | return 0; |
818 | |
819 | // FIXME: This is duplicated with the selector. (See: selectShiftedRegister) |
820 | auto IsSupportedExtend = [&](const MachineInstr &MI) { |
821 | if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG) |
822 | return true; |
823 | if (MI.getOpcode() != TargetOpcode::G_AND) |
824 | return false; |
825 | auto ValAndVReg = |
826 | getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI); |
827 | if (!ValAndVReg) |
828 | return false; |
829 | uint64_t Mask = ValAndVReg->Value.getZExtValue(); |
830 | return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF); |
831 | }; |
832 | |
833 | MachineInstr *Def = getDefIgnoringCopies(Reg: CmpOp, MRI); |
834 | if (IsSupportedExtend(*Def)) |
835 | return 1; |
836 | |
837 | unsigned Opc = Def->getOpcode(); |
838 | if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR && |
839 | Opc != TargetOpcode::G_LSHR) |
840 | return 0; |
841 | |
842 | auto MaybeShiftAmt = |
843 | getIConstantVRegValWithLookThrough(VReg: Def->getOperand(i: 2).getReg(), MRI); |
844 | if (!MaybeShiftAmt) |
845 | return 0; |
846 | uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue(); |
847 | MachineInstr *ShiftLHS = |
848 | getDefIgnoringCopies(Reg: Def->getOperand(i: 1).getReg(), MRI); |
849 | |
850 | // Check if we can fold an extend and a shift. |
851 | // FIXME: This is duplicated with the selector. (See: |
852 | // selectArithExtendedRegister) |
853 | if (IsSupportedExtend(*ShiftLHS)) |
854 | return (ShiftAmt <= 4) ? 2 : 1; |
855 | |
856 | LLT Ty = MRI.getType(Reg: Def->getOperand(i: 0).getReg()); |
857 | if (Ty.isVector()) |
858 | return 0; |
859 | unsigned ShiftSize = Ty.getSizeInBits(); |
860 | if ((ShiftSize == 32 && ShiftAmt <= 31) || |
861 | (ShiftSize == 64 && ShiftAmt <= 63)) |
862 | return 1; |
863 | return 0; |
864 | } |
865 | |
866 | /// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP |
867 | /// instruction \p MI. |
868 | bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) { |
869 | assert(MI.getOpcode() == TargetOpcode::G_ICMP); |
870 | // Swap the operands if it would introduce a profitable folding opportunity. |
871 | // (e.g. a shift + extend). |
872 | // |
873 | // For example: |
874 | // lsl w13, w11, #1 |
875 | // cmp w13, w12 |
876 | // can be turned into: |
877 | // cmp w12, w11, lsl #1 |
878 | |
879 | // Don't swap if there's a constant on the RHS, because we know we can fold |
880 | // that. |
881 | Register RHS = MI.getOperand(i: 3).getReg(); |
882 | auto RHSCst = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
883 | if (RHSCst && isLegalArithImmed(C: RHSCst->Value.getSExtValue())) |
884 | return false; |
885 | |
886 | Register LHS = MI.getOperand(i: 2).getReg(); |
887 | auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate()); |
888 | auto GetRegForProfit = [&](Register Reg) { |
889 | MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); |
890 | return isCMN(MaybeSub: Def, Pred, MRI) ? Def->getOperand(i: 2).getReg() : Reg; |
891 | }; |
892 | |
893 | // Don't have a constant on the RHS. If we swap the LHS and RHS of the |
894 | // compare, would we be able to fold more instructions? |
895 | Register TheLHS = GetRegForProfit(LHS); |
896 | Register TheRHS = GetRegForProfit(RHS); |
897 | |
898 | // If the LHS is more likely to give us a folding opportunity, then swap the |
899 | // LHS and RHS. |
900 | return (getCmpOperandFoldingProfit(CmpOp: TheLHS, MRI) > |
901 | getCmpOperandFoldingProfit(CmpOp: TheRHS, MRI)); |
902 | } |
903 | |
904 | void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) { |
905 | auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate()); |
906 | Register LHS = MI.getOperand(i: 2).getReg(); |
907 | Register RHS = MI.getOperand(i: 3).getReg(); |
908 | Observer.changedInstr(MI); |
909 | MI.getOperand(i: 1).setPredicate(CmpInst::getSwappedPredicate(pred: Pred)); |
910 | MI.getOperand(i: 2).setReg(RHS); |
911 | MI.getOperand(i: 3).setReg(LHS); |
912 | Observer.changedInstr(MI); |
913 | } |
914 | |
915 | /// \returns a function which builds a vector floating point compare instruction |
916 | /// for a condition code \p CC. |
917 | /// \param [in] IsZero - True if the comparison is against 0. |
918 | /// \param [in] NoNans - True if the target has NoNansFPMath. |
919 | std::function<Register(MachineIRBuilder &)> |
920 | getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero, |
921 | bool NoNans, MachineRegisterInfo &MRI) { |
922 | LLT DstTy = MRI.getType(Reg: LHS); |
923 | assert(DstTy.isVector() && "Expected vector types only?" ); |
924 | assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!" ); |
925 | switch (CC) { |
926 | default: |
927 | llvm_unreachable("Unexpected condition code!" ); |
928 | case AArch64CC::NE: |
929 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
930 | auto FCmp = IsZero |
931 | ? MIB.buildInstr(Opc: AArch64::G_FCMEQZ, DstOps: {DstTy}, SrcOps: {LHS}) |
932 | : MIB.buildInstr(Opc: AArch64::G_FCMEQ, DstOps: {DstTy}, SrcOps: {LHS, RHS}); |
933 | return MIB.buildNot(Dst: DstTy, Src0: FCmp).getReg(Idx: 0); |
934 | }; |
935 | case AArch64CC::EQ: |
936 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
937 | return IsZero |
938 | ? MIB.buildInstr(Opc: AArch64::G_FCMEQZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
939 | : MIB.buildInstr(Opc: AArch64::G_FCMEQ, DstOps: {DstTy}, SrcOps: {LHS, RHS}) |
940 | .getReg(Idx: 0); |
941 | }; |
942 | case AArch64CC::GE: |
943 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
944 | return IsZero |
945 | ? MIB.buildInstr(Opc: AArch64::G_FCMGEZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
946 | : MIB.buildInstr(Opc: AArch64::G_FCMGE, DstOps: {DstTy}, SrcOps: {LHS, RHS}) |
947 | .getReg(Idx: 0); |
948 | }; |
949 | case AArch64CC::GT: |
950 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
951 | return IsZero |
952 | ? MIB.buildInstr(Opc: AArch64::G_FCMGTZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
953 | : MIB.buildInstr(Opc: AArch64::G_FCMGT, DstOps: {DstTy}, SrcOps: {LHS, RHS}) |
954 | .getReg(Idx: 0); |
955 | }; |
956 | case AArch64CC::LS: |
957 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
958 | return IsZero |
959 | ? MIB.buildInstr(Opc: AArch64::G_FCMLEZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
960 | : MIB.buildInstr(Opc: AArch64::G_FCMGE, DstOps: {DstTy}, SrcOps: {RHS, LHS}) |
961 | .getReg(Idx: 0); |
962 | }; |
963 | case AArch64CC::MI: |
964 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
965 | return IsZero |
966 | ? MIB.buildInstr(Opc: AArch64::G_FCMLTZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
967 | : MIB.buildInstr(Opc: AArch64::G_FCMGT, DstOps: {DstTy}, SrcOps: {RHS, LHS}) |
968 | .getReg(Idx: 0); |
969 | }; |
970 | } |
971 | } |
972 | |
973 | /// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo. |
974 | bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI, |
975 | MachineIRBuilder &MIB) { |
976 | assert(MI.getOpcode() == TargetOpcode::G_FCMP); |
977 | const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>(); |
978 | |
979 | Register Dst = MI.getOperand(i: 0).getReg(); |
980 | LLT DstTy = MRI.getType(Reg: Dst); |
981 | if (!DstTy.isVector() || !ST.hasNEON()) |
982 | return false; |
983 | Register LHS = MI.getOperand(i: 2).getReg(); |
984 | unsigned EltSize = MRI.getType(Reg: LHS).getScalarSizeInBits(); |
985 | if (EltSize == 16 && !ST.hasFullFP16()) |
986 | return false; |
987 | if (EltSize != 16 && EltSize != 32 && EltSize != 64) |
988 | return false; |
989 | |
990 | return true; |
991 | } |
992 | |
993 | /// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo. |
994 | void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI, |
995 | MachineIRBuilder &MIB) { |
996 | assert(MI.getOpcode() == TargetOpcode::G_FCMP); |
997 | const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>(); |
998 | |
999 | const auto &CmpMI = cast<GFCmp>(Val&: MI); |
1000 | |
1001 | Register Dst = CmpMI.getReg(Idx: 0); |
1002 | CmpInst::Predicate Pred = CmpMI.getCond(); |
1003 | Register LHS = CmpMI.getLHSReg(); |
1004 | Register RHS = CmpMI.getRHSReg(); |
1005 | |
1006 | LLT DstTy = MRI.getType(Reg: Dst); |
1007 | |
1008 | auto Splat = getAArch64VectorSplat(MI: *MRI.getVRegDef(Reg: RHS), MRI); |
1009 | |
1010 | // Compares against 0 have special target-specific pseudos. |
1011 | bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0; |
1012 | |
1013 | bool Invert = false; |
1014 | AArch64CC::CondCode CC, CC2 = AArch64CC::AL; |
1015 | if ((Pred == CmpInst::Predicate::FCMP_ORD || |
1016 | Pred == CmpInst::Predicate::FCMP_UNO) && |
1017 | IsZero) { |
1018 | // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't |
1019 | // NaN, so equivalent to a == a and doesn't need the two comparisons an |
1020 | // "ord" normally would. |
1021 | // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is |
1022 | // thus equivalent to a != a. |
1023 | RHS = LHS; |
1024 | IsZero = false; |
1025 | CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE; |
1026 | } else |
1027 | changeVectorFCMPPredToAArch64CC(P: Pred, CondCode&: CC, CondCode2&: CC2, Invert); |
1028 | |
1029 | // Instead of having an apply function, just build here to simplify things. |
1030 | MIB.setInstrAndDebugLoc(MI); |
1031 | |
1032 | const bool NoNans = |
1033 | ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath; |
1034 | |
1035 | auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI); |
1036 | Register CmpRes; |
1037 | if (CC2 == AArch64CC::AL) |
1038 | CmpRes = Cmp(MIB); |
1039 | else { |
1040 | auto Cmp2 = getVectorFCMP(CC: CC2, LHS, RHS, IsZero, NoNans, MRI); |
1041 | auto Cmp2Dst = Cmp2(MIB); |
1042 | auto Cmp1Dst = Cmp(MIB); |
1043 | CmpRes = MIB.buildOr(Dst: DstTy, Src0: Cmp1Dst, Src1: Cmp2Dst).getReg(Idx: 0); |
1044 | } |
1045 | if (Invert) |
1046 | CmpRes = MIB.buildNot(Dst: DstTy, Src0: CmpRes).getReg(Idx: 0); |
1047 | MRI.replaceRegWith(FromReg: Dst, ToReg: CmpRes); |
1048 | MI.eraseFromParent(); |
1049 | } |
1050 | |
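/// Match a G_STORE of a G_TRUNC so that it can be turned into a truncating
/// store.
///
/// E.g. storing the s32 result of a G_TRUNC of an s64 value becomes a
/// truncating G_STORE of the s64 value itself, keeping the original 32-bit
/// memory operand.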
1051 | bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI, |
1052 | Register &SrcReg) { |
1053 | assert(MI.getOpcode() == TargetOpcode::G_STORE); |
1054 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1055 | if (MRI.getType(Reg: DstReg).isVector()) |
1056 | return false; |
1057 | // Match a store of a truncate. |
1058 | if (!mi_match(R: DstReg, MRI, P: m_GTrunc(Src: m_Reg(R&: SrcReg)))) |
1059 | return false; |
1060 | // Only form truncstores for value types of max 64b. |
1061 | return MRI.getType(Reg: SrcReg).getSizeInBits() <= 64; |
1062 | } |
1063 | |
1064 | void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI, |
1065 | MachineIRBuilder &B, GISelChangeObserver &Observer, |
1066 | Register &SrcReg) { |
1067 | assert(MI.getOpcode() == TargetOpcode::G_STORE); |
1068 | Observer.changingInstr(MI); |
1069 | MI.getOperand(i: 0).setReg(SrcReg); |
1070 | Observer.changedInstr(MI); |
1071 | } |
1072 | |
// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones at
// this stage need to be lowered back.
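//
// E.g. a G_SEXT_INREG of a <4 x s32> vector from 8 bits is lowered back to a
// shift pair: a left shift by 24 followed by an arithmetic shift right by 24
// on each lane.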
1076 | bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) { |
1077 | assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); |
1078 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1079 | LLT DstTy = MRI.getType(Reg: DstReg); |
1080 | return DstTy.isVector(); |
1081 | } |
1082 | |
1083 | void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI, |
1084 | MachineIRBuilder &B, GISelChangeObserver &Observer) { |
1085 | assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); |
1086 | B.setInstrAndDebugLoc(MI); |
1087 | LegalizerHelper Helper(*MI.getMF(), Observer, B); |
1088 | Helper.lower(MI, TypeIdx: 0, /* Unused hint type */ Ty: LLT()); |
1089 | } |
1090 | |
1091 | /// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N) |
1092 | /// => unused, <N x t> = unmerge v |
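///
/// E.g. if only the first <2 x s32> half of unmerge(G_EXT %v:(<4 x s32>),
/// undef, 8) is used, that half is just the high half of %v, so the G_EXT can
/// be dropped and the unmerge can read %v directly.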
1093 | bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, |
1094 | Register &MatchInfo) { |
1095 | auto &Unmerge = cast<GUnmerge>(Val&: MI); |
1096 | if (Unmerge.getNumDefs() != 2) |
1097 | return false; |
1098 | if (!MRI.use_nodbg_empty(RegNo: Unmerge.getReg(Idx: 1))) |
1099 | return false; |
1100 | |
1101 | LLT DstTy = MRI.getType(Reg: Unmerge.getReg(Idx: 0)); |
1102 | if (!DstTy.isVector()) |
1103 | return false; |
1104 | |
1105 | MachineInstr *Ext = getOpcodeDef(Opcode: AArch64::G_EXT, Reg: Unmerge.getSourceReg(), MRI); |
1106 | if (!Ext) |
1107 | return false; |
1108 | |
1109 | Register ExtSrc1 = Ext->getOperand(i: 1).getReg(); |
1110 | Register ExtSrc2 = Ext->getOperand(i: 2).getReg(); |
1111 | auto LowestVal = |
1112 | getIConstantVRegValWithLookThrough(VReg: Ext->getOperand(i: 3).getReg(), MRI); |
1113 | if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes()) |
1114 | return false; |
1115 | |
1116 | if (!getOpcodeDef<GImplicitDef>(Reg: ExtSrc2, MRI)) |
1117 | return false; |
1118 | |
1119 | MatchInfo = ExtSrc1; |
1120 | return true; |
1121 | } |
1122 | |
1123 | void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, |
1124 | MachineIRBuilder &B, |
1125 | GISelChangeObserver &Observer, Register &SrcReg) { |
1126 | Observer.changingInstr(MI); |
1127 | // Swap dst registers. |
1128 | Register Dst1 = MI.getOperand(i: 0).getReg(); |
1129 | MI.getOperand(i: 0).setReg(MI.getOperand(i: 1).getReg()); |
1130 | MI.getOperand(i: 1).setReg(Dst1); |
1131 | MI.getOperand(i: 2).setReg(SrcReg); |
1132 | Observer.changedInstr(MI); |
1133 | } |
1134 | |
// Match either mul({z/s}ext, {z/s}ext), which becomes {u/s}mull, or a v2s64
// mul, which will be scalarised later on. Both are matched in one function so
// that the order of matching is always the same: try lowering MUL to MULL
// before trying to scalarise, if needed.
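//
// E.g. a <4 x s32> G_MUL of two G_ZEXTs from <4 x s16> becomes a G_UMULL of
// the narrow operands, while a <2 x s64> G_MUL with no such extends is instead
// broken down into scalar multiplies.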
1140 | bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) { |
1141 | // Get the instructions that defined the source operand |
1142 | LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg()); |
1143 | MachineInstr *I1 = getDefIgnoringCopies(Reg: MI.getOperand(i: 1).getReg(), MRI); |
1144 | MachineInstr *I2 = getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI); |
1145 | |
1146 | if (DstTy.isVector()) { |
1147 | // If the source operands were EXTENDED before, then {U/S}MULL can be used |
1148 | unsigned I1Opc = I1->getOpcode(); |
1149 | unsigned I2Opc = I2->getOpcode(); |
1150 | if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) || |
1151 | (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) && |
1152 | (MRI.getType(Reg: I1->getOperand(i: 0).getReg()).getScalarSizeInBits() == |
1153 | MRI.getType(Reg: I1->getOperand(i: 1).getReg()).getScalarSizeInBits() * 2) && |
1154 | (MRI.getType(Reg: I2->getOperand(i: 0).getReg()).getScalarSizeInBits() == |
1155 | MRI.getType(Reg: I2->getOperand(i: 1).getReg()).getScalarSizeInBits() * 2)) { |
1156 | return true; |
1157 | } |
1158 | // If result type is v2s64, scalarise the instruction |
1159 | else if (DstTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1160 | return true; |
1161 | } |
1162 | } |
1163 | return false; |
1164 | } |
1165 | |
1166 | void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI, |
1167 | MachineIRBuilder &B, GISelChangeObserver &Observer) { |
1168 | assert(MI.getOpcode() == TargetOpcode::G_MUL && |
1169 | "Expected a G_MUL instruction" ); |
1170 | |
1171 | // Get the instructions that defined the source operand |
1172 | LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg()); |
1173 | MachineInstr *I1 = getDefIgnoringCopies(Reg: MI.getOperand(i: 1).getReg(), MRI); |
1174 | MachineInstr *I2 = getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI); |
1175 | |
1176 | // If the source operands were EXTENDED before, then {U/S}MULL can be used |
1177 | unsigned I1Opc = I1->getOpcode(); |
1178 | unsigned I2Opc = I2->getOpcode(); |
1179 | if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) || |
1180 | (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) && |
1181 | (MRI.getType(Reg: I1->getOperand(i: 0).getReg()).getScalarSizeInBits() == |
1182 | MRI.getType(Reg: I1->getOperand(i: 1).getReg()).getScalarSizeInBits() * 2) && |
1183 | (MRI.getType(Reg: I2->getOperand(i: 0).getReg()).getScalarSizeInBits() == |
1184 | MRI.getType(Reg: I2->getOperand(i: 1).getReg()).getScalarSizeInBits() * 2)) { |
1185 | |
1186 | B.setInstrAndDebugLoc(MI); |
1187 | B.buildInstr(Opc: I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL |
1188 | : AArch64::G_SMULL, |
1189 | DstOps: {MI.getOperand(i: 0).getReg()}, |
1190 | SrcOps: {I1->getOperand(i: 1).getReg(), I2->getOperand(i: 1).getReg()}); |
1191 | MI.eraseFromParent(); |
1192 | } |
1193 | // If result type is v2s64, scalarise the instruction |
1194 | else if (DstTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1195 | LegalizerHelper Helper(*MI.getMF(), Observer, B); |
1196 | B.setInstrAndDebugLoc(MI); |
1197 | Helper.fewerElementsVector( |
1198 | MI, TypeIdx: 0, |
1199 | NarrowTy: DstTy.changeElementCount( |
1200 | EC: DstTy.getElementCount().divideCoefficientBy(RHS: 2))); |
1201 | } |
1202 | } |
1203 | |
1204 | class AArch64PostLegalizerLoweringImpl : public Combiner { |
1205 | protected: |
1206 | // TODO: Make CombinerHelper methods const. |
1207 | mutable CombinerHelper Helper; |
1208 | const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig; |
1209 | const AArch64Subtarget &STI; |
1210 | |
1211 | public: |
1212 | AArch64PostLegalizerLoweringImpl( |
1213 | MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, |
1214 | GISelCSEInfo *CSEInfo, |
1215 | const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig, |
1216 | const AArch64Subtarget &STI); |
1217 | |
  static const char *getName() { return "AArch64PostLegalizerLowering"; }
1219 | |
1220 | bool tryCombineAll(MachineInstr &I) const override; |
1221 | |
1222 | private: |
1223 | #define GET_GICOMBINER_CLASS_MEMBERS |
1224 | #include "AArch64GenPostLegalizeGILowering.inc" |
1225 | #undef GET_GICOMBINER_CLASS_MEMBERS |
1226 | }; |
1227 | |
1228 | #define GET_GICOMBINER_IMPL |
1229 | #include "AArch64GenPostLegalizeGILowering.inc" |
1230 | #undef GET_GICOMBINER_IMPL |
1231 | |
1232 | AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl( |
1233 | MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, |
1234 | GISelCSEInfo *CSEInfo, |
1235 | const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig, |
1236 | const AArch64Subtarget &STI) |
1237 | : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo), |
1238 | Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig), |
1239 | STI(STI), |
1240 | #define GET_GICOMBINER_CONSTRUCTOR_INITS |
1241 | #include "AArch64GenPostLegalizeGILowering.inc" |
1242 | #undef GET_GICOMBINER_CONSTRUCTOR_INITS |
1243 | { |
1244 | } |
1245 | |
1246 | class AArch64PostLegalizerLowering : public MachineFunctionPass { |
1247 | public: |
1248 | static char ID; |
1249 | |
1250 | AArch64PostLegalizerLowering(); |
1251 | |
1252 | StringRef getPassName() const override { |
1253 | return "AArch64PostLegalizerLowering" ; |
1254 | } |
1255 | |
1256 | bool runOnMachineFunction(MachineFunction &MF) override; |
1257 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
1258 | |
1259 | private: |
1260 | AArch64PostLegalizerLoweringImplRuleConfig RuleConfig; |
1261 | }; |
1262 | } // end anonymous namespace |
1263 | |
1264 | void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const { |
1265 | AU.addRequired<TargetPassConfig>(); |
1266 | AU.setPreservesCFG(); |
1267 | getSelectionDAGFallbackAnalysisUsage(AU); |
1268 | MachineFunctionPass::getAnalysisUsage(AU); |
1269 | } |
1270 | |
1271 | AArch64PostLegalizerLowering::AArch64PostLegalizerLowering() |
1272 | : MachineFunctionPass(ID) { |
1273 | initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry()); |
1274 | |
1275 | if (!RuleConfig.parseCommandLineOption()) |
1276 | report_fatal_error(reason: "Invalid rule identifier" ); |
1277 | } |
1278 | |
1279 | bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) { |
1280 | if (MF.getProperties().hasProperty( |
1281 | P: MachineFunctionProperties::Property::FailedISel)) |
1282 | return false; |
1283 | assert(MF.getProperties().hasProperty( |
1284 | MachineFunctionProperties::Property::Legalized) && |
1285 | "Expected a legalized function?" ); |
1286 | auto *TPC = &getAnalysis<TargetPassConfig>(); |
1287 | const Function &F = MF.getFunction(); |
1288 | |
1289 | const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>(); |
1290 | CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, |
1291 | /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true, |
1292 | F.hasOptSize(), F.hasMinSize()); |
1293 | AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr, |
1294 | RuleConfig, ST); |
1295 | return Impl.combineMachineInstrs(); |
1296 | } |
1297 | |
1298 | char AArch64PostLegalizerLowering::ID = 0; |
1299 | INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE, |
1300 | "Lower AArch64 MachineInstrs after legalization" , false, |
1301 | false) |
1302 | INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
1303 | INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE, |
1304 | "Lower AArch64 MachineInstrs after legalization" , false, |
1305 | false) |
1306 | |
1307 | namespace llvm { |
1308 | FunctionPass *createAArch64PostLegalizerLowering() { |
1309 | return new AArch64PostLegalizerLowering(); |
1310 | } |
1311 | } // end namespace llvm |
1312 | |