AArch64PostLegalizerCombiner.cpp source code [llvm_projects/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp]

1	//=== AArch64PostLegalizerCombiner.cpp --------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// Post-legalization combines on generic MachineInstrs.
11	///
12	/// The combines here must preserve instruction legality.
13	///
14	/// Lowering combines (e.g. pseudo matching) should be handled by
15	/// AArch64PostLegalizerLowering.
16	///
17	/// Combines which don't rely on instruction legality should go in the
18	/// AArch64PreLegalizerCombiner.
19	///
20	//===----------------------------------------------------------------------===//
21
22	#include "AArch64.h"
23	#include "AArch64TargetMachine.h"
24	#include "llvm/ADT/STLExtras.h"
25	#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
26	#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
27	#include "llvm/CodeGen/GlobalISel/Combiner.h"
28	#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
29	#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
30	#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
31	#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
32	#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
33	#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
34	#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
35	#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
36	#include "llvm/CodeGen/GlobalISel/Utils.h"
37	#include "llvm/CodeGen/MachineDominators.h"
38	#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
39	#include "llvm/CodeGen/MachineFunctionPass.h"
40	#include "llvm/CodeGen/MachinePassManager.h"
41	#include "llvm/CodeGen/MachineRegisterInfo.h"
42	#include "llvm/CodeGen/TargetOpcodes.h"
43	#include "llvm/Support/Debug.h"
44
45	#define GET_GICOMBINER_DEPS
46	#include "AArch64GenPostLegalizeGICombiner.inc"
47	#undef GET_GICOMBINER_DEPS
48
49	#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
50
51	using namespace llvm;
52	using namespace MIPatternMatch;
53
54	#define GET_GICOMBINER_TYPES
55	#include "AArch64GenPostLegalizeGICombiner.inc"
56	#undef GET_GICOMBINER_TYPES
57
58	namespace {
59
60	/// This combine tries do what performExtractVectorEltCombine does in SDAG.
61	/// Rewrite for pairwise fadd pattern
62	/// (s32 (g_extract_vector_elt
63	/// (g_fadd (vXs32 Other)
64	/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
65	/// ->
66	/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
67	/// (g_extract_vector_elt (vXs32 Other) 1))
68	bool matchExtractVecEltPairwiseAdd(
69	MachineInstr &MI, MachineRegisterInfo &MRI,
70	std::tuple<unsigned, LLT, Register> &MatchInfo) {
71	Register Src1 = MI.getOperand(i: `1`).getReg();
72	Register Src2 = MI.getOperand(i: `2`).getReg();
73	LLT DstTy = MRI.getType(Reg: MI.getOperand(i: `0`).getReg());
74
75	auto Cst = getIConstantVRegValWithLookThrough(VReg: Src2, MRI);
76	if (!Cst \|\| Cst ->Value != `0`)
77	return false;
78	// SDAG also checks for FullFP16, but this looks to be beneficial anyway.
79
80	// Now check for an fadd operation. TODO: expand this for integer add?
81	auto *FAddMI = getOpcodeDef(Opcode: TargetOpcode::G_FADD, Reg: Src1, MRI);
82	if (!FAddMI)
83	return false;
84
85	// If we add support for integer add, must restrict these types to just s64.
86	unsigned DstSize = DstTy.getSizeInBits();
87	if (DstSize != `16` && DstSize != `32` && DstSize != `64`)
88	return false;
89
90	Register Src1Op1 = FAddMI->getOperand(i: `1`).getReg();
91	Register Src1Op2 = FAddMI->getOperand(i: `2`).getReg();
92	MachineInstr *Shuffle =
93	getOpcodeDef(Opcode: TargetOpcode::G_SHUFFLE_VECTOR, Reg: Src1Op2, MRI);
94	MachineInstr *Other = MRI.getVRegDef(Reg: Src1Op1);
95	if (!Shuffle) {
96	Shuffle = getOpcodeDef(Opcode: TargetOpcode::G_SHUFFLE_VECTOR, Reg: Src1Op1, MRI);
97	Other = MRI.getVRegDef(Reg: Src1Op2);
98	}
99
100	// We're looking for a shuffle that moves the second element to index 0.
101	if (Shuffle && Shuffle->getOperand(i: `3`).getShuffleMask()[`0`] == `1` &&
102	Other == MRI.getVRegDef(Reg: Shuffle->getOperand(i: `1`).getReg())) {
103	std::get<`0`>(t&: MatchInfo) = TargetOpcode::G_FADD;
104	std::get<`1`>(t&: MatchInfo) = DstTy;
105	std::get<`2`>(t&: MatchInfo) = Other->getOperand(i: `0`).getReg();
106	return true;
107	}
108	return false;
109	}
110
111	void applyExtractVecEltPairwiseAdd(
112	MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
113	std::tuple<unsigned, LLT, Register> &MatchInfo) {
114	unsigned Opc = std::get<`0`>(t&: MatchInfo);
115	assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
116	// We want to generate two extracts of elements 0 and 1, and add them.
117	LLT Ty = std::get<`1`>(t&: MatchInfo);
118	Register Src = std::get<`2`>(t&: MatchInfo);
119	LLT s64 = LLT::integer(SizeInBits: `64`);
120	B.setInstrAndDebugLoc(MI);
121	auto Elt0 = B.buildExtractVectorElement(Res: Ty, Val: Src, Idx: B.buildConstant(Res: s64, Val: `0`));
122	auto Elt1 = B.buildExtractVectorElement(Res: Ty, Val: Src, Idx: B.buildConstant(Res: s64, Val: `1`));
123	B.buildInstr(Opc, DstOps: {MI.getOperand(i: `0`).getReg()}, SrcOps: {Elt0, Elt1});
124	MI.eraseFromParent();
125	}
126
127	bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
128	// TODO: check if extended build vector as well.
129	unsigned Opc = MRI.getVRegDef(Reg: R)->getOpcode();
130	return Opc == TargetOpcode::G_SEXT \|\| Opc == TargetOpcode::G_SEXT_INREG;
131	}
132
133	bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
134	// TODO: check if extended build vector as well.
135	return MRI.getVRegDef(Reg: R)->getOpcode() == TargetOpcode::G_ZEXT;
136	}
137
138	bool matchAArch64MulConstCombine(
139	MachineInstr &MI, MachineRegisterInfo &MRI,
140	std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
141	assert(MI.getOpcode() == TargetOpcode::G_MUL);
142	Register LHS = MI.getOperand(i: `1`).getReg();
143	Register RHS = MI.getOperand(i: `2`).getReg();
144	Register Dst = MI.getOperand(i: `0`).getReg();
145	const LLT Ty = MRI.getType(Reg: LHS);
146
147	// The below optimizations require a constant RHS.
148	auto Const = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
149	if (!Const)
150	return false;
151
152	APInt ConstValue = Const ->Value.sext(width: Ty.getSizeInBits());
153	// The following code is ported from AArch64ISelLowering.
154	// Multiplication of a power of two plus/minus one can be done more
155	// cheaply as shift+add/sub. For now, this is true unilaterally. If
156	// future CPUs have a cheaper MADD instruction, this may need to be
157	// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
158	// 64-bit is 5 cycles, so this is always a win.
159	// More aggressively, some multiplications N0 C can be lowered to*
160	// shift+add+shift if the constant C = A B where A = 2^N + 1 and B = 2^M,*
161	// e.g. 6=32=(2+1)2.
162	// TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
163	// which equals to (1+2)16-(1+2).*
164	// TrailingZeroes is used to test if the mul can be lowered to
165	// shift+add+shift.
166	unsigned TrailingZeroes = ConstValue.countr_zero();
167	if (TrailingZeroes) {
168	// Conservatively do not lower to shift+add+shift if the mul might be
169	// folded into smul or umul.
170	if (MRI.hasOneNonDBGUse(RegNo: LHS) &&
171	(isSignExtended(R: LHS, MRI) \|\| isZeroExtended(R: LHS, MRI)))
172	return false;
173	// Conservatively do not lower to shift+add+shift if the mul might be
174	// folded into madd or msub.
175	if (MRI.hasOneNonDBGUse(RegNo: Dst)) {
176	MachineInstr &UseMI = *MRI.use_instr_begin(RegNo: Dst);
177	unsigned UseOpc = UseMI.getOpcode();
178	if (UseOpc == TargetOpcode::G_ADD \|\| UseOpc == TargetOpcode::G_PTR_ADD \|\|
179	UseOpc == TargetOpcode::G_SUB)
180	return false;
181	}
182	}
183	// Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
184	// and shift+add+shift.
185	APInt ShiftedConstValue = ConstValue.ashr(ShiftAmt: TrailingZeroes);
186
187	unsigned ShiftAmt, AddSubOpc;
188	// Is the shifted value the LHS operand of the add/sub?
189	bool ShiftValUseIsLHS = true;
190	// Do we need to negate the result?
191	bool NegateResult = false;
192
193	if (ConstValue.isNonNegative()) {
194	// (mul x, 2^N + 1) => (add (shl x, N), x)
195	// (mul x, 2^N - 1) => (sub (shl x, N), x)
196	// (mul x, (2^N + 1) 2^M) => (shl (add (shl x, N), x), M)*
197	APInt SCVMinus1 = ShiftedConstValue - `1`;
198	APInt CVPlus1 = ConstValue + `1`;
199	if (SCVMinus1.isPowerOf2()) {
200	ShiftAmt = SCVMinus1.logBase2();
201	AddSubOpc = TargetOpcode::G_ADD;
202	} else if (CVPlus1.isPowerOf2()) {
203	ShiftAmt = CVPlus1.logBase2();
204	AddSubOpc = TargetOpcode::G_SUB;
205	} else
206	return false;
207	} else {
208	// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
209	// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
210	APInt CVNegPlus1 = -ConstValue + `1`;
211	APInt CVNegMinus1 = -ConstValue - `1`;
212	if (CVNegPlus1.isPowerOf2()) {
213	ShiftAmt = CVNegPlus1.logBase2();
214	AddSubOpc = TargetOpcode::G_SUB;
215	ShiftValUseIsLHS = false;
216	} else if (CVNegMinus1.isPowerOf2()) {
217	ShiftAmt = CVNegMinus1.logBase2();
218	AddSubOpc = TargetOpcode::G_ADD;
219	NegateResult = true;
220	} else
221	return false;
222	}
223
224	if (NegateResult && TrailingZeroes)
225	return false;
226
227	ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
228	auto Shift = B.buildConstant(Res: LLT::integer(SizeInBits: `64`), Val: ShiftAmt);
229	auto ShiftedVal = B.buildShl(Dst: Ty, Src0: LHS, Src1: Shift);
230
231	Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(Idx: `0`) : LHS;
232	Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(Idx: `0`);
233	auto Res = B.buildInstr(Opc: AddSubOpc, DstOps: {Ty}, SrcOps: {AddSubLHS, AddSubRHS});
234	assert(!(NegateResult && TrailingZeroes) &&
235	"NegateResult and TrailingZeroes cannot both be true for now.");
236	// Negate the result.
237	if (NegateResult) {
238	B.buildSub(Dst: DstReg, Src0: B.buildConstant(Res: Ty, Val: `0`), Src1: Res);
239	return;
240	}
241	// Shift the result.
242	if (TrailingZeroes) {
243	B.buildShl(Dst: DstReg, Src0: Res,
244	Src1: B.buildConstant(Res: LLT::integer(SizeInBits: `64`), Val: TrailingZeroes));
245	return;
246	}
247	B.buildCopy(Res: DstReg, Op: Res.getReg(Idx: `0`));
248	};
249	return true;
250	}
251
252	void applyAArch64MulConstCombine(
253	MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
254	std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
255	B.setInstrAndDebugLoc(MI);
256	ApplyFn (B, MI.getOperand(i: `0`).getReg());
257	MI.eraseFromParent();
258	}
259
260	/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
261	/// is a zero, into a G_ZEXT of the first.
262	bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
263	auto &Merge = cast<GMerge>(Val&: MI);
264	LLT SrcTy = MRI.getType(Reg: Merge.getSourceReg(I: `0`));
265	if (SrcTy != LLT::scalar(SizeInBits: `32`) \|\| Merge.getNumSources() != `2`)
266	return false;
267	return mi_match(R: Merge.getSourceReg(I: `1`), MRI, P: m_SpecificICst(RequestedValue: `0`));
268	}
269
270	void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
271	MachineIRBuilder &B, GISelChangeObserver &Observer) {
272	// Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)
273	// ->
274	// %d(s64) = G_ZEXT %a(s32)
275	Observer.changingInstr(MI);
276	MI.setDesc(B.getTII().get(Opcode: TargetOpcode::G_ZEXT));
277	MI.removeOperand(OpNo: `2`);
278	Observer.changedInstr(MI);
279	}
280
281	/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
282	/// instruction.
283	bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
284	// If this is coming from a scalar compare then we can use a G_ZEXT instead of
285	// a G_ANYEXT:
286	//
287	// %cmp:_(s32) = G_[I\|F]CMP ... <-- produces 0/1.
288	// %ext:_(s64) = G_ANYEXT %cmp(s32)
289	//
290	// By doing this, we can leverage more KnownBits combines.
291	assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
292	Register Dst = MI.getOperand(i: `0`).getReg();
293	Register Src = MI.getOperand(i: `1`).getReg();
294	return MRI.getType(Reg: Dst).isScalar() &&
295	mi_match(R: Src, MRI,
296	P: m_any_of(preds: m_GICmp(P: m_Pred(), L: m_Reg(), R: m_Reg()),
297	preds: m_GFCmp(P: m_Pred(), L: m_Reg(), R: m_Reg())));
298	}
299
300	void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
301	MachineIRBuilder &B,
302	GISelChangeObserver &Observer) {
303	Observer.changingInstr(MI);
304	MI.setDesc(B.getTII().get(Opcode: TargetOpcode::G_ZEXT));
305	Observer.changedInstr(MI);
306	}
307
308	/// Match a 128b store of zero and split it into two 64 bit stores, for
309	/// size/performance reasons.
310	bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
311	GStore &Store = cast<GStore>(Val&: MI);
312	if (!Store.isSimple())
313	return false;
314	LLT ValTy = MRI.getType(Reg: Store.getValueReg());
315	if (ValTy.isScalableVector())
316	return false;
317	if (!ValTy.isVector() \|\| ValTy.getSizeInBits() != `128`)
318	return false;
319	if (Store.getMemSizeInBits() != ValTy.getSizeInBits())
320	return false; // Don't split truncating stores.
321	if (!MRI.hasOneNonDBGUse(RegNo: Store.getValueReg()))
322	return false;
323	auto MaybeCst = isConstantOrConstantSplatVector(Def: Store.getValueReg(), MRI);
324	return MaybeCst && MaybeCst ->isZero();
325	}
326
327	void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
328	MachineIRBuilder &B,
329	GISelChangeObserver &Observer) {
330	B.setInstrAndDebugLoc(MI);
331	GStore &Store = cast<GStore>(Val&: MI);
332	assert(MRI.getType(Store.getValueReg()).isVector() &&
333	"Expected a vector store value");
334	LLT NewTy = LLT::integer(SizeInBits: `64`);
335	Register PtrReg = Store.getPointerReg();
336	auto Zero = B.buildConstant(Res: NewTy, Val: `0`);
337	auto HighPtr =
338	B.buildPtrAdd(Res: MRI.getType(Reg: PtrReg), Op0: PtrReg, Op1: B.buildConstant(Res: NewTy, Val: `8`));
339	auto &MF = *MI.getMF();
340	auto *LowMMO = MF.getMachineMemOperand(MMO: &Store.getMMO(), Offset: `0`, Ty: NewTy);
341	auto *HighMMO = MF.getMachineMemOperand(MMO: &Store.getMMO(), Offset: `8`, Ty: NewTy);
342	B.buildStore(Val: Zero, Addr: PtrReg, MMO&: *LowMMO);
343	B.buildStore(Val: Zero, Addr: HighPtr, MMO&: *HighMMO);
344	Store.eraseFromParent();
345	}
346
347	bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
348	std::tuple<Register, Register, Register> &MatchInfo) {
349	const LLT DstTy = MRI.getType(Reg: MI.getOperand(i: `0`).getReg());
350	if (!DstTy.isVector())
351	return false;
352
353	Register AO1, AO2, BVO1, BVO2;
354	if (!mi_match(MI, MRI,
355	P: m_GOr(L: m_GAnd(L: m_Reg(R&: AO1), R: m_Reg(R&: BVO1)),
356	R: m_GAnd(L: m_Reg(R&: AO2), R: m_Reg(R&: BVO2)))))
357	return false;
358
359	auto *BV1 = getOpcodeDef<GBuildVector>(Reg: BVO1, MRI);
360	auto *BV2 = getOpcodeDef<GBuildVector>(Reg: BVO2, MRI);
361	if (!BV1 \|\| !BV2)
362	return false;
363
364	for (int I = `0`, E = DstTy.getNumElements(); I < E; I++) {
365	auto ValAndVReg1 =
366	getIConstantVRegValWithLookThrough(VReg: BV1->getSourceReg(I), MRI);
367	auto ValAndVReg2 =
368	getIConstantVRegValWithLookThrough(VReg: BV2->getSourceReg(I), MRI);
369	if (!ValAndVReg1 \|\| !ValAndVReg2 \|\|
370	ValAndVReg1 ->Value != ~ValAndVReg2 ->Value)
371	return false;
372	}
373
374	MatchInfo = {AO1, AO2, BVO1};
375	return true;
376	}
377
378	void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
379	MachineIRBuilder &B,
380	std::tuple<Register, Register, Register> &MatchInfo) {
381	B.setInstrAndDebugLoc(MI);
382	B.buildInstr(
383	Opc: AArch64::G_BSP, DstOps: {MI.getOperand(i: `0`).getReg()},
384	SrcOps: {std::get<`2`>(t&: MatchInfo), std::get<`0`>(t&: MatchInfo), std::get<`1`>(t&: MatchInfo)});
385	MI.eraseFromParent();
386	}
387
388	// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz
389	bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
390	Register &SrcReg) {
391	LLT DstTy = MRI.getType(Reg: MI.getOperand(i: `0`).getReg());
392
393	if (DstTy != LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `64`) && DstTy != LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `32`) &&
394	DstTy != LLT::fixed_vector(NumElements: `4`, ScalarSizeInBits: `32`) && DstTy != LLT::fixed_vector(NumElements: `4`, ScalarSizeInBits: `16`) &&
395	DstTy != LLT::fixed_vector(NumElements: `8`, ScalarSizeInBits: `16`))
396	return false;
397
398	auto AndMI = getDefIgnoringCopies(Reg: MI.getOperand(i: `1`).getReg(), MRI);
399	if (AndMI->getOpcode() != TargetOpcode::G_AND)
400	return false;
401	auto LShrMI = getDefIgnoringCopies(Reg: AndMI->getOperand(i: `1`).getReg(), MRI);
402	if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
403	return false;
404
405	// Check the constant splat values
406	auto V1 = isConstantOrConstantSplatVector(Def: MI.getOperand(i: `2`).getReg(), MRI);
407	auto V2 = isConstantOrConstantSplatVector(Def: AndMI->getOperand(i: `2`).getReg(), MRI);
408	auto V3 =
409	isConstantOrConstantSplatVector(Def: LShrMI->getOperand(i: `2`).getReg(), MRI);
410	if (!V1.has_value() \|\| !V2.has_value() \|\| !V3.has_value())
411	return false;
412	unsigned HalfSize = DstTy.getScalarSizeInBits() / `2`;
413	if (!V1.value().isMask(numBits: HalfSize) \|\| V2.value() != (`1ULL` \| `1ULL` << HalfSize) \|\|
414	V3 != (HalfSize - `1`))
415	return false;
416
417	SrcReg = LShrMI->getOperand(i: `1`).getReg();
418
419	return true;
420	}
421
422	void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
423	MachineIRBuilder &B, Register &SrcReg) {
424	Register DstReg = MI.getOperand(i: `0`).getReg();
425	LLT DstTy = MRI.getType(Reg: DstReg);
426	LLT HalfTy =
427	DstTy.changeElementCount(EC: DstTy.getElementCount().multiplyCoefficientBy(RHS: `2`))
428	.changeElementSize(NewEltSize: DstTy.getScalarSizeInBits() / `2`);
429
430	Register ZeroVec = B.buildConstant(Res: HalfTy, Val: `0`).getReg(Idx: `0`);
431	Register CastReg =
432	B.buildInstr(Opc: TargetOpcode::G_BITCAST, DstOps: {HalfTy}, SrcOps: {SrcReg}).getReg(Idx: `0`);
433	Register CMLTReg =
434	B.buildICmp(Pred: CmpInst::Predicate::ICMP_SLT, Res: HalfTy, Op0: CastReg, Op1: ZeroVec)
435	.getReg(Idx: `0`);
436
437	B.buildInstr(Opc: TargetOpcode::G_BITCAST, DstOps: {DstReg}, SrcOps: {CMLTReg}).getReg(Idx: `0`);
438	MI.eraseFromParent();
439	}
440
441	// Match mul({z/s}ext , {z/s}ext) => {u/s}mull
442	bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
443	GISelValueTracking *KB,
444	std::tuple<bool, Register, Register> &MatchInfo) {
445	// Get the instructions that defined the source operand
446	LLT DstTy = MRI.getType(Reg: MI.getOperand(i: `0`).getReg());
447	MachineInstr *I1 = getDefIgnoringCopies(Reg: MI.getOperand(i: `1`).getReg(), MRI);
448	MachineInstr *I2 = getDefIgnoringCopies(Reg: MI.getOperand(i: `2`).getReg(), MRI);
449	unsigned I1Opc = I1->getOpcode();
450	unsigned I2Opc = I2->getOpcode();
451	unsigned EltSize = DstTy.getScalarSizeInBits();
452
453	if (!DstTy.isVector() \|\| I1->getNumOperands() < `2` \|\| I2->getNumOperands() < `2`)
454	return false;
455
456	auto IsAtLeastDoubleExtend = [&](Register R) {
457	LLT Ty = MRI.getType(Reg: R);
458	return EltSize >= Ty.getScalarSizeInBits() * `2`;
459	};
460
461	// If the source operands were EXTENDED before, then {U/S}MULL can be used
462	bool IsZExt1 =
463	I1Opc == TargetOpcode::G_ZEXT \|\| I1Opc == TargetOpcode::G_ANYEXT;
464	bool IsZExt2 =
465	I2Opc == TargetOpcode::G_ZEXT \|\| I2Opc == TargetOpcode::G_ANYEXT;
466	if (IsZExt1 && IsZExt2 && IsAtLeastDoubleExtend (I1->getOperand(i: `1`).getReg()) &&
467	IsAtLeastDoubleExtend (I2->getOperand(i: `1`).getReg())) {
468	get<`0`>(t&: MatchInfo) = true;
469	get<`1`>(t&: MatchInfo) = I1->getOperand(i: `1`).getReg();
470	get<`2`>(t&: MatchInfo) = I2->getOperand(i: `1`).getReg();
471	return true;
472	}
473
474	bool IsSExt1 =
475	I1Opc == TargetOpcode::G_SEXT \|\| I1Opc == TargetOpcode::G_ANYEXT;
476	bool IsSExt2 =
477	I2Opc == TargetOpcode::G_SEXT \|\| I2Opc == TargetOpcode::G_ANYEXT;
478	if (IsSExt1 && IsSExt2 && IsAtLeastDoubleExtend (I1->getOperand(i: `1`).getReg()) &&
479	IsAtLeastDoubleExtend (I2->getOperand(i: `1`).getReg())) {
480	get<`0`>(t&: MatchInfo) = false;
481	get<`1`>(t&: MatchInfo) = I1->getOperand(i: `1`).getReg();
482	get<`2`>(t&: MatchInfo) = I2->getOperand(i: `1`).getReg();
483	return true;
484	}
485
486	// Select UMULL if we can replace the other operand with an extend.
487	APInt Mask = APInt::getHighBitsSet(numBits: EltSize, hiBitsSet: EltSize / `2`);
488	if (KB && (IsZExt1 \|\| IsZExt2) &&
489	IsAtLeastDoubleExtend (IsZExt1 ? I1->getOperand(i: `1`).getReg()
490	: I2->getOperand(i: `1`).getReg())) {
491	Register ZExtOp =
492	IsZExt1 ? MI.getOperand(i: `2`).getReg() : MI.getOperand(i: `1`).getReg();
493	if (KB->maskedValueIsZero(Val: ZExtOp, Mask)) {
494	get<`0`>(t&: MatchInfo) = true;
495	get<`1`>(t&: MatchInfo) = IsZExt1 ? I1->getOperand(i: `1`).getReg() : ZExtOp;
496	get<`2`>(t&: MatchInfo) = IsZExt1 ? ZExtOp : I2->getOperand(i: `1`).getReg();
497	return true;
498	}
499	} else if (KB && DstTy == LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `64`) &&
500	KB->maskedValueIsZero(Val: MI.getOperand(i: `1`).getReg(), Mask) &&
501	KB->maskedValueIsZero(Val: MI.getOperand(i: `2`).getReg(), Mask)) {
502	get<`0`>(t&: MatchInfo) = true;
503	get<`1`>(t&: MatchInfo) = MI.getOperand(i: `1`).getReg();
504	get<`2`>(t&: MatchInfo) = MI.getOperand(i: `2`).getReg();
505	return true;
506	}
507
508	if (KB && (IsSExt1 \|\| IsSExt2) &&
509	IsAtLeastDoubleExtend (IsSExt1 ? I1->getOperand(i: `1`).getReg()
510	: I2->getOperand(i: `1`).getReg())) {
511	Register SExtOp =
512	IsSExt1 ? MI.getOperand(i: `2`).getReg() : MI.getOperand(i: `1`).getReg();
513	if (KB->computeNumSignBits(R: SExtOp) > EltSize / `2`) {
514	get<`0`>(t&: MatchInfo) = false;
515	get<`1`>(t&: MatchInfo) = IsSExt1 ? I1->getOperand(i: `1`).getReg() : SExtOp;
516	get<`2`>(t&: MatchInfo) = IsSExt1 ? SExtOp : I2->getOperand(i: `1`).getReg();
517	return true;
518	}
519	} else if (KB && DstTy == LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `64`) &&
520	KB->computeNumSignBits(R: MI.getOperand(i: `1`).getReg()) > EltSize / `2` &&
521	KB->computeNumSignBits(R: MI.getOperand(i: `2`).getReg()) > EltSize / `2`) {
522	get<`0`>(t&: MatchInfo) = false;
523	get<`1`>(t&: MatchInfo) = MI.getOperand(i: `1`).getReg();
524	get<`2`>(t&: MatchInfo) = MI.getOperand(i: `2`).getReg();
525	return true;
526	}
527
528	return false;
529	}
530
531	void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
532	MachineIRBuilder &B, GISelChangeObserver &Observer,
533	std::tuple<bool, Register, Register> &MatchInfo) {
534	assert(MI.getOpcode() == TargetOpcode::G_MUL &&
535	"Expected a G_MUL instruction");
536
537	// Get the instructions that defined the source operand
538	LLT DstTy = MRI.getType(Reg: MI.getOperand(i: `0`).getReg());
539	bool IsZExt = get<`0`>(t&: MatchInfo);
540	Register Src1Reg = get<`1`>(t&: MatchInfo);
541	Register Src2Reg = get<`2`>(t&: MatchInfo);
542	LLT Src1Ty = MRI.getType(Reg: Src1Reg);
543	LLT Src2Ty = MRI.getType(Reg: Src2Reg);
544	LLT HalfDstTy = DstTy.changeElementSize(NewEltSize: DstTy.getScalarSizeInBits() / `2`);
545	unsigned ExtOpc = IsZExt ? TargetOpcode::G_ZEXT : TargetOpcode::G_SEXT;
546
547	if (Src1Ty.getScalarSizeInBits() * `2` != DstTy.getScalarSizeInBits())
548	Src1Reg = B.buildExtOrTrunc(ExtOpc, Res: {HalfDstTy}, Op: {Src1Reg}).getReg(Idx: `0`);
549	if (Src2Ty.getScalarSizeInBits() * `2` != DstTy.getScalarSizeInBits())
550	Src2Reg = B.buildExtOrTrunc(ExtOpc, Res: {HalfDstTy}, Op: {Src2Reg}).getReg(Idx: `0`);
551
552	B.buildInstr(Opc: IsZExt ? AArch64::G_UMULL : AArch64::G_SMULL,
553	DstOps: {MI.getOperand(i: `0`).getReg()}, SrcOps: {Src1Reg, Src2Reg});
554	MI.eraseFromParent();
555	}
556
557	static bool matchSubAddMulReassoc(Register Mul1, Register Mul2, Register Sub,
558	Register Src, MachineRegisterInfo &MRI) {
559	if (!MRI.hasOneUse(RegNo: Sub))
560	return false;
561	if (getIConstantVRegValWithLookThrough(VReg: Src, MRI))
562	return false;
563	MachineInstr *M1 = getDefIgnoringCopies(Reg: Mul1, MRI);
564	if (M1->getOpcode() != AArch64::G_MUL &&
565	M1->getOpcode() != AArch64::G_SMULL &&
566	M1->getOpcode() != AArch64::G_UMULL)
567	return false;
568	MachineInstr *M2 = getDefIgnoringCopies(Reg: Mul2, MRI);
569	if (M2->getOpcode() != AArch64::G_MUL &&
570	M2->getOpcode() != AArch64::G_SMULL &&
571	M2->getOpcode() != AArch64::G_UMULL)
572	return false;
573	return true;
574	}
575
576	static void applySubAddMulReassoc(MachineInstr &MI, MachineInstr &Sub,
577	MachineRegisterInfo &MRI, MachineIRBuilder &B,
578	GISelChangeObserver &Observer) {
579	Register Src = MI.getOperand(i: `1`).getReg();
580	Register Tmp = MI.getOperand(i: `2`).getReg();
581	Register Mul1 = Sub.getOperand(i: `1`).getReg();
582	Register Mul2 = Sub.getOperand(i: `2`).getReg();
583	Observer.changingInstr(MI);
584	B.buildInstr(Opc: AArch64::G_SUB, DstOps: {Tmp}, SrcOps: {Src, Mul1});
585	MI.getOperand(i: `1`).setReg(Tmp);
586	MI.getOperand(i: `2`).setReg(Mul2);
587	Sub.eraseFromParent();
588	Observer.changedInstr(MI);
589	}
590
591	class AArch64PostLegalizerCombinerImpl : public Combiner {
592	protected:
593	const CombinerHelper Helper;
594	const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
595	const AArch64Subtarget &STI;
596
597	public:
598	AArch64PostLegalizerCombinerImpl(
599	MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,
600	GISelCSEInfo *CSEInfo,
601	const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
602	const AArch64Subtarget &STI, MachineDominatorTree *MDT,
603	const LegalizerInfo *LI);
604
605	static const char getName() { return* "AArch64PostLegalizerCombiner"; }
606
607	bool tryCombineAll(MachineInstr &I) const override;
608
609	private:
610	#define GET_GICOMBINER_CLASS_MEMBERS
611	#include "AArch64GenPostLegalizeGICombiner.inc"
612	#undef GET_GICOMBINER_CLASS_MEMBERS
613	};
614
615	#define GET_GICOMBINER_IMPL
616	#include "AArch64GenPostLegalizeGICombiner.inc"
617	#undef GET_GICOMBINER_IMPL
618
619	AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
620	MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,
621	GISelCSEInfo *CSEInfo,
622	const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
623	const AArch64Subtarget &STI, MachineDominatorTree *MDT,
624	const LegalizerInfo *LI)
625	: Combiner (MF, CInfo, &VT, CSEInfo),
626	Helper (Observer, B, /IsPreLegalize/ false, &VT, MDT, LI),
627	RuleConfig(RuleConfig), STI(STI),
628	#define GET_GICOMBINER_CONSTRUCTOR_INITS
629	#include "AArch64GenPostLegalizeGICombiner.inc"
630	#undef GET_GICOMBINER_CONSTRUCTOR_INITS
631	{
632	}
633
634	struct StoreInfo {
635	GStore St = nullptr*;
636	// The G_PTR_ADD that's used by the store. We keep this to cache the
637	// MachineInstr def.
638	GPtrAdd Ptr = nullptr*;
639	// The signed offset to the Ptr instruction.
640	int64_t Offset = `0`;
641	LLT StoredType;
642	};
643
644	static bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
645	CSEMIRBuilder &MIB) {
646	if (Stores.size() <= `2`)
647	return false;
648
649	// Profitabity checks:
650	int64_t BaseOffset = Stores [`0`].Offset;
651	unsigned NumPairsExpected = Stores.size() / `2`;
652	unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % `2`);
653	// Size savings will depend on whether we can fold the offset, as an
654	// immediate of an ADD.
655	auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
656	if (!TLI.isLegalAddImmediate(BaseOffset))
657	TotalInstsExpected++;
658	int SavingsExpected = Stores.size() - TotalInstsExpected;
659	if (SavingsExpected <= `0`)
660	return false;
661
662	auto &MRI = MIB.getMF().getRegInfo();
663
664	// We have a series of consecutive stores. Factor out the common base
665	// pointer and rewrite the offsets.
666	Register NewBase = Stores [`0`].Ptr->getReg(Idx: `0`);
667	for (auto &SInfo : Stores) {
668	// Compute a new pointer with the new base ptr and adjusted offset.
669	MIB.setInstrAndDebugLoc(*SInfo.St);
670	auto NewOff =
671	MIB.buildConstant(Res: LLT::integer(SizeInBits: `64`), Val: SInfo.Offset - BaseOffset);
672	auto NewPtr = MIB.buildPtrAdd(Res: MRI.getType(Reg: SInfo.St->getPointerReg()),
673	Op0: NewBase, Op1: NewOff);
674	if (MIB.getObserver())
675	MIB.getObserver()->changingInstr(MI&: *SInfo.St);
676	SInfo.St->getOperand(i: `1`).setReg(NewPtr.getReg(Idx: `0`));
677	if (MIB.getObserver())
678	MIB.getObserver()->changedInstr(MI&: *SInfo.St);
679	}
680	LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
681	<< " stores into a base pointer and offsets.\n");
682	return true;
683	}
684
685	static cl::opt<bool>
686	EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
687	cl::init(Val: true), cl::Hidden,
688	cl::desc("Enable consecutive memop optimization "
689	"in AArch64PostLegalizerCombiner"));
690
691	static bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
692	CSEMIRBuilder &MIB) {
693	// This combine needs to run after all reassociations/folds on pointer
694	// addressing have been done, specifically those that combine two G_PTR_ADDs
695	// with constant offsets into a single G_PTR_ADD with a combined offset.
696	// The goal of this optimization is to undo that combine in the case where
697	// doing so has prevented the formation of pair stores due to illegal
698	// addressing modes of STP. The reason that we do it here is because
699	// it's much easier to undo the transformation of a series consecutive
700	// mem ops, than it is to detect when doing it would be a bad idea looking
701	// at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
702	//
703	// An example:
704	// G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
705	// %off1:_(s64) = G_CONSTANT i64 4128
706	// %p1:_(p0) = G_PTR_ADD %0:_, %off1:_(s64)
707	// G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
708	// %off2:_(s64) = G_CONSTANT i64 4144
709	// %p2:_(p0) = G_PTR_ADD %0:_, %off2:_(s64)
710	// G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
711	// %off3:_(s64) = G_CONSTANT i64 4160
712	// %p3:_(p0) = G_PTR_ADD %0:_, %off3:_(s64)
713	// G_STORE %11:_(<2 x s64>), %17:_(p0) :: (store (<2 x s64>), align 1)
714	bool Changed = false;
715	auto &MRI = MF.getRegInfo();
716
717	if (!EnableConsecutiveMemOpOpt)
718	return Changed;
719
720	SmallVector<StoreInfo, `8`> Stores;
721	// If we see a load, then we keep track of any values defined by it.
722	// In the following example, STP formation will fail anyway because
723	// the latter store is using a load result that appears after the
724	// the prior store. In this situation if we factor out the offset then
725	// we increase code size for no benefit.
726	// G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
727	// %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
728	// G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
729	SmallVector<Register> LoadValsSinceLastStore;
730
731	auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
732	// Check if this store is consecutive to the last one.
733	if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() \|\|
734	(Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
735	New.Offset) \|\|
736	Last.StoredType != New.StoredType)
737	return false;
738
739	// Check if this store is using a load result that appears after the
740	// last store. If so, bail out.
741	if (any_of(Range&: LoadValsSinceLastStore, P: [&](Register LoadVal) {
742	return New.St->getValueReg() == LoadVal;
743	}))
744	return false;
745
746	// Check if the current offset would be too large for STP.
747	// If not, then STP formation should be able to handle it, so we don't
748	// need to do anything.
749	int64_t MaxLegalOffset;
750	switch (New.StoredType.getSizeInBits()) {
751	case `32`:
752	MaxLegalOffset = `252`;
753	break;
754	case `64`:
755	MaxLegalOffset = `504`;
756	break;
757	case `128`:
758	MaxLegalOffset = `1008`;
759	break;
760	default:
761	llvm_unreachable("Unexpected stored type size");
762	}
763	if (New.Offset < MaxLegalOffset)
764	return false;
765
766	// If factoring it out still wouldn't help then don't bother.
767	return New.Offset - Stores [`0`].Offset <= MaxLegalOffset;
768	};
769
770	auto resetState = [&]() {
771	Stores.clear();
772	LoadValsSinceLastStore.clear();
773	};
774
775	for (auto &MBB : MF) {
776	// We're looking inside a single BB at a time since the memset pattern
777	// should only be in a single block.
778	resetState ();
779	for (auto &MI : MBB) {
780	// Skip for scalable vectors
781	if (auto *LdSt = dyn_cast<GLoadStore>(Val: &MI);
782	LdSt && MRI.getType(Reg: LdSt->getOperand(i: `0`).getReg()).isScalableVector())
783	continue;
784
785	if (auto *St = dyn_cast<GStore>(Val: &MI)) {
786	Register PtrBaseReg;
787	APInt Offset;
788	LLT StoredValTy = MRI.getType(Reg: St->getValueReg());
789	unsigned ValSize = StoredValTy.getSizeInBits();
790	if (ValSize < `32` \|\| St->getMMO().getSizeInBits() != ValSize)
791	continue;
792
793	Register PtrReg = St->getPointerReg();
794	if (mi_match(
795	R: PtrReg, MRI,
796	P: m_OneNonDBGUse(SP: m_GPtrAdd(L: m_Reg(R&: PtrBaseReg), R: m_ICst(Cst&: Offset))))) {
797	GPtrAdd *PtrAdd = cast<GPtrAdd>(Val: MRI.getVRegDef(Reg: PtrReg));
798	StoreInfo New = {.St: St, .Ptr: PtrAdd, .Offset: Offset.getSExtValue(), .StoredType: StoredValTy};
799
800	if (Stores.empty()) {
801	Stores.push_back(Elt: New);
802	continue;
803	}
804
805	// Check if this store is a valid continuation of the sequence.
806	auto &Last = Stores.back();
807	if (storeIsValid (Last, New)) {
808	Stores.push_back(Elt: New);
809	LoadValsSinceLastStore.clear(); // Reset the load value tracking.
810	} else {
811	// The store isn't valid to consider for the prior sequence,
812	// so try to optimize what we have so far and start a new sequence.
813	Changed \|= tryOptimizeConsecStores(Stores, MIB);
814	resetState ();
815	Stores.push_back(Elt: New);
816	}
817	}
818	} else if (auto *Ld = dyn_cast<GLoad>(Val: &MI)) {
819	LoadValsSinceLastStore.push_back(Elt: Ld->getDstReg());
820	}
821	}
822	Changed \|= tryOptimizeConsecStores(Stores, MIB);
823	resetState ();
824	}
825
826	return Changed;
827	}
828
829	bool runCombiner(MachineFunction &MF, GISelCSEInfo *CSEInfo,
830	GISelValueTracking VT, MachineDominatorTree MDT,
831	const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
832	bool EnableOpt, bool IsOptNone) {
833	if (MF.getProperties().hasFailedISel())
834	return false;
835	const Function &F = MF.getFunction();
836
837	const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
838	const LegalizerInfo *LI = ST.getLegalizerInfo();
839
840	CombinerInfo CInfo(/AllowIllegalOps=/false, /ShouldLegalizeIllegal=/false,
841	/LegalizerInfo=/LI, EnableOpt, F.hasOptSize(),
842	F.hasMinSize());
843	// Disable fixed-point iteration to reduce compile-time
844	CInfo.MaxIterations = `1`;
845	CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
846	// Legalizer performs DCE, so a full DCE pass is unnecessary.
847	CInfo.EnableFullDCE = false;
848	AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, ST,
849	MDT, LI);
850	bool Changed = Impl.combineMachineInstrs();
851
852	CSEMIRBuilder MIB(MF);
853	MIB.setCSEInfo(CSEInfo);
854	Changed \|= optimizeConsecutiveMemOpAddressing(MF, MIB);
855	return Changed;
856	}
857
858	class AArch64PostLegalizerCombinerLegacy : public MachineFunctionPass {
859	public:
860	static char ID;
861
862	AArch64PostLegalizerCombinerLegacy(bool IsOptNone = false);
863
864	StringRef getPassName() const override {
865	return "AArch64PostLegalizerCombiner";
866	}
867
868	bool runOnMachineFunction(MachineFunction &MF) override;
869	void getAnalysisUsage(AnalysisUsage &AU) const override;
870
871	MachineFunctionProperties getRequiredProperties() const override {
872	return MachineFunctionProperties ().set(
873	MachineFunctionProperties::Property::Legalized);
874	}
875
876	private:
877	bool IsOptNone;
878	AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
879	};
880	} // end anonymous namespace
881
882	void AArch64PostLegalizerCombinerLegacy::getAnalysisUsage(
883	AnalysisUsage &AU) const {
884	AU.setPreservesCFG();
885	getSelectionDAGFallbackAnalysisUsage(AU);
886	AU.addRequired<GISelValueTrackingAnalysisLegacy>();
887	AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
888	if (!IsOptNone) {
889	AU.addRequired<MachineDominatorTreeWrapperPass>();
890	AU.addPreserved<MachineDominatorTreeWrapperPass>();
891	AU.addRequired<GISelCSEAnalysisWrapperPass>();
892	AU.addPreserved<GISelCSEAnalysisWrapperPass>();
893	}
894	MachineFunctionPass::getAnalysisUsage(AU);
895	}
896
897	AArch64PostLegalizerCombinerLegacy::AArch64PostLegalizerCombinerLegacy(
898	bool IsOptNone)
899	: MachineFunctionPass (ID), IsOptNone(IsOptNone) {
900	if (!RuleConfig.parseCommandLineOption())
901	reportFatalUsageError(reason: "Invalid rule identifier");
902	}
903
904	bool AArch64PostLegalizerCombinerLegacy::runOnMachineFunction(
905	MachineFunction &MF) {
906	if (MF.getProperties().hasFailedISel())
907	return false;
908
909	GISelValueTracking *VT =
910	&getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
911	MachineDominatorTree *MDT =
912	IsOptNone ? nullptr
913	: &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
914	GISelCSEAnalysisWrapper &Wrapper =
915	getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
916	auto *CSEInfo =
917	&Wrapper.get(CSEOpt: getStandardCSEConfigForOpt(Level: MF.getTarget().getOptLevel()));
918
919	bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
920	!skipFunction(F: MF.getFunction());
921
922	return runCombiner(MF, CSEInfo, VT, MDT, RuleConfig, EnableOpt, IsOptNone);
923	}
924
// Out-of-class definition of the static pass identifier declared in
// AArch64PostLegalizerCombinerLegacy; the constructor hands it to the
// MachineFunctionPass base.
925	char AArch64PostLegalizerCombinerLegacy::ID = `0`;
// Legacy pass registration. The BEGIN/END pair names the pass class and its
// description string, and GISelValueTrackingAnalysisLegacy (always required in
// getAnalysisUsage) is listed as a dependency.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against the macro definition.
926	INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombinerLegacy, DEBUG_TYPE,
927	"Combine AArch64 MachineInstrs after legalization", false,
928	false)
929	INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
930	INITIALIZE_PASS_END(AArch64PostLegalizerCombinerLegacy, DEBUG_TYPE,
931	"Combine AArch64 MachineInstrs after legalization", false,
932	false)
933
934	AArch64PostLegalizerCombinerPass::AArch64PostLegalizerCombinerPass(
935	const AArch64TargetMachine *TM)
936	: RuleConfig(
937	std::make_unique<AArch64PostLegalizerCombinerImplRuleConfig>()),
938	TM(TM) {
939	if (!RuleConfig ->parseCommandLineOption())
940	reportFatalUsageError(reason: "invalid rule identifier");
941	}
942
943	AArch64PostLegalizerCombinerPass::AArch64PostLegalizerCombinerPass(
944	AArch64PostLegalizerCombinerPass &&) = default;
945
946	AArch64PostLegalizerCombinerPass::~AArch64PostLegalizerCombinerPass() = default;
947
948	PreservedAnalyses
949	AArch64PostLegalizerCombinerPass::run(MachineFunction &MF,
950	MachineFunctionAnalysisManager &MFAM) {
951	if (MF.getProperties().hasFailedISel())
952	return PreservedAnalyses::all();
953
954	const bool IsOptNone = TM->isGlobalISelOptNone();
955	bool EnableOpt = !IsOptNone;
956
957	GISelValueTracking *VT = &MFAM.getResult<GISelValueTrackingAnalysis>(IR&: MF);
958	MachineDominatorTree *MDT =
959	IsOptNone ? nullptr : &MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
960	GISelCSEInfo *CSEInfo = MFAM.getResult<GISelCSEAnalysis>(IR&: MF).get();
961
962	if (!runCombiner(MF, CSEInfo, VT, MDT, RuleConfig: *RuleConfig, EnableOpt, IsOptNone))
963	return PreservedAnalyses::all();
964
965	PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
966	PA.preserveSet<CFGAnalyses>();
967	PA.preserve<GISelValueTrackingAnalysis>();
968	PA.preserve<GISelCSEAnalysis>();
969	return PA;
970	}
971
972	namespace llvm {
973	FunctionPass createAArch64PostLegalizerCombinerLegacy(bool* IsOptNone) {
974	return new AArch64PostLegalizerCombinerLegacy (IsOptNone);
975	}
976	} // end namespace llvm
977

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp