1//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization combines on generic MachineInstrs.
11///
12/// The combines here must preserve instruction legality.
13///
14/// Lowering combines (e.g. pseudo matching) should be handled by
15/// AArch64PostLegalizerLowering.
16///
17/// Combines which don't rely on instruction legality should go in the
18/// AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
22#include "AArch64.h"
23#include "AArch64TargetMachine.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
26#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
27#include "llvm/CodeGen/GlobalISel/Combiner.h"
28#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
29#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
30#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
31#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
32#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
33#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
34#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
35#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
36#include "llvm/CodeGen/GlobalISel/Utils.h"
37#include "llvm/CodeGen/MachineDominators.h"
38#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
39#include "llvm/CodeGen/MachineFunctionPass.h"
40#include "llvm/CodeGen/MachinePassManager.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/TargetOpcodes.h"
43#include "llvm/Support/Debug.h"
44
45#define GET_GICOMBINER_DEPS
46#include "AArch64GenPostLegalizeGICombiner.inc"
47#undef GET_GICOMBINER_DEPS
48
49#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
50
51using namespace llvm;
52using namespace MIPatternMatch;
53
54#define GET_GICOMBINER_TYPES
55#include "AArch64GenPostLegalizeGICombiner.inc"
56#undef GET_GICOMBINER_TYPES
57
58namespace {
59
60/// This combine tries do what performExtractVectorEltCombine does in SDAG.
61/// Rewrite for pairwise fadd pattern
62/// (s32 (g_extract_vector_elt
63/// (g_fadd (vXs32 Other)
64/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
65/// ->
66/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
67/// (g_extract_vector_elt (vXs32 Other) 1))
68bool matchExtractVecEltPairwiseAdd(
69 MachineInstr &MI, MachineRegisterInfo &MRI,
70 std::tuple<unsigned, LLT, Register> &MatchInfo) {
71 Register Src1 = MI.getOperand(i: 1).getReg();
72 Register Src2 = MI.getOperand(i: 2).getReg();
73 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
74
75 auto Cst = getIConstantVRegValWithLookThrough(VReg: Src2, MRI);
76 if (!Cst || Cst->Value != 0)
77 return false;
78 // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
79
80 // Now check for an fadd operation. TODO: expand this for integer add?
81 auto *FAddMI = getOpcodeDef(Opcode: TargetOpcode::G_FADD, Reg: Src1, MRI);
82 if (!FAddMI)
83 return false;
84
85 // If we add support for integer add, must restrict these types to just s64.
86 unsigned DstSize = DstTy.getSizeInBits();
87 if (DstSize != 16 && DstSize != 32 && DstSize != 64)
88 return false;
89
90 Register Src1Op1 = FAddMI->getOperand(i: 1).getReg();
91 Register Src1Op2 = FAddMI->getOperand(i: 2).getReg();
92 MachineInstr *Shuffle =
93 getOpcodeDef(Opcode: TargetOpcode::G_SHUFFLE_VECTOR, Reg: Src1Op2, MRI);
94 MachineInstr *Other = MRI.getVRegDef(Reg: Src1Op1);
95 if (!Shuffle) {
96 Shuffle = getOpcodeDef(Opcode: TargetOpcode::G_SHUFFLE_VECTOR, Reg: Src1Op1, MRI);
97 Other = MRI.getVRegDef(Reg: Src1Op2);
98 }
99
100 // We're looking for a shuffle that moves the second element to index 0.
101 if (Shuffle && Shuffle->getOperand(i: 3).getShuffleMask()[0] == 1 &&
102 Other == MRI.getVRegDef(Reg: Shuffle->getOperand(i: 1).getReg())) {
103 std::get<0>(t&: MatchInfo) = TargetOpcode::G_FADD;
104 std::get<1>(t&: MatchInfo) = DstTy;
105 std::get<2>(t&: MatchInfo) = Other->getOperand(i: 0).getReg();
106 return true;
107 }
108 return false;
109}
110
111void applyExtractVecEltPairwiseAdd(
112 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
113 std::tuple<unsigned, LLT, Register> &MatchInfo) {
114 unsigned Opc = std::get<0>(t&: MatchInfo);
115 assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
116 // We want to generate two extracts of elements 0 and 1, and add them.
117 LLT Ty = std::get<1>(t&: MatchInfo);
118 Register Src = std::get<2>(t&: MatchInfo);
119 LLT s64 = LLT::integer(SizeInBits: 64);
120 B.setInstrAndDebugLoc(MI);
121 auto Elt0 = B.buildExtractVectorElement(Res: Ty, Val: Src, Idx: B.buildConstant(Res: s64, Val: 0));
122 auto Elt1 = B.buildExtractVectorElement(Res: Ty, Val: Src, Idx: B.buildConstant(Res: s64, Val: 1));
123 B.buildInstr(Opc, DstOps: {MI.getOperand(i: 0).getReg()}, SrcOps: {Elt0, Elt1});
124 MI.eraseFromParent();
125}
126
127bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
128 // TODO: check if extended build vector as well.
129 unsigned Opc = MRI.getVRegDef(Reg: R)->getOpcode();
130 return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
131}
132
133bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
134 // TODO: check if extended build vector as well.
135 return MRI.getVRegDef(Reg: R)->getOpcode() == TargetOpcode::G_ZEXT;
136}
137
138bool matchAArch64MulConstCombine(
139 MachineInstr &MI, MachineRegisterInfo &MRI,
140 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
141 assert(MI.getOpcode() == TargetOpcode::G_MUL);
142 Register LHS = MI.getOperand(i: 1).getReg();
143 Register RHS = MI.getOperand(i: 2).getReg();
144 Register Dst = MI.getOperand(i: 0).getReg();
145 const LLT Ty = MRI.getType(Reg: LHS);
146
147 // The below optimizations require a constant RHS.
148 auto Const = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
149 if (!Const)
150 return false;
151
152 APInt ConstValue = Const->Value.sext(width: Ty.getSizeInBits());
153 // The following code is ported from AArch64ISelLowering.
154 // Multiplication of a power of two plus/minus one can be done more
155 // cheaply as shift+add/sub. For now, this is true unilaterally. If
156 // future CPUs have a cheaper MADD instruction, this may need to be
157 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
158 // 64-bit is 5 cycles, so this is always a win.
159 // More aggressively, some multiplications N0 * C can be lowered to
160 // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
161 // e.g. 6=3*2=(2+1)*2.
162 // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
163 // which equals to (1+2)*16-(1+2).
164 // TrailingZeroes is used to test if the mul can be lowered to
165 // shift+add+shift.
166 unsigned TrailingZeroes = ConstValue.countr_zero();
167 if (TrailingZeroes) {
168 // Conservatively do not lower to shift+add+shift if the mul might be
169 // folded into smul or umul.
170 if (MRI.hasOneNonDBGUse(RegNo: LHS) &&
171 (isSignExtended(R: LHS, MRI) || isZeroExtended(R: LHS, MRI)))
172 return false;
173 // Conservatively do not lower to shift+add+shift if the mul might be
174 // folded into madd or msub.
175 if (MRI.hasOneNonDBGUse(RegNo: Dst)) {
176 MachineInstr &UseMI = *MRI.use_instr_begin(RegNo: Dst);
177 unsigned UseOpc = UseMI.getOpcode();
178 if (UseOpc == TargetOpcode::G_ADD || UseOpc == TargetOpcode::G_PTR_ADD ||
179 UseOpc == TargetOpcode::G_SUB)
180 return false;
181 }
182 }
183 // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
184 // and shift+add+shift.
185 APInt ShiftedConstValue = ConstValue.ashr(ShiftAmt: TrailingZeroes);
186
187 unsigned ShiftAmt, AddSubOpc;
188 // Is the shifted value the LHS operand of the add/sub?
189 bool ShiftValUseIsLHS = true;
190 // Do we need to negate the result?
191 bool NegateResult = false;
192
193 if (ConstValue.isNonNegative()) {
194 // (mul x, 2^N + 1) => (add (shl x, N), x)
195 // (mul x, 2^N - 1) => (sub (shl x, N), x)
196 // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
197 APInt SCVMinus1 = ShiftedConstValue - 1;
198 APInt CVPlus1 = ConstValue + 1;
199 if (SCVMinus1.isPowerOf2()) {
200 ShiftAmt = SCVMinus1.logBase2();
201 AddSubOpc = TargetOpcode::G_ADD;
202 } else if (CVPlus1.isPowerOf2()) {
203 ShiftAmt = CVPlus1.logBase2();
204 AddSubOpc = TargetOpcode::G_SUB;
205 } else
206 return false;
207 } else {
208 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
209 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
210 APInt CVNegPlus1 = -ConstValue + 1;
211 APInt CVNegMinus1 = -ConstValue - 1;
212 if (CVNegPlus1.isPowerOf2()) {
213 ShiftAmt = CVNegPlus1.logBase2();
214 AddSubOpc = TargetOpcode::G_SUB;
215 ShiftValUseIsLHS = false;
216 } else if (CVNegMinus1.isPowerOf2()) {
217 ShiftAmt = CVNegMinus1.logBase2();
218 AddSubOpc = TargetOpcode::G_ADD;
219 NegateResult = true;
220 } else
221 return false;
222 }
223
224 if (NegateResult && TrailingZeroes)
225 return false;
226
227 ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
228 auto Shift = B.buildConstant(Res: LLT::integer(SizeInBits: 64), Val: ShiftAmt);
229 auto ShiftedVal = B.buildShl(Dst: Ty, Src0: LHS, Src1: Shift);
230
231 Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(Idx: 0) : LHS;
232 Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(Idx: 0);
233 auto Res = B.buildInstr(Opc: AddSubOpc, DstOps: {Ty}, SrcOps: {AddSubLHS, AddSubRHS});
234 assert(!(NegateResult && TrailingZeroes) &&
235 "NegateResult and TrailingZeroes cannot both be true for now.");
236 // Negate the result.
237 if (NegateResult) {
238 B.buildSub(Dst: DstReg, Src0: B.buildConstant(Res: Ty, Val: 0), Src1: Res);
239 return;
240 }
241 // Shift the result.
242 if (TrailingZeroes) {
243 B.buildShl(Dst: DstReg, Src0: Res,
244 Src1: B.buildConstant(Res: LLT::integer(SizeInBits: 64), Val: TrailingZeroes));
245 return;
246 }
247 B.buildCopy(Res: DstReg, Op: Res.getReg(Idx: 0));
248 };
249 return true;
250}
251
252void applyAArch64MulConstCombine(
253 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
254 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
255 B.setInstrAndDebugLoc(MI);
256 ApplyFn(B, MI.getOperand(i: 0).getReg());
257 MI.eraseFromParent();
258}
259
260/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
261/// is a zero, into a G_ZEXT of the first.
262bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
263 auto &Merge = cast<GMerge>(Val&: MI);
264 LLT SrcTy = MRI.getType(Reg: Merge.getSourceReg(I: 0));
265 if (SrcTy != LLT::scalar(SizeInBits: 32) || Merge.getNumSources() != 2)
266 return false;
267 return mi_match(R: Merge.getSourceReg(I: 1), MRI, P: m_SpecificICst(RequestedValue: 0));
268}
269
270void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
271 MachineIRBuilder &B, GISelChangeObserver &Observer) {
272 // Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)
273 // ->
274 // %d(s64) = G_ZEXT %a(s32)
275 Observer.changingInstr(MI);
276 MI.setDesc(B.getTII().get(Opcode: TargetOpcode::G_ZEXT));
277 MI.removeOperand(OpNo: 2);
278 Observer.changedInstr(MI);
279}
280
281/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
282/// instruction.
283bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
284 // If this is coming from a scalar compare then we can use a G_ZEXT instead of
285 // a G_ANYEXT:
286 //
287 // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
288 // %ext:_(s64) = G_ANYEXT %cmp(s32)
289 //
290 // By doing this, we can leverage more KnownBits combines.
291 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
292 Register Dst = MI.getOperand(i: 0).getReg();
293 Register Src = MI.getOperand(i: 1).getReg();
294 return MRI.getType(Reg: Dst).isScalar() &&
295 mi_match(R: Src, MRI,
296 P: m_any_of(preds: m_GICmp(P: m_Pred(), L: m_Reg(), R: m_Reg()),
297 preds: m_GFCmp(P: m_Pred(), L: m_Reg(), R: m_Reg())));
298}
299
300void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
301 MachineIRBuilder &B,
302 GISelChangeObserver &Observer) {
303 Observer.changingInstr(MI);
304 MI.setDesc(B.getTII().get(Opcode: TargetOpcode::G_ZEXT));
305 Observer.changedInstr(MI);
306}
307
308/// Match a 128b store of zero and split it into two 64 bit stores, for
309/// size/performance reasons.
310bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
311 GStore &Store = cast<GStore>(Val&: MI);
312 if (!Store.isSimple())
313 return false;
314 LLT ValTy = MRI.getType(Reg: Store.getValueReg());
315 if (ValTy.isScalableVector())
316 return false;
317 if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
318 return false;
319 if (Store.getMemSizeInBits() != ValTy.getSizeInBits())
320 return false; // Don't split truncating stores.
321 if (!MRI.hasOneNonDBGUse(RegNo: Store.getValueReg()))
322 return false;
323 auto MaybeCst = isConstantOrConstantSplatVector(
324 MI&: *MRI.getVRegDef(Reg: Store.getValueReg()), MRI);
325 return MaybeCst && MaybeCst->isZero();
326}
327
328void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
329 MachineIRBuilder &B,
330 GISelChangeObserver &Observer) {
331 B.setInstrAndDebugLoc(MI);
332 GStore &Store = cast<GStore>(Val&: MI);
333 assert(MRI.getType(Store.getValueReg()).isVector() &&
334 "Expected a vector store value");
335 LLT NewTy = LLT::integer(SizeInBits: 64);
336 Register PtrReg = Store.getPointerReg();
337 auto Zero = B.buildConstant(Res: NewTy, Val: 0);
338 auto HighPtr =
339 B.buildPtrAdd(Res: MRI.getType(Reg: PtrReg), Op0: PtrReg, Op1: B.buildConstant(Res: NewTy, Val: 8));
340 auto &MF = *MI.getMF();
341 auto *LowMMO = MF.getMachineMemOperand(MMO: &Store.getMMO(), Offset: 0, Ty: NewTy);
342 auto *HighMMO = MF.getMachineMemOperand(MMO: &Store.getMMO(), Offset: 8, Ty: NewTy);
343 B.buildStore(Val: Zero, Addr: PtrReg, MMO&: *LowMMO);
344 B.buildStore(Val: Zero, Addr: HighPtr, MMO&: *HighMMO);
345 Store.eraseFromParent();
346}
347
348bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
349 std::tuple<Register, Register, Register> &MatchInfo) {
350 const LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
351 if (!DstTy.isVector())
352 return false;
353
354 Register AO1, AO2, BVO1, BVO2;
355 if (!mi_match(MI, MRI,
356 P: m_GOr(L: m_GAnd(L: m_Reg(R&: AO1), R: m_Reg(R&: BVO1)),
357 R: m_GAnd(L: m_Reg(R&: AO2), R: m_Reg(R&: BVO2)))))
358 return false;
359
360 auto *BV1 = getOpcodeDef<GBuildVector>(Reg: BVO1, MRI);
361 auto *BV2 = getOpcodeDef<GBuildVector>(Reg: BVO2, MRI);
362 if (!BV1 || !BV2)
363 return false;
364
365 for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
366 auto ValAndVReg1 =
367 getIConstantVRegValWithLookThrough(VReg: BV1->getSourceReg(I), MRI);
368 auto ValAndVReg2 =
369 getIConstantVRegValWithLookThrough(VReg: BV2->getSourceReg(I), MRI);
370 if (!ValAndVReg1 || !ValAndVReg2 ||
371 ValAndVReg1->Value != ~ValAndVReg2->Value)
372 return false;
373 }
374
375 MatchInfo = {AO1, AO2, BVO1};
376 return true;
377}
378
379void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
380 MachineIRBuilder &B,
381 std::tuple<Register, Register, Register> &MatchInfo) {
382 B.setInstrAndDebugLoc(MI);
383 B.buildInstr(
384 Opc: AArch64::G_BSP, DstOps: {MI.getOperand(i: 0).getReg()},
385 SrcOps: {std::get<2>(t&: MatchInfo), std::get<0>(t&: MatchInfo), std::get<1>(t&: MatchInfo)});
386 MI.eraseFromParent();
387}
388
389// Combines Mul(And(Srl(X, 15), 0x10001), 0xffff) into CMLTz
390bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
391 Register &SrcReg) {
392 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
393
394 if (DstTy != LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) && DstTy != LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) &&
395 DstTy != LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) && DstTy != LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) &&
396 DstTy != LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16))
397 return false;
398
399 auto AndMI = getDefIgnoringCopies(Reg: MI.getOperand(i: 1).getReg(), MRI);
400 if (AndMI->getOpcode() != TargetOpcode::G_AND)
401 return false;
402 auto LShrMI = getDefIgnoringCopies(Reg: AndMI->getOperand(i: 1).getReg(), MRI);
403 if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
404 return false;
405
406 // Check the constant splat values
407 auto V1 = isConstantOrConstantSplatVector(
408 MI&: *MRI.getVRegDef(Reg: MI.getOperand(i: 2).getReg()), MRI);
409 auto V2 = isConstantOrConstantSplatVector(
410 MI&: *MRI.getVRegDef(Reg: AndMI->getOperand(i: 2).getReg()), MRI);
411 auto V3 = isConstantOrConstantSplatVector(
412 MI&: *MRI.getVRegDef(Reg: LShrMI->getOperand(i: 2).getReg()), MRI);
413 if (!V1.has_value() || !V2.has_value() || !V3.has_value())
414 return false;
415 unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
416 if (!V1.value().isMask(numBits: HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
417 V3 != (HalfSize - 1))
418 return false;
419
420 SrcReg = LShrMI->getOperand(i: 1).getReg();
421
422 return true;
423}
424
425void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
426 MachineIRBuilder &B, Register &SrcReg) {
427 Register DstReg = MI.getOperand(i: 0).getReg();
428 LLT DstTy = MRI.getType(Reg: DstReg);
429 LLT HalfTy =
430 DstTy.changeElementCount(EC: DstTy.getElementCount().multiplyCoefficientBy(RHS: 2))
431 .changeElementSize(NewEltSize: DstTy.getScalarSizeInBits() / 2);
432
433 Register ZeroVec = B.buildConstant(Res: HalfTy, Val: 0).getReg(Idx: 0);
434 Register CastReg =
435 B.buildInstr(Opc: TargetOpcode::G_BITCAST, DstOps: {HalfTy}, SrcOps: {SrcReg}).getReg(Idx: 0);
436 Register CMLTReg =
437 B.buildICmp(Pred: CmpInst::Predicate::ICMP_SLT, Res: HalfTy, Op0: CastReg, Op1: ZeroVec)
438 .getReg(Idx: 0);
439
440 B.buildInstr(Opc: TargetOpcode::G_BITCAST, DstOps: {DstReg}, SrcOps: {CMLTReg}).getReg(Idx: 0);
441 MI.eraseFromParent();
442}
443
444// Match mul({z/s}ext , {z/s}ext) => {u/s}mull
445bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
446 GISelValueTracking *KB,
447 std::tuple<bool, Register, Register> &MatchInfo) {
448 // Get the instructions that defined the source operand
449 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
450 MachineInstr *I1 = getDefIgnoringCopies(Reg: MI.getOperand(i: 1).getReg(), MRI);
451 MachineInstr *I2 = getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI);
452 unsigned I1Opc = I1->getOpcode();
453 unsigned I2Opc = I2->getOpcode();
454 unsigned EltSize = DstTy.getScalarSizeInBits();
455
456 if (!DstTy.isVector() || I1->getNumOperands() < 2 || I2->getNumOperands() < 2)
457 return false;
458
459 auto IsAtLeastDoubleExtend = [&](Register R) {
460 LLT Ty = MRI.getType(Reg: R);
461 return EltSize >= Ty.getScalarSizeInBits() * 2;
462 };
463
464 // If the source operands were EXTENDED before, then {U/S}MULL can be used
465 bool IsZExt1 =
466 I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_ANYEXT;
467 bool IsZExt2 =
468 I2Opc == TargetOpcode::G_ZEXT || I2Opc == TargetOpcode::G_ANYEXT;
469 if (IsZExt1 && IsZExt2 && IsAtLeastDoubleExtend(I1->getOperand(i: 1).getReg()) &&
470 IsAtLeastDoubleExtend(I2->getOperand(i: 1).getReg())) {
471 get<0>(t&: MatchInfo) = true;
472 get<1>(t&: MatchInfo) = I1->getOperand(i: 1).getReg();
473 get<2>(t&: MatchInfo) = I2->getOperand(i: 1).getReg();
474 return true;
475 }
476
477 bool IsSExt1 =
478 I1Opc == TargetOpcode::G_SEXT || I1Opc == TargetOpcode::G_ANYEXT;
479 bool IsSExt2 =
480 I2Opc == TargetOpcode::G_SEXT || I2Opc == TargetOpcode::G_ANYEXT;
481 if (IsSExt1 && IsSExt2 && IsAtLeastDoubleExtend(I1->getOperand(i: 1).getReg()) &&
482 IsAtLeastDoubleExtend(I2->getOperand(i: 1).getReg())) {
483 get<0>(t&: MatchInfo) = false;
484 get<1>(t&: MatchInfo) = I1->getOperand(i: 1).getReg();
485 get<2>(t&: MatchInfo) = I2->getOperand(i: 1).getReg();
486 return true;
487 }
488
489 // Select UMULL if we can replace the other operand with an extend.
490 APInt Mask = APInt::getHighBitsSet(numBits: EltSize, hiBitsSet: EltSize / 2);
491 if (KB && (IsZExt1 || IsZExt2) &&
492 IsAtLeastDoubleExtend(IsZExt1 ? I1->getOperand(i: 1).getReg()
493 : I2->getOperand(i: 1).getReg())) {
494 Register ZExtOp =
495 IsZExt1 ? MI.getOperand(i: 2).getReg() : MI.getOperand(i: 1).getReg();
496 if (KB->maskedValueIsZero(Val: ZExtOp, Mask)) {
497 get<0>(t&: MatchInfo) = true;
498 get<1>(t&: MatchInfo) = IsZExt1 ? I1->getOperand(i: 1).getReg() : ZExtOp;
499 get<2>(t&: MatchInfo) = IsZExt1 ? ZExtOp : I2->getOperand(i: 1).getReg();
500 return true;
501 }
502 } else if (KB && DstTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) &&
503 KB->maskedValueIsZero(Val: MI.getOperand(i: 1).getReg(), Mask) &&
504 KB->maskedValueIsZero(Val: MI.getOperand(i: 2).getReg(), Mask)) {
505 get<0>(t&: MatchInfo) = true;
506 get<1>(t&: MatchInfo) = MI.getOperand(i: 1).getReg();
507 get<2>(t&: MatchInfo) = MI.getOperand(i: 2).getReg();
508 return true;
509 }
510
511 if (KB && (IsSExt1 || IsSExt2) &&
512 IsAtLeastDoubleExtend(IsSExt1 ? I1->getOperand(i: 1).getReg()
513 : I2->getOperand(i: 1).getReg())) {
514 Register SExtOp =
515 IsSExt1 ? MI.getOperand(i: 2).getReg() : MI.getOperand(i: 1).getReg();
516 if (KB->computeNumSignBits(R: SExtOp) > EltSize / 2) {
517 get<0>(t&: MatchInfo) = false;
518 get<1>(t&: MatchInfo) = IsSExt1 ? I1->getOperand(i: 1).getReg() : SExtOp;
519 get<2>(t&: MatchInfo) = IsSExt1 ? SExtOp : I2->getOperand(i: 1).getReg();
520 return true;
521 }
522 } else if (KB && DstTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) &&
523 KB->computeNumSignBits(R: MI.getOperand(i: 1).getReg()) > EltSize / 2 &&
524 KB->computeNumSignBits(R: MI.getOperand(i: 2).getReg()) > EltSize / 2) {
525 get<0>(t&: MatchInfo) = false;
526 get<1>(t&: MatchInfo) = MI.getOperand(i: 1).getReg();
527 get<2>(t&: MatchInfo) = MI.getOperand(i: 2).getReg();
528 return true;
529 }
530
531 return false;
532}
533
534void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
535 MachineIRBuilder &B, GISelChangeObserver &Observer,
536 std::tuple<bool, Register, Register> &MatchInfo) {
537 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
538 "Expected a G_MUL instruction");
539
540 // Get the instructions that defined the source operand
541 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
542 bool IsZExt = get<0>(t&: MatchInfo);
543 Register Src1Reg = get<1>(t&: MatchInfo);
544 Register Src2Reg = get<2>(t&: MatchInfo);
545 LLT Src1Ty = MRI.getType(Reg: Src1Reg);
546 LLT Src2Ty = MRI.getType(Reg: Src2Reg);
547 LLT HalfDstTy = DstTy.changeElementSize(NewEltSize: DstTy.getScalarSizeInBits() / 2);
548 unsigned ExtOpc = IsZExt ? TargetOpcode::G_ZEXT : TargetOpcode::G_SEXT;
549
550 if (Src1Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())
551 Src1Reg = B.buildExtOrTrunc(ExtOpc, Res: {HalfDstTy}, Op: {Src1Reg}).getReg(Idx: 0);
552 if (Src2Ty.getScalarSizeInBits() * 2 != DstTy.getScalarSizeInBits())
553 Src2Reg = B.buildExtOrTrunc(ExtOpc, Res: {HalfDstTy}, Op: {Src2Reg}).getReg(Idx: 0);
554
555 B.buildInstr(Opc: IsZExt ? AArch64::G_UMULL : AArch64::G_SMULL,
556 DstOps: {MI.getOperand(i: 0).getReg()}, SrcOps: {Src1Reg, Src2Reg});
557 MI.eraseFromParent();
558}
559
560static bool matchSubAddMulReassoc(Register Mul1, Register Mul2, Register Sub,
561 Register Src, MachineRegisterInfo &MRI) {
562 if (!MRI.hasOneUse(RegNo: Sub))
563 return false;
564 if (getIConstantVRegValWithLookThrough(VReg: Src, MRI))
565 return false;
566 MachineInstr *M1 = getDefIgnoringCopies(Reg: Mul1, MRI);
567 if (M1->getOpcode() != AArch64::G_MUL &&
568 M1->getOpcode() != AArch64::G_SMULL &&
569 M1->getOpcode() != AArch64::G_UMULL)
570 return false;
571 MachineInstr *M2 = getDefIgnoringCopies(Reg: Mul2, MRI);
572 if (M2->getOpcode() != AArch64::G_MUL &&
573 M2->getOpcode() != AArch64::G_SMULL &&
574 M2->getOpcode() != AArch64::G_UMULL)
575 return false;
576 return true;
577}
578
579static void applySubAddMulReassoc(MachineInstr &MI, MachineInstr &Sub,
580 MachineRegisterInfo &MRI, MachineIRBuilder &B,
581 GISelChangeObserver &Observer) {
582 Register Src = MI.getOperand(i: 1).getReg();
583 Register Tmp = MI.getOperand(i: 2).getReg();
584 Register Mul1 = Sub.getOperand(i: 1).getReg();
585 Register Mul2 = Sub.getOperand(i: 2).getReg();
586 Observer.changingInstr(MI);
587 B.buildInstr(Opc: AArch64::G_SUB, DstOps: {Tmp}, SrcOps: {Src, Mul1});
588 MI.getOperand(i: 1).setReg(Tmp);
589 MI.getOperand(i: 2).setReg(Mul2);
590 Sub.eraseFromParent();
591 Observer.changedInstr(MI);
592}
593
/// Combiner implementation for the AArch64 post-legalizer combines. Most of
/// the class body (additional members and the rule-matching code) is spliced
/// in from the TableGen-generated AArch64GenPostLegalizeGICombiner.inc.
class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
  // Generic combine helper handed to the rules.
  const CombinerHelper Helper;
  // Rule configuration supplied by the creator of this object.
  const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
  // Subtarget of the function being combined.
  const AArch64Subtarget &STI;

public:
  AArch64PostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,
      GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  /// \returns the fixed, human-readable name of this combiner.
  static const char *getName() { return "AArch64PostLegalizerCombiner"; }

  /// Overrides the Combiner hook for \p I; the definition is not visible in
  /// this part of the file.
  bool tryCombineAll(MachineInstr &I) const override;

private:
  // Generated member declarations.
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};
617
618#define GET_GICOMBINER_IMPL
619#include "AArch64GenPostLegalizeGICombiner.inc"
620#undef GET_GICOMBINER_IMPL
621
/// Construct the combiner implementation: forwards \p MF, \p CInfo, \p VT and
/// \p CSEInfo to the Combiner base, builds the CombinerHelper in
/// post-legalization mode, and stores the rule configuration and subtarget.
/// The remaining member initializers come from the generated include below.
AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,
    GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI, MachineDominatorTree *MDT,
    const LegalizerInfo *LI)
    : Combiner(MF, CInfo, &VT, CSEInfo),
      Helper(Observer, B, /*IsPreLegalize*/ false, &VT, MDT, LI),
      RuleConfig(RuleConfig), STI(STI),
      // The generated initializers complete the list started above.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
636
/// Bookkeeping for one store that is a candidate member of a run of
/// consecutive stores. Instances are aggregate-initialised, so the field order
/// matters.
struct StoreInfo {
  // The store instruction itself.
  GStore *St = nullptr;
  // The G_PTR_ADD that's used by the store. We keep this to cache the
  // MachineInstr def.
  GPtrAdd *Ptr = nullptr;
  // The signed offset to the Ptr instruction.
  int64_t Offset = 0;
  // Type of the value being stored.
  LLT StoredType;
};
646
647static bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
648 CSEMIRBuilder &MIB) {
649 if (Stores.size() <= 2)
650 return false;
651
652 // Profitabity checks:
653 int64_t BaseOffset = Stores[0].Offset;
654 unsigned NumPairsExpected = Stores.size() / 2;
655 unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
656 // Size savings will depend on whether we can fold the offset, as an
657 // immediate of an ADD.
658 auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
659 if (!TLI.isLegalAddImmediate(BaseOffset))
660 TotalInstsExpected++;
661 int SavingsExpected = Stores.size() - TotalInstsExpected;
662 if (SavingsExpected <= 0)
663 return false;
664
665 auto &MRI = MIB.getMF().getRegInfo();
666
667 // We have a series of consecutive stores. Factor out the common base
668 // pointer and rewrite the offsets.
669 Register NewBase = Stores[0].Ptr->getReg(Idx: 0);
670 for (auto &SInfo : Stores) {
671 // Compute a new pointer with the new base ptr and adjusted offset.
672 MIB.setInstrAndDebugLoc(*SInfo.St);
673 auto NewOff =
674 MIB.buildConstant(Res: LLT::integer(SizeInBits: 64), Val: SInfo.Offset - BaseOffset);
675 auto NewPtr = MIB.buildPtrAdd(Res: MRI.getType(Reg: SInfo.St->getPointerReg()),
676 Op0: NewBase, Op1: NewOff);
677 if (MIB.getObserver())
678 MIB.getObserver()->changingInstr(MI&: *SInfo.St);
679 SInfo.St->getOperand(i: 1).setReg(NewPtr.getReg(Idx: 0));
680 if (MIB.getObserver())
681 MIB.getObserver()->changedInstr(MI&: *SInfo.St);
682 }
683 LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
684 << " stores into a base pointer and offsets.\n");
685 return true;
686}
687
688static cl::opt<bool>
689 EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
690 cl::init(Val: true), cl::Hidden,
691 cl::desc("Enable consecutive memop optimization "
692 "in AArch64PostLegalizerCombiner"));
693
694static bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
695 CSEMIRBuilder &MIB) {
696 // This combine needs to run after all reassociations/folds on pointer
697 // addressing have been done, specifically those that combine two G_PTR_ADDs
698 // with constant offsets into a single G_PTR_ADD with a combined offset.
699 // The goal of this optimization is to undo that combine in the case where
700 // doing so has prevented the formation of pair stores due to illegal
701 // addressing modes of STP. The reason that we do it here is because
702 // it's much easier to undo the transformation of a series consecutive
703 // mem ops, than it is to detect when doing it would be a bad idea looking
704 // at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
705 //
706 // An example:
707 // G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
708 // %off1:_(s64) = G_CONSTANT i64 4128
709 // %p1:_(p0) = G_PTR_ADD %0:_, %off1:_(s64)
710 // G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
711 // %off2:_(s64) = G_CONSTANT i64 4144
712 // %p2:_(p0) = G_PTR_ADD %0:_, %off2:_(s64)
713 // G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
714 // %off3:_(s64) = G_CONSTANT i64 4160
715 // %p3:_(p0) = G_PTR_ADD %0:_, %off3:_(s64)
716 // G_STORE %11:_(<2 x s64>), %17:_(p0) :: (store (<2 x s64>), align 1)
717 bool Changed = false;
718 auto &MRI = MF.getRegInfo();
719
720 if (!EnableConsecutiveMemOpOpt)
721 return Changed;
722
723 SmallVector<StoreInfo, 8> Stores;
724 // If we see a load, then we keep track of any values defined by it.
725 // In the following example, STP formation will fail anyway because
726 // the latter store is using a load result that appears after the
727 // the prior store. In this situation if we factor out the offset then
728 // we increase code size for no benefit.
729 // G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
730 // %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
731 // G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
732 SmallVector<Register> LoadValsSinceLastStore;
733
734 auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
735 // Check if this store is consecutive to the last one.
736 if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
737 (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
738 New.Offset) ||
739 Last.StoredType != New.StoredType)
740 return false;
741
742 // Check if this store is using a load result that appears after the
743 // last store. If so, bail out.
744 if (any_of(Range&: LoadValsSinceLastStore, P: [&](Register LoadVal) {
745 return New.St->getValueReg() == LoadVal;
746 }))
747 return false;
748
749 // Check if the current offset would be too large for STP.
750 // If not, then STP formation should be able to handle it, so we don't
751 // need to do anything.
752 int64_t MaxLegalOffset;
753 switch (New.StoredType.getSizeInBits()) {
754 case 32:
755 MaxLegalOffset = 252;
756 break;
757 case 64:
758 MaxLegalOffset = 504;
759 break;
760 case 128:
761 MaxLegalOffset = 1008;
762 break;
763 default:
764 llvm_unreachable("Unexpected stored type size");
765 }
766 if (New.Offset < MaxLegalOffset)
767 return false;
768
769 // If factoring it out still wouldn't help then don't bother.
770 return New.Offset - Stores[0].Offset <= MaxLegalOffset;
771 };
772
773 auto resetState = [&]() {
774 Stores.clear();
775 LoadValsSinceLastStore.clear();
776 };
777
778 for (auto &MBB : MF) {
779 // We're looking inside a single BB at a time since the memset pattern
780 // should only be in a single block.
781 resetState();
782 for (auto &MI : MBB) {
783 // Skip for scalable vectors
784 if (auto *LdSt = dyn_cast<GLoadStore>(Val: &MI);
785 LdSt && MRI.getType(Reg: LdSt->getOperand(i: 0).getReg()).isScalableVector())
786 continue;
787
788 if (auto *St = dyn_cast<GStore>(Val: &MI)) {
789 Register PtrBaseReg;
790 APInt Offset;
791 LLT StoredValTy = MRI.getType(Reg: St->getValueReg());
792 unsigned ValSize = StoredValTy.getSizeInBits();
793 if (ValSize < 32 || St->getMMO().getSizeInBits() != ValSize)
794 continue;
795
796 Register PtrReg = St->getPointerReg();
797 if (mi_match(
798 R: PtrReg, MRI,
799 P: m_OneNonDBGUse(SP: m_GPtrAdd(L: m_Reg(R&: PtrBaseReg), R: m_ICst(Cst&: Offset))))) {
800 GPtrAdd *PtrAdd = cast<GPtrAdd>(Val: MRI.getVRegDef(Reg: PtrReg));
801 StoreInfo New = {.St: St, .Ptr: PtrAdd, .Offset: Offset.getSExtValue(), .StoredType: StoredValTy};
802
803 if (Stores.empty()) {
804 Stores.push_back(Elt: New);
805 continue;
806 }
807
808 // Check if this store is a valid continuation of the sequence.
809 auto &Last = Stores.back();
810 if (storeIsValid(Last, New)) {
811 Stores.push_back(Elt: New);
812 LoadValsSinceLastStore.clear(); // Reset the load value tracking.
813 } else {
            // The store isn't valid to consider for the prior sequence,
815 // so try to optimize what we have so far and start a new sequence.
816 Changed |= tryOptimizeConsecStores(Stores, MIB);
817 resetState();
818 Stores.push_back(Elt: New);
819 }
820 }
821 } else if (auto *Ld = dyn_cast<GLoad>(Val: &MI)) {
822 LoadValsSinceLastStore.push_back(Elt: Ld->getDstReg());
823 }
824 }
825 Changed |= tryOptimizeConsecStores(Stores, MIB);
826 resetState();
827 }
828
829 return Changed;
830}
831
832bool runCombiner(MachineFunction &MF, GISelCSEInfo *CSEInfo,
833 GISelValueTracking *VT, MachineDominatorTree *MDT,
834 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
835 bool EnableOpt, bool IsOptNone) {
836 if (MF.getProperties().hasFailedISel())
837 return false;
838 const Function &F = MF.getFunction();
839
840 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
841 const LegalizerInfo *LI = ST.getLegalizerInfo();
842
843 CombinerInfo CInfo(/*AllowIllegalOps=*/false, /*ShouldLegalizeIllegal=*/false,
844 /*LegalizerInfo=*/LI, EnableOpt, F.hasOptSize(),
845 F.hasMinSize());
846 // Disable fixed-point iteration to reduce compile-time
847 CInfo.MaxIterations = 1;
848 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
849 // Legalizer performs DCE, so a full DCE pass is unnecessary.
850 CInfo.EnableFullDCE = false;
851 AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, ST,
852 MDT, LI);
853 bool Changed = Impl.combineMachineInstrs();
854
855 CSEMIRBuilder MIB(MF);
856 MIB.setCSEInfo(CSEInfo);
857 Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
858 return Changed;
859}
860
861class AArch64PostLegalizerCombinerLegacy : public MachineFunctionPass {
862public:
863 static char ID;
864
865 AArch64PostLegalizerCombinerLegacy(bool IsOptNone = false);
866
867 StringRef getPassName() const override {
868 return "AArch64PostLegalizerCombiner";
869 }
870
871 bool runOnMachineFunction(MachineFunction &MF) override;
872 void getAnalysisUsage(AnalysisUsage &AU) const override;
873
874 MachineFunctionProperties getRequiredProperties() const override {
875 return MachineFunctionProperties().set(
876 MachineFunctionProperties::Property::Legalized);
877 }
878
879private:
880 bool IsOptNone;
881 AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
882};
883} // end anonymous namespace
884
885void AArch64PostLegalizerCombinerLegacy::getAnalysisUsage(
886 AnalysisUsage &AU) const {
887 AU.setPreservesCFG();
888 getSelectionDAGFallbackAnalysisUsage(AU);
889 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
890 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
891 if (!IsOptNone) {
892 AU.addRequired<MachineDominatorTreeWrapperPass>();
893 AU.addPreserved<MachineDominatorTreeWrapperPass>();
894 AU.addRequired<GISelCSEAnalysisWrapperPass>();
895 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
896 }
897 MachineFunctionPass::getAnalysisUsage(AU);
898}
899
900AArch64PostLegalizerCombinerLegacy::AArch64PostLegalizerCombinerLegacy(
901 bool IsOptNone)
902 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
903 if (!RuleConfig.parseCommandLineOption())
904 reportFatalUsageError(reason: "Invalid rule identifier");
905}
906
907bool AArch64PostLegalizerCombinerLegacy::runOnMachineFunction(
908 MachineFunction &MF) {
909 if (MF.getProperties().hasFailedISel())
910 return false;
911
912 GISelValueTracking *VT =
913 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
914 MachineDominatorTree *MDT =
915 IsOptNone ? nullptr
916 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
917 GISelCSEAnalysisWrapper &Wrapper =
918 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
919 auto *CSEInfo =
920 &Wrapper.get(CSEOpt: getStandardCSEConfigForOpt(Level: MF.getTarget().getOptLevel()));
921
922 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
923 !skipFunction(F: MF.getFunction());
924
925 return runCombiner(MF, CSEInfo, VT, MDT, RuleConfig, EnableOpt, IsOptNone);
926}
927
// Storage for the pass ID that the constructor hands to the
// MachineFunctionPass base class.
char AArch64PostLegalizerCombinerLegacy::ID = 0;
// Registers the legacy pass under DEBUG_TYPE with the description string
// below, and lists GISelValueTrackingAnalysisLegacy as an initialization
// dependency.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS — confirm.
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombinerLegacy, DEBUG_TYPE,
                      "Combine AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
INITIALIZE_PASS_END(AArch64PostLegalizerCombinerLegacy, DEBUG_TYPE,
                    "Combine AArch64 MachineInstrs after legalization", false,
                    false)
936
937AArch64PostLegalizerCombinerPass::AArch64PostLegalizerCombinerPass(
938 const AArch64TargetMachine *TM)
939 : RuleConfig(
940 std::make_unique<AArch64PostLegalizerCombinerImplRuleConfig>()),
941 TM(TM) {
942 if (!RuleConfig->parseCommandLineOption())
943 reportFatalUsageError(reason: "invalid rule identifier");
944}
945
// The move constructor and destructor are defaulted here, out of line, rather
// than inside the class definition.
// NOTE(review): presumably because RuleConfig is a std::unique_ptr to a type
// that is only complete in this file — confirm against the header.
AArch64PostLegalizerCombinerPass::AArch64PostLegalizerCombinerPass(
    AArch64PostLegalizerCombinerPass &&) = default;

AArch64PostLegalizerCombinerPass::~AArch64PostLegalizerCombinerPass() = default;
950
951PreservedAnalyses
952AArch64PostLegalizerCombinerPass::run(MachineFunction &MF,
953 MachineFunctionAnalysisManager &MFAM) {
954 if (MF.getProperties().hasFailedISel())
955 return PreservedAnalyses::all();
956
957 const bool IsOptNone = TM->isGlobalISelOptNone();
958 bool EnableOpt = !IsOptNone;
959
960 GISelValueTracking *VT = &MFAM.getResult<GISelValueTrackingAnalysis>(IR&: MF);
961 MachineDominatorTree *MDT =
962 IsOptNone ? nullptr : &MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
963 GISelCSEInfo *CSEInfo = MFAM.getResult<GISelCSEAnalysis>(IR&: MF).get();
964
965 if (!runCombiner(MF, CSEInfo, VT, MDT, RuleConfig: *RuleConfig, EnableOpt, IsOptNone))
966 return PreservedAnalyses::all();
967
968 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
969 PA.preserveSet<CFGAnalyses>();
970 PA.preserve<GISelValueTrackingAnalysis>();
971 PA.preserve<GISelCSEAnalysis>();
972 return PA;
973}
974
975namespace llvm {
976FunctionPass *createAArch64PostLegalizerCombinerLegacy(bool IsOptNone) {
977 return new AArch64PostLegalizerCombinerLegacy(IsOptNone);
978}
979} // end namespace llvm
980