1//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64.h"
15#include "AArch64GlobalISelUtils.h"
16#include "AArch64TargetMachine.h"
17#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
18#include "llvm/CodeGen/GlobalISel/Combiner.h"
19#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
20#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
21#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
22#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
23#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
24#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
25#include "llvm/CodeGen/GlobalISel/Utils.h"
26#include "llvm/CodeGen/LibcallLoweringInfo.h"
27#include "llvm/CodeGen/MachineDominators.h"
28#include "llvm/CodeGen/MachineFunction.h"
29#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
30#include "llvm/CodeGen/MachineFunctionPass.h"
31#include "llvm/CodeGen/MachinePassManager.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/IR/Instructions.h"
34#include <memory>
35
36#define GET_GICOMBINER_DEPS
37#include "AArch64GenPreLegalizeGICombiner.inc"
38#undef GET_GICOMBINER_DEPS
39
40#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
41
42using namespace llvm;
43using namespace MIPatternMatch;
44
45#define GET_GICOMBINER_TYPES
46#include "AArch64GenPreLegalizeGICombiner.inc"
47#undef GET_GICOMBINER_TYPES
48
49namespace {
50
51/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
52/// are sign bits. In this case, we can transform the G_ICMP to directly compare
53/// the wide value with a zero.
54bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
55 GISelValueTracking *VT, Register &MatchInfo) {
56 assert(MI.getOpcode() == TargetOpcode::G_ICMP && VT);
57
58 auto Pred = (CmpInst::Predicate)MI.getOperand(i: 1).getPredicate();
59 if (!ICmpInst::isEquality(P: Pred))
60 return false;
61
62 Register LHS = MI.getOperand(i: 2).getReg();
63 LLT LHSTy = MRI.getType(Reg: LHS);
64 if (!LHSTy.isScalar())
65 return false;
66
67 Register RHS = MI.getOperand(i: 3).getReg();
68 Register WideReg;
69
70 if (!mi_match(R: LHS, MRI, P: m_GTrunc(Src: m_Reg(R&: WideReg))) ||
71 !mi_match(R: RHS, MRI, P: m_SpecificICst(RequestedValue: 0)))
72 return false;
73
74 LLT WideTy = MRI.getType(Reg: WideReg);
75 if (VT->computeNumSignBits(R: WideReg) <=
76 WideTy.getSizeInBits() - LHSTy.getSizeInBits())
77 return false;
78
79 MatchInfo = WideReg;
80 return true;
81}
82
83void applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
84 MachineIRBuilder &Builder,
85 GISelChangeObserver &Observer, Register &WideReg) {
86 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
87
88 LLT WideTy = MRI.getType(Reg: WideReg);
89 // We're going to directly use the wide register as the LHS, and then use an
90 // equivalent size zero for RHS.
91 Builder.setInstrAndDebugLoc(MI);
92 auto WideZero = Builder.buildConstant(Res: WideTy, Val: 0);
93 Observer.changingInstr(MI);
94 MI.getOperand(i: 2).setReg(WideReg);
95 MI.getOperand(i: 3).setReg(WideZero.getReg(Idx: 0));
96 Observer.changedInstr(MI);
97}
98
99/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
100///
101/// e.g.
102///
103/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
104bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
105 std::pair<uint64_t, uint64_t> &MatchInfo) {
106 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
107 MachineFunction &MF = *MI.getMF();
108 auto &GlobalOp = MI.getOperand(i: 1);
109 auto *GV = GlobalOp.getGlobal();
110 if (GV->isThreadLocal())
111 return false;
112
113 // Don't allow anything that could represent offsets etc.
114 if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
115 GV, TM: MF.getTarget()) != AArch64II::MO_NO_FLAG)
116 return false;
117
118 // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
119 //
120 // %g = G_GLOBAL_VALUE @x
121 // %ptr1 = G_PTR_ADD %g, cst1
122 // %ptr2 = G_PTR_ADD %g, cst2
123 // ...
124 // %ptrN = G_PTR_ADD %g, cstN
125 //
126 // Identify the *smallest* constant. We want to be able to form this:
127 //
128 // %offset_g = G_GLOBAL_VALUE @x + min_cst
129 // %g = G_PTR_ADD %offset_g, -min_cst
130 // %ptr1 = G_PTR_ADD %g, cst1
131 // ...
132 Register Dst = MI.getOperand(i: 0).getReg();
133 uint64_t MinOffset = -1ull;
134 for (auto &UseInstr : MRI.use_nodbg_instructions(Reg: Dst)) {
135 if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
136 return false;
137 auto Cst = getIConstantVRegValWithLookThrough(
138 VReg: UseInstr.getOperand(i: 2).getReg(), MRI);
139 if (!Cst)
140 return false;
141 MinOffset = std::min(a: MinOffset, b: Cst->Value.getZExtValue());
142 }
143
144 // Require that the new offset is larger than the existing one to avoid
145 // infinite loops.
146 uint64_t CurrOffset = GlobalOp.getOffset();
147 uint64_t NewOffset = MinOffset + CurrOffset;
148 if (NewOffset <= CurrOffset)
149 return false;
150
151 // Check whether folding this offset is legal. It must not go out of bounds of
152 // the referenced object to avoid violating the code model, and must be
153 // smaller than 2^20 because this is the largest offset expressible in all
154 // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
155 // stores an immediate signed 21 bit offset.)
156 //
157 // This check also prevents us from folding negative offsets, which will end
158 // up being treated in the same way as large positive ones. They could also
159 // cause code model violations, and aren't really common enough to matter.
160 if (NewOffset >= (1 << 20))
161 return false;
162
163 Type *T = GV->getValueType();
164 if (!T->isSized() ||
165 NewOffset > GV->getDataLayout().getTypeAllocSize(Ty: T))
166 return false;
167 MatchInfo = std::make_pair(x&: NewOffset, y&: MinOffset);
168 return true;
169}
170
171void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
172 MachineIRBuilder &B, GISelChangeObserver &Observer,
173 std::pair<uint64_t, uint64_t> &MatchInfo) {
174 // Change:
175 //
176 // %g = G_GLOBAL_VALUE @x
177 // %ptr1 = G_PTR_ADD %g, cst1
178 // %ptr2 = G_PTR_ADD %g, cst2
179 // ...
180 // %ptrN = G_PTR_ADD %g, cstN
181 //
182 // To:
183 //
184 // %offset_g = G_GLOBAL_VALUE @x + min_cst
185 // %g = G_PTR_ADD %offset_g, -min_cst
186 // %ptr1 = G_PTR_ADD %g, cst1
187 // ...
188 // %ptrN = G_PTR_ADD %g, cstN
189 //
190 // Then, the original G_PTR_ADDs should be folded later on so that they look
191 // like this:
192 //
193 // %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
194 uint64_t Offset, MinOffset;
195 std::tie(args&: Offset, args&: MinOffset) = MatchInfo;
196 B.setInstrAndDebugLoc(*std::next(x: MI.getIterator()));
197 Observer.changingInstr(MI);
198 auto &GlobalOp = MI.getOperand(i: 1);
199 auto *GV = GlobalOp.getGlobal();
200 GlobalOp.ChangeToGA(GV, Offset, TargetFlags: GlobalOp.getTargetFlags());
201 Register Dst = MI.getOperand(i: 0).getReg();
202 Register NewGVDst = MRI.cloneVirtualRegister(VReg: Dst);
203 MI.getOperand(i: 0).setReg(NewGVDst);
204 Observer.changedInstr(MI);
205 B.buildPtrAdd(
206 Res: Dst, Op0: NewGVDst,
207 Op1: B.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: -static_cast<int64_t>(MinOffset)));
208}
209
210// Combines vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add([us]dot(x, y))
211// Or vecreduce_add(ext(mul(ext(x), ext(y)))) -> vecreduce_add([us]dot(x, y))
212// Or vecreduce_add(ext(x)) -> vecreduce_add([us]dot(x, 1))
213// Similar to performVecReduceAddCombine in SelectionDAG
214bool matchExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
215 const AArch64Subtarget &STI,
216 std::tuple<Register, Register, bool> &MatchInfo) {
217 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
218 "Expected a G_VECREDUCE_ADD instruction");
219 assert(STI.hasDotProd() && "Target should have Dot Product feature");
220
221 MachineInstr *I1 = getDefIgnoringCopies(Reg: MI.getOperand(i: 1).getReg(), MRI);
222 Register DstReg = MI.getOperand(i: 0).getReg();
223 Register MidReg = I1->getOperand(i: 0).getReg();
224 LLT DstTy = MRI.getType(Reg: DstReg);
225 LLT MidTy = MRI.getType(Reg: MidReg);
226 if (DstTy.getScalarSizeInBits() != 32 || MidTy.getScalarSizeInBits() != 32)
227 return false;
228
229 // Detect mul(ext, ext) with symmetric ext's. If I1Opc is G_ZEXT or G_SEXT
230 // then the ext's must match the same opcode. It is set to the ext opcode on
231 // output.
232 auto tryMatchingMulOfExt = [&MRI](MachineInstr *MI, Register &Out1,
233 Register &Out2, unsigned &I1Opc) {
234 // If result of this has more than 1 use, then there is no point in creating
235 // a dot instruction
236 if (!MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
237 return false;
238
239 MachineInstr *ExtMI1 =
240 getDefIgnoringCopies(Reg: MI->getOperand(i: 1).getReg(), MRI);
241 MachineInstr *ExtMI2 =
242 getDefIgnoringCopies(Reg: MI->getOperand(i: 2).getReg(), MRI);
243 LLT Ext1DstTy = MRI.getType(Reg: ExtMI1->getOperand(i: 0).getReg());
244 LLT Ext2DstTy = MRI.getType(Reg: ExtMI2->getOperand(i: 0).getReg());
245
246 if (ExtMI1->getOpcode() != ExtMI2->getOpcode() || Ext1DstTy != Ext2DstTy)
247 return false;
248 if ((I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) &&
249 I1Opc != ExtMI1->getOpcode())
250 return false;
251 Out1 = ExtMI1->getOperand(i: 1).getReg();
252 Out2 = ExtMI2->getOperand(i: 1).getReg();
253 I1Opc = ExtMI1->getOpcode();
254 return true;
255 };
256
257 LLT SrcTy;
258 unsigned I1Opc = I1->getOpcode();
259 if (I1Opc == TargetOpcode::G_MUL) {
260 Register Out1, Out2;
261 if (!tryMatchingMulOfExt(I1, Out1, Out2, I1Opc))
262 return false;
263 SrcTy = MRI.getType(Reg: Out1);
264 std::get<0>(t&: MatchInfo) = Out1;
265 std::get<1>(t&: MatchInfo) = Out2;
266 } else if (I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) {
267 Register I1Op = I1->getOperand(i: 1).getReg();
268 MachineInstr *M = getDefIgnoringCopies(Reg: I1Op, MRI);
269 Register Out1, Out2;
270 if (M->getOpcode() == TargetOpcode::G_MUL &&
271 tryMatchingMulOfExt(M, Out1, Out2, I1Opc)) {
272 SrcTy = MRI.getType(Reg: Out1);
273 std::get<0>(t&: MatchInfo) = Out1;
274 std::get<1>(t&: MatchInfo) = Out2;
275 } else {
276 SrcTy = MRI.getType(Reg: I1Op);
277 std::get<0>(t&: MatchInfo) = I1Op;
278 std::get<1>(t&: MatchInfo) = 0;
279 }
280 } else {
281 return false;
282 }
283
284 if (I1Opc == TargetOpcode::G_ZEXT)
285 std::get<2>(t&: MatchInfo) = 0;
286 else if (I1Opc == TargetOpcode::G_SEXT)
287 std::get<2>(t&: MatchInfo) = 1;
288 else
289 return false;
290
291 if (SrcTy.getScalarSizeInBits() != 8 || SrcTy.getNumElements() % 8 != 0)
292 return false;
293
294 return true;
295}
296
297void applyExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
298 MachineIRBuilder &Builder,
299 GISelChangeObserver &Observer,
300 const AArch64Subtarget &STI,
301 std::tuple<Register, Register, bool> &MatchInfo) {
302 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
303 "Expected a G_VECREDUCE_ADD instruction");
304 assert(STI.hasDotProd() && "Target should have Dot Product feature");
305
306 // Initialise the variables
307 unsigned DotOpcode =
308 std::get<2>(t&: MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
309 Register Ext1SrcReg = std::get<0>(t&: MatchInfo);
310
311 // If there is one source register, create a vector of 0s as the second
312 // source register
313 Register Ext2SrcReg;
314 if (std::get<1>(t&: MatchInfo) == 0)
315 Ext2SrcReg = Builder.buildConstant(Res: MRI.getType(Reg: Ext1SrcReg), Val: 1)
316 ->getOperand(i: 0)
317 .getReg();
318 else
319 Ext2SrcReg = std::get<1>(t&: MatchInfo);
320
321 // Find out how many DOT instructions are needed
322 LLT SrcTy = MRI.getType(Reg: Ext1SrcReg);
323 LLT MidTy;
324 unsigned NumOfDotMI;
325 if (SrcTy.getNumElements() % 16 == 0) {
326 NumOfDotMI = SrcTy.getNumElements() / 16;
327 MidTy = LLT::fixed_vector(NumElements: 4, ScalarTy: LLT::integer(SizeInBits: 32));
328 } else if (SrcTy.getNumElements() % 8 == 0) {
329 NumOfDotMI = SrcTy.getNumElements() / 8;
330 MidTy = LLT::fixed_vector(NumElements: 2, ScalarTy: LLT::integer(SizeInBits: 32));
331 } else {
332 llvm_unreachable("Source type number of elements is not multiple of 8");
333 }
334
335 // Handle case where one DOT instruction is needed
336 if (NumOfDotMI == 1) {
337 auto Zeroes = Builder.buildConstant(Res: MidTy, Val: 0)->getOperand(i: 0).getReg();
338 auto Dot = Builder.buildInstr(Opc: DotOpcode, DstOps: {MidTy},
339 SrcOps: {Zeroes, Ext1SrcReg, Ext2SrcReg});
340 Builder.buildVecReduceAdd(Dst: MI.getOperand(i: 0), Src: Dot->getOperand(i: 0));
341 } else {
342 // If not pad the last v8 element with 0s to a v16
343 SmallVector<Register, 4> Ext1UnmergeReg;
344 SmallVector<Register, 4> Ext2UnmergeReg;
345 if (SrcTy.getNumElements() % 16 != 0) {
346 SmallVector<Register> Leftover1;
347 SmallVector<Register> Leftover2;
348
349 // Split the elements into v16i8 and v8i8
350 LLT MainTy = LLT::fixed_vector(NumElements: 16, ScalarTy: LLT::integer(SizeInBits: 8));
351 LLT LeftoverTy1, LeftoverTy2;
352 if ((!extractParts(Reg: Ext1SrcReg, RegTy: MRI.getType(Reg: Ext1SrcReg), MainTy,
353 LeftoverTy&: LeftoverTy1, VRegs&: Ext1UnmergeReg, LeftoverVRegs&: Leftover1, MIRBuilder&: Builder,
354 MRI)) ||
355 (!extractParts(Reg: Ext2SrcReg, RegTy: MRI.getType(Reg: Ext2SrcReg), MainTy,
356 LeftoverTy&: LeftoverTy2, VRegs&: Ext2UnmergeReg, LeftoverVRegs&: Leftover2, MIRBuilder&: Builder,
357 MRI))) {
358 llvm_unreachable("Unable to split this vector properly");
359 }
360
361 // Pad the leftover v8i8 vector with register of 0s of type v8i8
362 Register v8Zeroes = Builder.buildConstant(Res: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8), Val: 0)
363 ->getOperand(i: 0)
364 .getReg();
365
366 Ext1UnmergeReg.push_back(
367 Elt: Builder
368 .buildMergeLikeInstr(Res: LLT::fixed_vector(NumElements: 16, ScalarTy: LLT::integer(SizeInBits: 8)),
369 Ops: {Leftover1[0], v8Zeroes})
370 .getReg(Idx: 0));
371 Ext2UnmergeReg.push_back(
372 Elt: Builder
373 .buildMergeLikeInstr(Res: LLT::fixed_vector(NumElements: 16, ScalarTy: LLT::integer(SizeInBits: 8)),
374 Ops: {Leftover2[0], v8Zeroes})
375 .getReg(Idx: 0));
376
377 } else {
378 // Unmerge the source vectors to v16i8
379 unsigned SrcNumElts = SrcTy.getNumElements();
380 extractParts(Reg: Ext1SrcReg, Ty: LLT::fixed_vector(NumElements: 16, ScalarTy: LLT::integer(SizeInBits: 8)),
381 NumParts: SrcNumElts / 16, VRegs&: Ext1UnmergeReg, MIRBuilder&: Builder, MRI);
382 extractParts(Reg: Ext2SrcReg, Ty: LLT::fixed_vector(NumElements: 16, ScalarTy: LLT::integer(SizeInBits: 8)),
383 NumParts: SrcNumElts / 16, VRegs&: Ext2UnmergeReg, MIRBuilder&: Builder, MRI);
384 }
385
386 // Build the UDOT instructions
387 SmallVector<Register, 2> DotReg;
388 unsigned NumElements = 0;
389 for (unsigned i = 0; i < Ext1UnmergeReg.size(); i++) {
390 LLT ZeroesLLT;
391 // Check if it is 16 or 8 elements. Set Zeroes to the according size
392 if (MRI.getType(Reg: Ext1UnmergeReg[i]).getNumElements() == 16) {
393 ZeroesLLT = LLT::fixed_vector(NumElements: 4, ScalarTy: LLT::integer(SizeInBits: 32));
394 NumElements += 4;
395 } else {
396 ZeroesLLT = LLT::fixed_vector(NumElements: 2, ScalarTy: LLT::integer(SizeInBits: 32));
397 NumElements += 2;
398 }
399 auto Zeroes = Builder.buildConstant(Res: ZeroesLLT, Val: 0)->getOperand(i: 0).getReg();
400 DotReg.push_back(
401 Elt: Builder
402 .buildInstr(Opc: DotOpcode, DstOps: {MRI.getType(Reg: Zeroes)},
403 SrcOps: {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
404 .getReg(Idx: 0));
405 }
406
407 // Merge the output
408 auto ConcatMI = Builder.buildConcatVectors(
409 Res: LLT::fixed_vector(NumElements, ScalarTy: LLT::integer(SizeInBits: 32)), Ops: DotReg);
410
411 // Put it through a vector reduction
412 Builder.buildVecReduceAdd(Dst: MI.getOperand(i: 0).getReg(),
413 Src: ConcatMI->getOperand(i: 0).getReg());
414 }
415
416 // Erase the dead instructions
417 MI.eraseFromParent();
418}
419
420// Matches {U/S}ADDV(ext(x)) => {U/S}ADDLV(x)
421// Ensure that the type coming from the extend instruction is the right size
422bool matchExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
423 std::pair<Register, bool> &MatchInfo) {
424 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
425 "Expected G_VECREDUCE_ADD Opcode");
426
427 // Check if the last instruction is an extend
428 MachineInstr *ExtMI = getDefIgnoringCopies(Reg: MI.getOperand(i: 1).getReg(), MRI);
429 auto ExtOpc = ExtMI->getOpcode();
430
431 if (ExtOpc == TargetOpcode::G_ZEXT)
432 std::get<1>(in&: MatchInfo) = 0;
433 else if (ExtOpc == TargetOpcode::G_SEXT)
434 std::get<1>(in&: MatchInfo) = 1;
435 else
436 return false;
437
438 // Check if the source register is a valid type
439 Register ExtSrcReg = ExtMI->getOperand(i: 1).getReg();
440 LLT ExtSrcTy = MRI.getType(Reg: ExtSrcReg);
441 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
442 if (ExtSrcTy.getScalarSizeInBits() * 2 > DstTy.getScalarSizeInBits())
443 return false;
444 if ((DstTy.getScalarSizeInBits() == 16 &&
445 ExtSrcTy.getNumElements() % 8 == 0 && ExtSrcTy.getNumElements() < 256) ||
446 (DstTy.getScalarSizeInBits() == 32 &&
447 ExtSrcTy.getNumElements() % 4 == 0) ||
448 (DstTy.getScalarSizeInBits() == 64 &&
449 ExtSrcTy.getNumElements() % 4 == 0)) {
450 std::get<0>(in&: MatchInfo) = ExtSrcReg;
451 return true;
452 }
453 return false;
454}
455
456void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
457 MachineIRBuilder &B, GISelChangeObserver &Observer,
458 std::pair<Register, bool> &MatchInfo) {
459 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
460 "Expected G_VECREDUCE_ADD Opcode");
461
462 unsigned Opc = std::get<1>(in&: MatchInfo) ? AArch64::G_SADDLV : AArch64::G_UADDLV;
463 Register SrcReg = std::get<0>(in&: MatchInfo);
464 Register DstReg = MI.getOperand(i: 0).getReg();
465 LLT SrcTy = MRI.getType(Reg: SrcReg);
466 LLT DstTy = MRI.getType(Reg: DstReg);
467
468 // If SrcTy has more elements than expected, split them into multiple
469 // instructions and sum the results
470 LLT MainTy;
471 SmallVector<Register, 1> WorkingRegisters;
472 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
473 unsigned SrcNumElem = SrcTy.getNumElements();
474 if ((SrcScalSize == 8 && SrcNumElem > 16) ||
475 (SrcScalSize == 16 && SrcNumElem > 8) ||
476 (SrcScalSize == 32 && SrcNumElem > 4)) {
477
478 LLT LeftoverTy;
479 SmallVector<Register, 4> LeftoverRegs;
480 if (SrcScalSize == 8)
481 MainTy = LLT::fixed_vector(NumElements: 16, ScalarTy: LLT::integer(SizeInBits: 8));
482 else if (SrcScalSize == 16)
483 MainTy = LLT::fixed_vector(NumElements: 8, ScalarTy: LLT::integer(SizeInBits: 16));
484 else if (SrcScalSize == 32)
485 MainTy = LLT::fixed_vector(NumElements: 4, ScalarTy: LLT::integer(SizeInBits: 32));
486 else
487 llvm_unreachable("Source's Scalar Size not supported");
488
489 // Extract the parts and put each extracted sources through U/SADDLV and put
490 // the values inside a small vec
491 extractParts(Reg: SrcReg, RegTy: SrcTy, MainTy, LeftoverTy, VRegs&: WorkingRegisters,
492 LeftoverVRegs&: LeftoverRegs, MIRBuilder&: B, MRI);
493 llvm::append_range(C&: WorkingRegisters, R&: LeftoverRegs);
494 } else {
495 WorkingRegisters.push_back(Elt: SrcReg);
496 MainTy = SrcTy;
497 }
498
499 unsigned MidScalarSize = MainTy.getScalarSizeInBits() * 2;
500 LLT MidScalarLLT = LLT::integer(SizeInBits: MidScalarSize);
501 Register ZeroReg = B.buildConstant(Res: LLT::integer(SizeInBits: 64), Val: 0).getReg(Idx: 0);
502 for (unsigned I = 0; I < WorkingRegisters.size(); I++) {
503 // If the number of elements is too small to build an instruction, extend
504 // its size before applying addlv
505 LLT WorkingRegTy = MRI.getType(Reg: WorkingRegisters[I]);
506 if ((WorkingRegTy.getScalarSizeInBits() == 8) &&
507 (WorkingRegTy.getNumElements() == 4)) {
508 WorkingRegisters[I] =
509 B.buildInstr(Opc: std::get<1>(in&: MatchInfo) ? TargetOpcode::G_SEXT
510 : TargetOpcode::G_ZEXT,
511 DstOps: {LLT::fixed_vector(NumElements: 4, ScalarTy: LLT::integer(SizeInBits: 16))},
512 SrcOps: {WorkingRegisters[I]})
513 .getReg(Idx: 0);
514 }
515
516 // Generate the {U/S}ADDLV instruction, whose output is always double of the
517 // Src's Scalar size
518 LLT AddlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(NumElements: 4, ScalarTy: LLT::integer(SizeInBits: 32))
519 : LLT::fixed_vector(NumElements: 2, ScalarTy: LLT::integer(SizeInBits: 64));
520 Register AddlvReg =
521 B.buildInstr(Opc, DstOps: {AddlvTy}, SrcOps: {WorkingRegisters[I]}).getReg(Idx: 0);
522
523 // The output from {U/S}ADDLV gets placed in the lowest lane of a v4i32 or
524 // v2i64 register.
525 // i16, i32 results uses v4i32 registers
526 // i64 results uses v2i64 registers
527 // Therefore we have to extract/truncate the the value to the right type
528 if (MidScalarSize == 32 || MidScalarSize == 64) {
529 WorkingRegisters[I] = B.buildInstr(Opc: AArch64::G_EXTRACT_VECTOR_ELT,
530 DstOps: {MidScalarLLT}, SrcOps: {AddlvReg, ZeroReg})
531 .getReg(Idx: 0);
532 } else {
533 Register ExtractReg =
534 B.buildInstr(Opc: AArch64::G_EXTRACT_VECTOR_ELT, DstOps: {LLT::integer(SizeInBits: 32)},
535 SrcOps: {AddlvReg, ZeroReg})
536 .getReg(Idx: 0);
537 WorkingRegisters[I] =
538 B.buildTrunc(Res: {MidScalarLLT}, Op: {ExtractReg}).getReg(Idx: 0);
539 }
540 }
541
542 Register OutReg;
543 if (WorkingRegisters.size() > 1) {
544 OutReg = B.buildAdd(Dst: MidScalarLLT, Src0: WorkingRegisters[0], Src1: WorkingRegisters[1])
545 .getReg(Idx: 0);
546 for (unsigned I = 2; I < WorkingRegisters.size(); I++) {
547 OutReg = B.buildAdd(Dst: MidScalarLLT, Src0: OutReg, Src1: WorkingRegisters[I]).getReg(Idx: 0);
548 }
549 } else {
550 OutReg = WorkingRegisters[0];
551 }
552
553 if (DstTy.getScalarSizeInBits() > MidScalarSize) {
554 // Handle the scalar value if the DstTy's Scalar Size is more than double
555 // Src's ScalarType
556 B.buildInstr(Opc: std::get<1>(in&: MatchInfo) ? TargetOpcode::G_SEXT
557 : TargetOpcode::G_ZEXT,
558 DstOps: {DstReg}, SrcOps: {OutReg});
559 } else {
560 B.buildCopy(Res: DstReg, Op: OutReg);
561 }
562
563 MI.eraseFromParent();
564}
565
566// Pushes ADD/SUB/MUL through extend instructions to decrease the number of
567// extend instruction at the end by allowing selection of {s|u}addl sooner
568// i32 add(i32 ext i8, i32 ext i8) => i32 ext(i16 add(i16 ext i8, i16 ext i8))
569bool matchPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
570 Register DstReg, Register SrcReg1, Register SrcReg2) {
571 assert((MI.getOpcode() == TargetOpcode::G_ADD ||
572 MI.getOpcode() == TargetOpcode::G_SUB ||
573 MI.getOpcode() == TargetOpcode::G_MUL) &&
574 "Expected a G_ADD, G_SUB or G_MUL instruction\n");
575
576 // Deal with vector types only
577 LLT DstTy = MRI.getType(Reg: DstReg);
578 if (!DstTy.isVector())
579 return false;
580
581 // Return true if G_{S|Z}EXT instruction is more than 2* source
582 Register ExtDstReg = MI.getOperand(i: 1).getReg();
583 LLT Ext1SrcTy = MRI.getType(Reg: SrcReg1);
584 LLT Ext2SrcTy = MRI.getType(Reg: SrcReg2);
585 unsigned ExtDstScal = MRI.getType(Reg: ExtDstReg).getScalarSizeInBits();
586 unsigned Ext1SrcScal = Ext1SrcTy.getScalarSizeInBits();
587 if (((Ext1SrcScal == 8 && ExtDstScal == 32) ||
588 ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) &&
589 Ext1SrcTy == Ext2SrcTy)
590 return true;
591
592 return false;
593}
594
595void applyPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
596 MachineIRBuilder &B, bool isSExt, Register DstReg,
597 Register SrcReg1, Register SrcReg2) {
598 LLT SrcTy = MRI.getType(Reg: SrcReg1);
599 LLT MidTy = SrcTy.changeElementSize(NewEltSize: SrcTy.getScalarSizeInBits() * 2);
600 unsigned Opc = isSExt ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
601 Register Ext1Reg = B.buildInstr(Opc, DstOps: {MidTy}, SrcOps: {SrcReg1}).getReg(Idx: 0);
602 Register Ext2Reg = B.buildInstr(Opc, DstOps: {MidTy}, SrcOps: {SrcReg2}).getReg(Idx: 0);
603 Register AddReg =
604 B.buildInstr(Opc: MI.getOpcode(), DstOps: {MidTy}, SrcOps: {Ext1Reg, Ext2Reg}).getReg(Idx: 0);
605
606 // G_SUB has to sign-extend the result.
607 // G_ADD needs to sext from sext and can sext or zext from zext, and G_MUL
608 // needs to use the original opcode so the original opcode is used for both.
609 if (MI.getOpcode() == TargetOpcode::G_ADD ||
610 MI.getOpcode() == TargetOpcode::G_MUL)
611 B.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {AddReg});
612 else
613 B.buildSExt(Res: DstReg, Op: AddReg);
614
615 MI.eraseFromParent();
616}
617
618bool matchSimplifyUADDO(MachineInstr &MI, MachineRegisterInfo &MRI,
619 std::pair<Register, Register> &MatchInfo) {
620 // Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
621 // result is only used in the no-overflow case. It is restricted to cases
622 // where we know that the high-bits of the operands are 0. If there's an
623 // overflow, then the 9th or 17th bit must be set, which can be checked
624 // using TBNZ.
625 //
626 // Change (for UADDOs on 8 and 16 bits):
627 //
628 // %z0 = G_ASSERT_ZEXT _
629 // %op0 = G_TRUNC %z0
630 // %z1 = G_ASSERT_ZEXT _
631 // %op1 = G_TRUNC %z1
632 // %val, %cond = G_UADDO %op0, %op1
633 // G_BRCOND %cond, %error.bb
634 //
635 // error.bb:
636 // (no successors and no uses of %val)
637 //
638 // To:
639 //
640 // %z0 = G_ASSERT_ZEXT _
641 // %z1 = G_ASSERT_ZEXT _
642 // %add = G_ADD %z0, %z1
643 // %val = G_TRUNC %add
644 // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
645 // %cond = G_ICMP NE, %bit, 0
646 // G_BRCOND %cond, %error.bb
647
648 MachineOperand *DefOp0 = MRI.getOneDef(Reg: MI.getOperand(i: 2).getReg());
649 MachineOperand *DefOp1 = MRI.getOneDef(Reg: MI.getOperand(i: 3).getReg());
650 Register Op0Wide;
651 Register Op1Wide;
652 if (!mi_match(R: DefOp0->getParent(), MRI, P: m_GTrunc(Src: m_Reg(R&: Op0Wide))) ||
653 !mi_match(R: DefOp1->getParent(), MRI, P: m_GTrunc(Src: m_Reg(R&: Op1Wide))))
654 return false;
655 LLT WideTy0 = MRI.getType(Reg: Op0Wide);
656 LLT WideTy1 = MRI.getType(Reg: Op1Wide);
657 Register ResVal = MI.getOperand(i: 0).getReg();
658 LLT OpTy = MRI.getType(Reg: ResVal);
659 MachineInstr *Op0WideDef = MRI.getVRegDef(Reg: Op0Wide);
660 MachineInstr *Op1WideDef = MRI.getVRegDef(Reg: Op1Wide);
661
662 unsigned OpTySize = OpTy.getScalarSizeInBits();
663 // First check that the G_TRUNC feeding the G_UADDO are no-ops, because the
664 // inputs have been zero-extended.
665 if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
666 Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
667 OpTySize != Op0WideDef->getOperand(i: 2).getImm() ||
668 OpTySize != Op1WideDef->getOperand(i: 2).getImm())
669 return false;
670
671 // Only scalar UADDO with either 8 or 16 bit operands are handled.
672 if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
673 OpTySize >= WideTy0.getScalarSizeInBits() ||
674 (OpTySize != 8 && OpTySize != 16))
675 return false;
676
677 // The overflow-status result must be used by a branch only.
678 Register ResStatus = MI.getOperand(i: 1).getReg();
679 if (!MRI.hasOneNonDBGUse(RegNo: ResStatus))
680 return false;
681 MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(RegNo: ResStatus);
682 if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
683 return false;
684
685 // Make sure the computed result is only used in the no-overflow blocks.
686 MachineBasicBlock *CurrentMBB = MI.getParent();
687 MachineBasicBlock *FailMBB = CondUser->getOperand(i: 1).getMBB();
688 if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
689 return false;
690 if (any_of(Range: MRI.use_nodbg_instructions(Reg: ResVal),
691 P: [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
692 return &MI != &I &&
693 (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
694 }))
695 return false;
696
697 MatchInfo = {Op0Wide, Op1Wide};
698 return true;
699}
700
701void applySimplifyUADDO(MachineInstr &MI, MachineRegisterInfo &MRI,
702 MachineIRBuilder &B, GISelChangeObserver &Observer,
703 const CombinerHelper &Helper,
704 const std::pair<Register, Register> &MatchInfo) {
705 Register Op0Wide = MatchInfo.first;
706 Register Op1Wide = MatchInfo.second;
707 Register ResVal = MI.getOperand(i: 0).getReg();
708 Register ResStatus = MI.getOperand(i: 1).getReg();
709 unsigned OpTySize = MRI.getType(Reg: ResVal).getScalarSizeInBits();
710
711 // Remove G_UADDO.
712 B.setInstrAndDebugLoc(*MI.getNextNode());
713 MI.eraseFromParent();
714
715 // Emit wide add.
716 Register AddDst = MRI.cloneVirtualRegister(VReg: Op0Wide);
717 B.buildInstr(Opc: TargetOpcode::G_ADD, DstOps: {AddDst}, SrcOps: {Op0Wide, Op1Wide});
718
719 // Emit check of the 9th or 17th bit and update users (the branch). This will
720 // later be folded to TBNZ.
721 Register CondBit = MRI.cloneVirtualRegister(VReg: Op0Wide);
722 B.buildAnd(
723 Dst: CondBit, Src0: AddDst,
724 Src1: B.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: OpTySize == 8 ? 1 << 8 : 1 << 16));
725 B.buildICmp(Pred: CmpInst::ICMP_NE, Res: ResStatus, Op0: CondBit,
726 Op1: B.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: 0));
727
728 // Update ZEXts users of the result value. Because all uses are in the
729 // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
730 B.buildZExtOrTrunc(Res: ResVal, Op: AddDst);
731 for (MachineOperand &U : make_early_inc_range(Range: MRI.use_operands(Reg: ResVal))) {
732 Register WideReg;
733 if (mi_match(R: U.getParent(), MRI, P: m_GZExt(Src: m_Reg(R&: WideReg)))) {
734 auto OldR = U.getParent()->getOperand(i: 0).getReg();
735 Observer.erasingInstr(MI&: *U.getParent());
736 U.getParent()->eraseFromParent();
737 Helper.replaceRegWith(MRI, FromReg: OldR, ToReg: AddDst);
738 }
739 }
740}
741
742class AArch64PreLegalizerCombinerImpl : public Combiner {
743protected:
744 const CombinerHelper Helper;
745 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
746 const AArch64Subtarget &STI;
747 const LibcallLoweringInfo &Libcalls;
748
749public:
750 AArch64PreLegalizerCombinerImpl(
751 MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,
752 GISelCSEInfo *CSEInfo,
753 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
754 const AArch64Subtarget &STI, const LibcallLoweringInfo &Libcalls,
755 MachineDominatorTree *MDT, const LegalizerInfo *LI);
756
757 static const char *getName() { return "AArch6400PreLegalizerCombiner"; }
758
759 bool tryCombineAll(MachineInstr &I) const override;
760
761 bool tryCombineAllImpl(MachineInstr &I) const;
762
763private:
764#define GET_GICOMBINER_CLASS_MEMBERS
765#include "AArch64GenPreLegalizeGICombiner.inc"
766#undef GET_GICOMBINER_CLASS_MEMBERS
767};
768
769#define GET_GICOMBINER_IMPL
770#include "AArch64GenPreLegalizeGICombiner.inc"
771#undef GET_GICOMBINER_IMPL
772
773AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
774 MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,
775 GISelCSEInfo *CSEInfo,
776 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
777 const AArch64Subtarget &STI, const LibcallLoweringInfo &Libcalls,
778 MachineDominatorTree *MDT, const LegalizerInfo *LI)
779 : Combiner(MF, CInfo, &VT, CSEInfo),
780 Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI),
781 RuleConfig(RuleConfig), STI(STI), Libcalls(Libcalls),
782#define GET_GICOMBINER_CONSTRUCTOR_INITS
783#include "AArch64GenPreLegalizeGICombiner.inc"
784#undef GET_GICOMBINER_CONSTRUCTOR_INITS
785{
786}
787
788bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
789 if (tryCombineAllImpl(I&: MI))
790 return true;
791
792 return false;
793}
794
795bool runCombiner(MachineFunction &MF, GISelCSEInfo *CSEInfo,
796 GISelValueTracking *VT, MachineDominatorTree *MDT,
797 const LibcallLoweringInfo &Libcalls,
798 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
799 bool EnableOpt) {
800 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
801 const auto *LI = ST.getLegalizerInfo();
802
803 const Function &F = MF.getFunction();
804
805 CombinerInfo CInfo(/*AllowIllegalOps=*/true, /*ShouldLegalizeIllegal=*/false,
806 /*LegalizerInfo=*/nullptr, EnableOpt, F.hasOptSize(),
807 F.hasMinSize());
808 // Disable fixed-point iteration to reduce compile-time
809 CInfo.MaxIterations = 1;
810 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
811 // This is the first Combiner, so the input IR might contain dead
812 // instructions.
813 CInfo.EnableFullDCE = true;
814 AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, ST,
815 Libcalls, MDT, LI);
816 return Impl.combineMachineInstrs();
817}
818
819// Pass boilerplate
820// ================
821
822class AArch64PreLegalizerCombinerLegacy : public MachineFunctionPass {
823public:
824 static char ID;
825
826 AArch64PreLegalizerCombinerLegacy();
827
828 StringRef getPassName() const override {
829 return "AArch64PreLegalizerCombiner";
830 }
831
832 bool runOnMachineFunction(MachineFunction &MF) override;
833
834 void getAnalysisUsage(AnalysisUsage &AU) const override;
835
836private:
837 AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
838};
839} // end anonymous namespace
840
841void AArch64PreLegalizerCombinerLegacy::getAnalysisUsage(
842 AnalysisUsage &AU) const {
843 AU.setPreservesCFG();
844 getSelectionDAGFallbackAnalysisUsage(AU);
845 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
846 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
847 AU.addRequired<MachineDominatorTreeWrapperPass>();
848 AU.addPreserved<MachineDominatorTreeWrapperPass>();
849 AU.addRequired<GISelCSEAnalysisWrapperPass>();
850 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
851 AU.addRequired<LibcallLoweringInfoWrapper>();
852 MachineFunctionPass::getAnalysisUsage(AU);
853}
854
855AArch64PreLegalizerCombinerLegacy::AArch64PreLegalizerCombinerLegacy()
856 : MachineFunctionPass(ID) {
857 if (!RuleConfig.parseCommandLineOption())
858 report_fatal_error(reason: "Invalid rule identifier");
859}
860
861bool AArch64PreLegalizerCombinerLegacy::runOnMachineFunction(
862 MachineFunction &MF) {
863 if (MF.getProperties().hasFailedISel())
864 return false;
865 // Enable CSE.
866 GISelCSEAnalysisWrapper &Wrapper =
867 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
868 auto *CSEInfo =
869 &Wrapper.get(CSEOpt: getStandardCSEConfigForOpt(Level: MF.getTarget().getOptLevel()));
870
871 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
872 const LibcallLoweringInfo &Libcalls =
873 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
874 M: *MF.getFunction().getParent(), Subtarget: ST);
875
876 GISelValueTracking *VT =
877 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
878 MachineDominatorTree *MDT =
879 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
880 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
881 !skipFunction(F: MF.getFunction());
882 return runCombiner(MF, CSEInfo, VT, MDT, Libcalls, RuleConfig, EnableOpt);
883}
884
885char AArch64PreLegalizerCombinerLegacy::ID = 0;
886INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombinerLegacy, DEBUG_TYPE,
887 "Combine AArch64 machine instrs before legalization",
888 false, false)
889INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
890INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
891INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
892INITIALIZE_PASS_END(AArch64PreLegalizerCombinerLegacy, DEBUG_TYPE,
893 "Combine AArch64 machine instrs before legalization", false,
894 false)
895
896AArch64PreLegalizerCombinerPass::AArch64PreLegalizerCombinerPass()
897 : RuleConfig(
898 std::make_unique<AArch64PreLegalizerCombinerImplRuleConfig>()) {
899 if (!RuleConfig->parseCommandLineOption())
900 reportFatalUsageError(reason: "invalid rule identifier");
901}
902
903AArch64PreLegalizerCombinerPass::AArch64PreLegalizerCombinerPass(
904 AArch64PreLegalizerCombinerPass &&) = default;
905
906AArch64PreLegalizerCombinerPass::~AArch64PreLegalizerCombinerPass() = default;
907
908PreservedAnalyses
909AArch64PreLegalizerCombinerPass::run(MachineFunction &MF,
910 MachineFunctionAnalysisManager &MFAM) {
911 if (MF.getProperties().hasFailedISel())
912 return PreservedAnalyses::all();
913
914 auto *CSEInfo = MFAM.getResult<GISelCSEAnalysis>(IR&: MF).get();
915 GISelValueTracking &VT = MFAM.getResult<GISelValueTrackingAnalysis>(IR&: MF);
916 MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
917
918 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
919 auto &MAMProxy =
920 MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(IR&: MF);
921 const LibcallLoweringModuleAnalysisResult *LibcallResult =
922 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(
923 IR&: *MF.getFunction().getParent());
924 if (!LibcallResult)
925 reportFatalUsageError(reason: "LibcallLoweringModuleAnalysis result not available");
926
927 const LibcallLoweringInfo &Libcalls = LibcallResult->getLibcallLowering(Subtarget: ST);
928
929 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None;
930
931 if (!runCombiner(MF, CSEInfo, VT: &VT, MDT: &MDT, Libcalls, RuleConfig: *RuleConfig, EnableOpt))
932 return PreservedAnalyses::all();
933
934 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
935 PA.preserveSet<CFGAnalyses>();
936 PA.preserve<GISelValueTrackingAnalysis>();
937 PA.preserve<MachineDominatorTreeAnalysis>();
938 PA.preserve<GISelCSEAnalysis>();
939 return PA;
940}
941
942namespace llvm {
943FunctionPass *createAArch64PreLegalizerCombiner() {
944 return new AArch64PreLegalizerCombinerLegacy();
945}
946} // end namespace llvm
947