//===- AMDGPUPostLegalizerCombiner.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, MachineInstr &FCmp,
                           FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinFMaxLegacy(MachineInstr &MI,
                                       const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
  void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and sign extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
  void applyCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher
  // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32
  // bits are zero extended.
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

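// Run the TableGen-generated combines first, then fall back to the manually
// written shift-narrowing combine below.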
bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

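// Match a G_SELECT whose condition is a single-use G_FCMP over the same two
// values being selected, so the pair can be rewritten to
// G_AMDGPU_FMIN_LEGACY / G_AMDGPU_FMAX_LEGACY in the apply function below.
// Illustrative MIR (register names are made up):
//   %c:_(s1) = G_FCMP floatpred(olt), %x:_(s32), %y:_(s32)
//   %r:_(s32) = G_SELECT %c, %x, %y
// records {LHS = %x, RHS = %y, Pred = olt}.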
bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, MachineInstr &FCmp, FMinFMaxLegacyInfo &Info) const {
  if (!MRI.hasOneNonDBGUse(FCmp.getOperand(0).getReg()))
    return false;

  Info.Pred =
      static_cast<CmpInst::Predicate>(FCmp.getOperand(1).getPredicate());
  Info.LHS = FCmp.getOperand(2).getReg();
  Info.RHS = FCmp.getOperand(3).getReg();
  Register True = MI.getOperand(2).getReg();
  Register False = MI.getOperand(3).getReg();

  // TODO: Handle the case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if ((Info.LHS != True || Info.RHS != False) &&
      (Info.LHS != False || Info.RHS != True))
    return false;

  // Invert the predicate if necessary so that the apply function can assume
  // that the select operands are the same as the fcmp operands.
  // (select (fcmp P, L, R), R, L) -> (select (fcmp !P, L, R), L, R)
  if (Info.LHS != True)
    Info.Pred = CmpInst::getInversePredicate(Info.Pred);

  // Only match </<=/>=/> not ==/!= etc.
  return Info.Pred != CmpInst::getSwappedPredicate(Info.Pred);
}

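// Emit G_AMDGPU_FMAX_LEGACY for greater-than style predicates and
// G_AMDGPU_FMIN_LEGACY otherwise, swapping the operands for unordered
// predicates to preserve the intended NaN behavior.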
void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  unsigned Opc = (Info.Pred & CmpInst::FCMP_OGT) ? AMDGPU::G_AMDGPU_FMAX_LEGACY
                                                 : AMDGPU::G_AMDGPU_FMIN_LEGACY;
  Register X = Info.LHS;
  Register Y = Info.RHS;
  if (Info.Pred == CmpInst::getUnorderedPredicate(Info.Pred)) {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    std::swap(X, Y);
  }

  B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());

  MI.eraseFromParent();
}

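// Match an integer-to-f32/f16 conversion whose source is known to have all
// bits above the low byte cleared, so it can be lowered via
// G_AMDGPU_CVT_F32_UBYTE0 in applyUCharToFloat().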
bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

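// Rewrite the matched conversion as G_AMDGPU_CVT_F32_UBYTE0 on an s32 source,
// any-extending or truncating the source to 32 bits as needed and adding an
// FP truncation when the original destination was 16-bit.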
void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

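// Fold a reciprocal of a square root, in either order, into a single rsq
// intrinsic when both instructions allow contraction:
//   rcp(sqrt(x)) -> rsq(x)
//   sqrt(rcp(x)) -> rsq(x)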
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}

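// Only fire when the square-root divisor has a single use; the apply below
// then rewrites y / sqrt(x) as rsq(x) * y.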
bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
    MachineInstr &MI) const {
  Register Sqrt = MI.getOperand(2).getReg();
  return MRI.hasOneNonDBGUse(Sqrt);
}

void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
    MachineInstr &MI, const Register &X) const {
  Register Dst = MI.getOperand(0).getReg();
  Register Y = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  uint32_t Flags = MI.getFlags();
  Register RSQ = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {DstTy})
                     .addUse(X)
                     .setMIFlags(Flags)
                     .getReg(0);
  B.buildFMul(Dst, RSQ, Y, Flags);
  MI.eraseFromParent();
}

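// Fold a constant shift (possibly seen through a G_ZEXT) feeding a
// G_AMDGPU_CVT_F32_UBYTEn into the byte index of the conversion.
// Illustrative MIR (register names are made up):
//   %c:_(s32) = G_CONSTANT i32 8
//   %s:_(s32) = G_LSHR %x:_(s32), %c
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %s
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %x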
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

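// A G_FCANONICALIZE is a no-op when its source is already canonical according
// to the target lowering; return the source in Reg so the rule can replace the
// result with it.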
bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign-extension instructions in order to
// generate buffer_load_{i8, i16} instructions.
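// Illustrative MIR (register names are made up, trailing load operands are
// elided):
//   %w:_(s32) = G_AMDGPU_BUFFER_LOAD_UBYTE ...
//   %s:_(s32) = G_SEXT_INREG %w, 8
// becomes
//   %s:_(s32) = G_AMDGPU_BUFFER_LOAD_SBYTE ...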

// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  Register LoadReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LoadReg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
  int64_t Width = MI.getOperand(2).getImm();
  switch (LoadMI->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
    return Width == 16;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
    return Width == 16;
  }
  return false;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  auto [LoadMI, NewOpcode] = MatchData;
  LoadMI->setDesc(TII.get(NewOpcode));
  // Update the destination register of the load with the destination register
  // of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  LoadMI->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

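// See the comment on the declaration above: use G_AMDGPU_S_MUL_U64_U32 when
// both 64-bit operands have at least 32 known leading zero bits, and
// G_AMDGPU_S_MUL_I64_I32 when both have at least 33 known sign bits.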
bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }
  return false;
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm