1//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPUCombinerHelper.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
20#include "llvm/CodeGen/GlobalISel/Combiner.h"
21#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
24#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
25#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26#include "llvm/CodeGen/MachineDominators.h"
27#include "llvm/CodeGen/TargetPassConfig.h"
28#include "llvm/Target/TargetMachine.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38namespace {
39
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
44class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45protected:
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 const AMDGPUCombinerHelper Helper;
49
50public:
51 AMDGPUPreLegalizerCombinerImpl(
52 MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,
53 GISelCSEInfo *CSEInfo,
54 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
55 const GCNSubtarget &STI, MachineDominatorTree *MDT,
56 const LegalizerInfo *LI);
57
58 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
59
60 bool tryCombineAllImpl(MachineInstr &MI) const;
61 bool tryCombineAll(MachineInstr &I) const override;
62
63 struct ClampI64ToI16MatchInfo {
64 int64_t Cmp1 = 0;
65 int64_t Cmp2 = 0;
66 Register Origin;
67 };
68
69 bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
70 const MachineFunction &MF,
71 ClampI64ToI16MatchInfo &MatchInfo) const;
72
73 void applyClampI64ToI16(MachineInstr &MI,
74 const ClampI64ToI16MatchInfo &MatchInfo) const;
75
76private:
77#define GET_GICOMBINER_CLASS_MEMBERS
78#define AMDGPUSubtarget GCNSubtarget
79#include "AMDGPUGenPreLegalizeGICombiner.inc"
80#undef GET_GICOMBINER_CLASS_MEMBERS
81#undef AMDGPUSubtarget
82};
83
84#define GET_GICOMBINER_IMPL
85#define AMDGPUSubtarget GCNSubtarget
86#include "AMDGPUGenPreLegalizeGICombiner.inc"
87#undef AMDGPUSubtarget
88#undef GET_GICOMBINER_IMPL
89
90AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
91 MachineFunction &MF, CombinerInfo &CInfo, GISelValueTracking &VT,
92 GISelCSEInfo *CSEInfo,
93 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
94 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
95 : Combiner(MF, CInfo, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
96 Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI, STI),
97#define GET_GICOMBINER_CONSTRUCTOR_INITS
98#include "AMDGPUGenPreLegalizeGICombiner.inc"
99#undef GET_GICOMBINER_CONSTRUCTOR_INITS
100{
101}
102
103bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
104 if (tryCombineAllImpl(I&: MI))
105 return true;
106 return false;
107}
108
109bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
110 MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
111 ClampI64ToI16MatchInfo &MatchInfo) const {
112 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
113
114 // Try to find a pattern where an i64 value should get clamped to short.
115 const LLT SrcType = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
116 if (SrcType != LLT::scalar(SizeInBits: 64))
117 return false;
118
119 const LLT DstType = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
120 if (DstType != LLT::scalar(SizeInBits: 16))
121 return false;
122
123 Register Base;
124
125 auto IsApplicableForCombine = [&MatchInfo]() -> bool {
126 const auto Cmp1 = MatchInfo.Cmp1;
127 const auto Cmp2 = MatchInfo.Cmp2;
128 const auto Diff = std::abs(i: Cmp2 - Cmp1);
129
130 // If the difference between both comparison values is 0 or 1, there is no
131 // need to clamp.
132 if (Diff == 0 || Diff == 1)
133 return false;
134
135 const int64_t Min = std::numeric_limits<int16_t>::min();
136 const int64_t Max = std::numeric_limits<int16_t>::max();
137
138 // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
139 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
140 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
141 };
142
143 // Try to match a combination of min / max MIR opcodes.
144 if (mi_match(R: MI.getOperand(i: 1).getReg(), MRI,
145 P: m_GSMin(L: m_Reg(R&: Base), R: m_ICst(Cst&: MatchInfo.Cmp1)))) {
146 if (mi_match(R: Base, MRI,
147 P: m_GSMax(L: m_Reg(R&: MatchInfo.Origin), R: m_ICst(Cst&: MatchInfo.Cmp2)))) {
148 return IsApplicableForCombine();
149 }
150 }
151
152 if (mi_match(R: MI.getOperand(i: 1).getReg(), MRI,
153 P: m_GSMax(L: m_Reg(R&: Base), R: m_ICst(Cst&: MatchInfo.Cmp1)))) {
154 if (mi_match(R: Base, MRI,
155 P: m_GSMin(L: m_Reg(R&: MatchInfo.Origin), R: m_ICst(Cst&: MatchInfo.Cmp2)))) {
156 return IsApplicableForCombine();
157 }
158 }
159
160 return false;
161}
162
163// We want to find a combination of instructions that
164// gets generated when an i64 gets clamped to i16.
165// The corresponding pattern is:
166// G_MAX / G_MAX for i16 <= G_TRUNC i64.
167// This can be efficiently written as following:
168// v_cvt_pk_i16_i32 v0, v0, v1
169// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
170void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
171 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
172
173 Register Src = MatchInfo.Origin;
174 assert(MI.getMF()->getRegInfo().getType(Src) == LLT::scalar(64));
175 const LLT S32 = LLT::scalar(SizeInBits: 32);
176
177 auto Unmerge = B.buildUnmerge(Res: S32, Op: Src);
178
179 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
180
181 const LLT V2S16 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16);
182 auto CvtPk =
183 B.buildInstr(Opc: AMDGPU::G_AMDGPU_CVT_PK_I16_I32, DstOps: {V2S16},
184 SrcOps: {Unmerge.getReg(Idx: 0), Unmerge.getReg(Idx: 1)}, Flags: MI.getFlags());
185
186 auto MinBoundary = std::min(a: MatchInfo.Cmp1, b: MatchInfo.Cmp2);
187 auto MaxBoundary = std::max(a: MatchInfo.Cmp1, b: MatchInfo.Cmp2);
188 auto MinBoundaryDst = B.buildConstant(Res: S32, Val: MinBoundary);
189 auto MaxBoundaryDst = B.buildConstant(Res: S32, Val: MaxBoundary);
190
191 auto Bitcast = B.buildBitcast(Dst: {S32}, Src: CvtPk);
192
193 auto Med3 = B.buildInstr(
194 Opc: AMDGPU::G_AMDGPU_SMED3, DstOps: {S32},
195 SrcOps: {MinBoundaryDst.getReg(Idx: 0), Bitcast.getReg(Idx: 0), MaxBoundaryDst.getReg(Idx: 0)},
196 Flags: MI.getFlags());
197
198 B.buildTrunc(Res: MI.getOperand(i: 0).getReg(), Op: Med3);
199
200 MI.eraseFromParent();
201}
202
203// Pass boilerplate
204// ================
205
206class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
207public:
208 static char ID;
209
210 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
211
212 StringRef getPassName() const override {
213 return "AMDGPUPreLegalizerCombiner";
214 }
215
216 bool runOnMachineFunction(MachineFunction &MF) override;
217
218 void getAnalysisUsage(AnalysisUsage &AU) const override;
219
220private:
221 bool IsOptNone;
222 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
223};
224} // end anonymous namespace
225
226void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
227 AU.addRequired<TargetPassConfig>();
228 AU.setPreservesCFG();
229 getSelectionDAGFallbackAnalysisUsage(AU);
230 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
231 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
232 if (!IsOptNone) {
233 AU.addRequired<MachineDominatorTreeWrapperPass>();
234 AU.addPreserved<MachineDominatorTreeWrapperPass>();
235 }
236
237 AU.addRequired<GISelCSEAnalysisWrapperPass>();
238 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
239 MachineFunctionPass::getAnalysisUsage(AU);
240}
241
242AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
243 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
244 if (!RuleConfig.parseCommandLineOption())
245 report_fatal_error(reason: "Invalid rule identifier");
246}
247
248bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
249 if (MF.getProperties().hasFailedISel())
250 return false;
251 auto *TPC = &getAnalysis<TargetPassConfig>();
252 const Function &F = MF.getFunction();
253 bool EnableOpt =
254 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
255 GISelValueTracking *VT =
256 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
257
258 // Enable CSE.
259 GISelCSEAnalysisWrapper &Wrapper =
260 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
261 auto *CSEInfo = &Wrapper.get(CSEOpt: TPC->getCSEConfig());
262
263 const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
264 MachineDominatorTree *MDT =
265 IsOptNone ? nullptr
266 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
267 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
268 nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
269 // Disable fixed-point iteration to reduce compile-time
270 CInfo.MaxIterations = 1;
271 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
272 // This is the first Combiner, so the input IR might contain dead
273 // instructions.
274 CInfo.EnableFullDCE = true;
275 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, STI,
276 MDT, STI.getLegalizerInfo());
277 return Impl.combineMachineInstrs();
278}
279
280char AMDGPUPreLegalizerCombiner::ID = 0;
281INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
282 "Combine AMDGPU machine instrs before legalization",
283 false, false)
284INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
285INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
286INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
287 "Combine AMDGPU machine instrs before legalization", false,
288 false)
289
290FunctionPass *llvm::createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
291 return new AMDGPUPreLegalizerCombiner(IsOptNone);
292}
293