AMDGPUPreLegalizerCombiner.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp]

1	//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass does combining of machine instructions at the generic MI level,
10	// before the legalizer.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "AMDGPU.h"
15	#include "AMDGPUCombinerHelper.h"
16	#include "AMDGPULegalizerInfo.h"
17	#include "GCNSubtarget.h"
18	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19	#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
20	#include "llvm/CodeGen/GlobalISel/Combiner.h"
21	#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22	#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23	#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
24	#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
25	#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26	#include "llvm/CodeGen/MachineDominators.h"
27	#include "llvm/CodeGen/TargetPassConfig.h"
28	#include "llvm/Target/TargetMachine.h"
29
30	#define GET_GICOMBINER_DEPS
31	#include "AMDGPUGenPreLegalizeGICombiner.inc"
32	#undef GET_GICOMBINER_DEPS
33
34	#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36	using namespace llvm;
37	using namespace MIPatternMatch;
38	namespace {
39
40	#define GET_GICOMBINER_TYPES
41	#include "AMDGPUGenPreLegalizeGICombiner.inc"
42	#undef GET_GICOMBINER_TYPES
43
44	class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45	protected:
46	const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
47	const GCNSubtarget &STI;
48	const AMDGPUCombinerHelper Helper;
49
50	public:
51	AMDGPUPreLegalizerCombinerImpl(
52	MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
53	GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
54	const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
55	const GCNSubtarget &STI, MachineDominatorTree *MDT,
56	const LegalizerInfo *LI);
57
58	static const char getName() { return* "AMDGPUPreLegalizerCombinerImpl"; }
59
60	bool tryCombineAllImpl(MachineInstr &MI) const;
61	bool tryCombineAll(MachineInstr &I) const override;
62
63	struct ClampI64ToI16MatchInfo {
64	int64_t Cmp1 = `0`;
65	int64_t Cmp2 = `0`;
66	Register Origin;
67	};
68
69	bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
70	const MachineFunction &MF,
71	ClampI64ToI16MatchInfo &MatchInfo) const;
72
73	void applyClampI64ToI16(MachineInstr &MI,
74	const ClampI64ToI16MatchInfo &MatchInfo) const;
75
76	private:
77	#define GET_GICOMBINER_CLASS_MEMBERS
78	#define AMDGPUSubtarget GCNSubtarget
79	#include "AMDGPUGenPreLegalizeGICombiner.inc"
80	#undef GET_GICOMBINER_CLASS_MEMBERS
81	#undef AMDGPUSubtarget
82	};
83
84	#define GET_GICOMBINER_IMPL
85	#define AMDGPUSubtarget GCNSubtarget
86	#include "AMDGPUGenPreLegalizeGICombiner.inc"
87	#undef AMDGPUSubtarget
88	#undef GET_GICOMBINER_IMPL
89
90	AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
91	MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
92	GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
93	const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
94	const GCNSubtarget &STI, MachineDominatorTree MDT, const* LegalizerInfo *LI)
95	: Combiner (MF, CInfo, TPC, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
96	Helper (Observer, B, /IsPreLegalize/ true, &VT, MDT, LI, STI),
97	#define GET_GICOMBINER_CONSTRUCTOR_INITS
98	#include "AMDGPUGenPreLegalizeGICombiner.inc"
99	#undef GET_GICOMBINER_CONSTRUCTOR_INITS
100	{
101	}
102
103	bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
104	if (tryCombineAllImpl(I&: MI))
105	return true;
106
107	switch (MI.getOpcode()) {
108	case TargetOpcode::G_SHUFFLE_VECTOR:
109	return Helper.tryCombineShuffleVector(MI);
110	}
111
112	return false;
113	}
114
115	bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
116	MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
117	ClampI64ToI16MatchInfo &MatchInfo) const {
118	assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
119
120	// Try to find a pattern where an i64 value should get clamped to short.
121	const LLT SrcType = MRI.getType(Reg: MI.getOperand(i: `1`).getReg());
122	if (SrcType != LLT::scalar(SizeInBits: `64`))
123	return false;
124
125	const LLT DstType = MRI.getType(Reg: MI.getOperand(i: `0`).getReg());
126	if (DstType != LLT::scalar(SizeInBits: `16`))
127	return false;
128
129	Register Base;
130
131	auto IsApplicableForCombine = [&MatchInfo]() -> bool {
132	const auto Cmp1 = MatchInfo.Cmp1;
133	const auto Cmp2 = MatchInfo.Cmp2;
134	const auto Diff = std::abs(i: Cmp2 - Cmp1);
135
136	// If the difference between both comparison values is 0 or 1, there is no
137	// need to clamp.
138	if (Diff == `0` \|\| Diff == `1`)
139	return false;
140
141	const int64_t Min = std::numeric_limits<int16_t>::min();
142	const int64_t Max = std::numeric_limits<int16_t>::max();
143
144	// Check if the comparison values are between SHORT_MIN and SHORT_MAX.
145	return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) \|\|
146	(Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
147	};
148
149	// Try to match a combination of min / max MIR opcodes.
150	if (mi_match(R: MI.getOperand(i: `1`).getReg(), MRI,
151	P: m_GSMin(L: m_Reg(R&: Base), R: m_ICst(Cst&: MatchInfo.Cmp1)))) {
152	if (mi_match(R: Base, MRI,
153	P: m_GSMax(L: m_Reg(R&: MatchInfo.Origin), R: m_ICst(Cst&: MatchInfo.Cmp2)))) {
154	return IsApplicableForCombine ();
155	}
156	}
157
158	if (mi_match(R: MI.getOperand(i: `1`).getReg(), MRI,
159	P: m_GSMax(L: m_Reg(R&: Base), R: m_ICst(Cst&: MatchInfo.Cmp1)))) {
160	if (mi_match(R: Base, MRI,
161	P: m_GSMin(L: m_Reg(R&: MatchInfo.Origin), R: m_ICst(Cst&: MatchInfo.Cmp2)))) {
162	return IsApplicableForCombine ();
163	}
164	}
165
166	return false;
167	}
168
169	// We want to find a combination of instructions that
170	// gets generated when an i64 gets clamped to i16.
171	// The corresponding pattern is:
172	// G_MAX / G_MAX for i16 <= G_TRUNC i64.
173	// This can be efficiently written as following:
174	// v_cvt_pk_i16_i32 v0, v0, v1
175	// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
176	void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
177	MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
178
179	Register Src = MatchInfo.Origin;
180	assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
181	LLT::scalar(`64`));
182	const LLT S32 = LLT::scalar(SizeInBits: `32`);
183
184	auto Unmerge = B.buildUnmerge(Res: S32, Op: Src);
185
186	assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
187
188	const LLT V2S16 = LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `16`);
189	auto CvtPk =
190	B.buildInstr(Opc: AMDGPU::G_AMDGPU_CVT_PK_I16_I32, DstOps: {V2S16},
191	SrcOps: {Unmerge.getReg(Idx: `0`), Unmerge.getReg(Idx: `1`)}, Flags: MI.getFlags());
192
193	auto MinBoundary = std::min(a: MatchInfo.Cmp1, b: MatchInfo.Cmp2);
194	auto MaxBoundary = std::max(a: MatchInfo.Cmp1, b: MatchInfo.Cmp2);
195	auto MinBoundaryDst = B.buildConstant(Res: S32, Val: MinBoundary);
196	auto MaxBoundaryDst = B.buildConstant(Res: S32, Val: MaxBoundary);
197
198	auto Bitcast = B.buildBitcast(Dst: {S32}, Src: CvtPk);
199
200	auto Med3 = B.buildInstr(
201	Opc: AMDGPU::G_AMDGPU_SMED3, DstOps: {S32},
202	SrcOps: {MinBoundaryDst.getReg(Idx: `0`), Bitcast.getReg(Idx: `0`), MaxBoundaryDst.getReg(Idx: `0`)},
203	Flags: MI.getFlags());
204
205	B.buildTrunc(Res: MI.getOperand(i: `0`).getReg(), Op: Med3);
206
207	MI.eraseFromParent();
208	}
209
210	// Pass boilerplate
211	// ================
212
213	class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
214	public:
215	static char ID;
216
217	AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
218
219	StringRef getPassName() const override {
220	return "AMDGPUPreLegalizerCombiner";
221	}
222
223	bool runOnMachineFunction(MachineFunction &MF) override;
224
225	void getAnalysisUsage(AnalysisUsage &AU) const override;
226
227	private:
228	bool IsOptNone;
229	AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
230	};
231	} // end anonymous namespace
232
233	void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
234	AU.addRequired<TargetPassConfig>();
235	AU.setPreservesCFG();
236	getSelectionDAGFallbackAnalysisUsage(AU);
237	AU.addRequired<GISelValueTrackingAnalysisLegacy>();
238	AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
239	if (!IsOptNone) {
240	AU.addRequired<MachineDominatorTreeWrapperPass>();
241	AU.addPreserved<MachineDominatorTreeWrapperPass>();
242	}
243
244	AU.addRequired<GISelCSEAnalysisWrapperPass>();
245	AU.addPreserved<GISelCSEAnalysisWrapperPass>();
246	MachineFunctionPass::getAnalysisUsage(AU);
247	}
248
249	AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
250	: MachineFunctionPass (ID), IsOptNone(IsOptNone) {
251	if (!RuleConfig.parseCommandLineOption())
252	report_fatal_error(reason: "Invalid rule identifier");
253	}
254
255	bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
256	if (MF.getProperties().hasFailedISel())
257	return false;
258	auto *TPC = &getAnalysis<TargetPassConfig>();
259	const Function &F = MF.getFunction();
260	bool EnableOpt =
261	MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
262	GISelValueTracking *VT =
263	&getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
264
265	// Enable CSE.
266	GISelCSEAnalysisWrapper &Wrapper =
267	getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
268	auto *CSEInfo = &Wrapper.get(CSEOpt: TPC->getCSEConfig());
269
270	const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
271	MachineDominatorTree *MDT =
272	IsOptNone ? nullptr
273	: &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
274	CombinerInfo CInfo(/AllowIllegalOps/ true, /ShouldLegalizeIllegal/ false,
275	nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
276	// Disable fixed-point iteration to reduce compile-time
277	CInfo.MaxIterations = `1`;
278	CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
279	// This is the first Combiner, so the input IR might contain dead
280	// instructions.
281	CInfo.EnableFullDCE = true;
282	AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig,
283	STI, MDT, STI.getLegalizerInfo());
284	return Impl.combineMachineInstrs();
285	}
286
287	char AMDGPUPreLegalizerCombiner::ID = `0`;
288	INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
289	"Combine AMDGPU machine instrs before legalization",
290	false, false)
291	INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
292	INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
293	INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
294	"Combine AMDGPU machine instrs before legalization", false,
295	false)
296
297	FunctionPass llvm::createAMDGPUPreLegalizeCombiner(bool* IsOptNone) {
298	return new AMDGPUPreLegalizerCombiner (IsOptNone);
299	}
300

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp