AMDGPUPreLegalizerCombiner.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp]

//=== lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines machine instructions at the generic MI level, before the
// legalizer runs.
//
//===----------------------------------------------------------------------===//
13
14	#include "AMDGPU.h"
15	#include "AMDGPUCombinerHelper.h"
16	#include "AMDGPULegalizerInfo.h"
17	#include "GCNSubtarget.h"
18	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19	#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
20	#include "llvm/CodeGen/GlobalISel/Combiner.h"
21	#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22	#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23	#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
24	#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
25	#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26	#include "llvm/CodeGen/MachineDominators.h"
27	#include "llvm/CodeGen/TargetPassConfig.h"
28	#include "llvm/Target/TargetMachine.h"
29
30	#define GET_GICOMBINER_DEPS
31	#include "AMDGPUGenPreLegalizeGICombiner.inc"
32	#undef GET_GICOMBINER_DEPS
33
34	#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36	using namespace llvm;
37	using namespace MIPatternMatch;
38	namespace {
39
40	#define GET_GICOMBINER_TYPES
41	#include "AMDGPUGenPreLegalizeGICombiner.inc"
42	#undef GET_GICOMBINER_TYPES
43
44	class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45	protected:
46	const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
47	const GCNSubtarget &STI;
48	// TODO: Make CombinerHelper methods const.
49	mutable AMDGPUCombinerHelper Helper;
50
51	public:
52	AMDGPUPreLegalizerCombinerImpl(
53	MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
54	GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
55	const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
56	const GCNSubtarget &STI, MachineDominatorTree *MDT,
57	const LegalizerInfo *LI);
58
59	static const char getName() { return* "AMDGPUPreLegalizerCombinerImpl"; }
60
61	bool tryCombineAllImpl(MachineInstr &MI) const;
62	bool tryCombineAll(MachineInstr &I) const override;
63
64	struct ClampI64ToI16MatchInfo {
65	int64_t Cmp1 = `0`;
66	int64_t Cmp2 = `0`;
67	Register Origin;
68	};
69
70	bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
71	const MachineFunction &MF,
72	ClampI64ToI16MatchInfo &MatchInfo) const;
73
74	void applyClampI64ToI16(MachineInstr &MI,
75	const ClampI64ToI16MatchInfo &MatchInfo) const;
76
77	private:
78	#define GET_GICOMBINER_CLASS_MEMBERS
79	#define AMDGPUSubtarget GCNSubtarget
80	#include "AMDGPUGenPreLegalizeGICombiner.inc"
81	#undef GET_GICOMBINER_CLASS_MEMBERS
82	#undef AMDGPUSubtarget
83	};
84
85	#define GET_GICOMBINER_IMPL
86	#define AMDGPUSubtarget GCNSubtarget
87	#include "AMDGPUGenPreLegalizeGICombiner.inc"
88	#undef AMDGPUSubtarget
89	#undef GET_GICOMBINER_IMPL
90
91	AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
92	MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
93	GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
94	const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
95	const GCNSubtarget &STI, MachineDominatorTree MDT, const* LegalizerInfo *LI)
96	: Combiner (MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
97	Helper (Observer, B, /IsPreLegalize/ true, &KB, MDT, LI),
98	#define GET_GICOMBINER_CONSTRUCTOR_INITS
99	#include "AMDGPUGenPreLegalizeGICombiner.inc"
100	#undef GET_GICOMBINER_CONSTRUCTOR_INITS
101	{
102	}
103
104	bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
105	if (tryCombineAllImpl(I&: MI))
106	return true;
107
108	switch (MI.getOpcode()) {
109	case TargetOpcode::G_SHUFFLE_VECTOR:
110	return Helper.tryCombineShuffleVector(MI);
111	}
112
113	return false;
114	}
115
116	bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
117	MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
118	ClampI64ToI16MatchInfo &MatchInfo) const {
119	assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
120
121	// Try to find a pattern where an i64 value should get clamped to short.
122	const LLT SrcType = MRI.getType(Reg: MI.getOperand(i: `1`).getReg());
123	if (SrcType != LLT::scalar(SizeInBits: `64`))
124	return false;
125
126	const LLT DstType = MRI.getType(Reg: MI.getOperand(i: `0`).getReg());
127	if (DstType != LLT::scalar(SizeInBits: `16`))
128	return false;
129
130	Register Base;
131
132	auto IsApplicableForCombine = [&MatchInfo]() -> bool {
133	const auto Cmp1 = MatchInfo.Cmp1;
134	const auto Cmp2 = MatchInfo.Cmp2;
135	const auto Diff = std::abs(i: Cmp2 - Cmp1);
136
137	// If the difference between both comparison values is 0 or 1, there is no
138	// need to clamp.
139	if (Diff == `0` \|\| Diff == `1`)
140	return false;
141
142	const int64_t Min = std::numeric_limits<int16_t>::min();
143	const int64_t Max = std::numeric_limits<int16_t>::max();
144
145	// Check if the comparison values are between SHORT_MIN and SHORT_MAX.
146	return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) \|\|
147	(Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
148	};
149
150	// Try to match a combination of min / max MIR opcodes.
151	if (mi_match(R: MI.getOperand(i: `1`).getReg(), MRI,
152	P: m_GSMin(L: m_Reg(R&: Base), R: m_ICst(Cst&: MatchInfo.Cmp1)))) {
153	if (mi_match(R: Base, MRI,
154	P: m_GSMax(L: m_Reg(R&: MatchInfo.Origin), R: m_ICst(Cst&: MatchInfo.Cmp2)))) {
155	return IsApplicableForCombine ();
156	}
157	}
158
159	if (mi_match(R: MI.getOperand(i: `1`).getReg(), MRI,
160	P: m_GSMax(L: m_Reg(R&: Base), R: m_ICst(Cst&: MatchInfo.Cmp1)))) {
161	if (mi_match(R: Base, MRI,
162	P: m_GSMin(L: m_Reg(R&: MatchInfo.Origin), R: m_ICst(Cst&: MatchInfo.Cmp2)))) {
163	return IsApplicableForCombine ();
164	}
165	}
166
167	return false;
168	}
169
170	// We want to find a combination of instructions that
171	// gets generated when an i64 gets clamped to i16.
172	// The corresponding pattern is:
173	// G_MAX / G_MAX for i16 <= G_TRUNC i64.
174	// This can be efficiently written as following:
175	// v_cvt_pk_i16_i32 v0, v0, v1
176	// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
177	void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
178	MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
179
180	Register Src = MatchInfo.Origin;
181	assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
182	LLT::scalar(`64`));
183	const LLT S32 = LLT::scalar(SizeInBits: `32`);
184
185	auto Unmerge = B.buildUnmerge(Res: S32, Op: Src);
186
187	assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
188
189	const LLT V2S16 = LLT::fixed_vector(NumElements: `2`, ScalarSizeInBits: `16`);
190	auto CvtPk =
191	B.buildInstr(Opc: AMDGPU::G_AMDGPU_CVT_PK_I16_I32, DstOps: {V2S16},
192	SrcOps: {Unmerge.getReg(Idx: `0`), Unmerge.getReg(Idx: `1`)}, Flags: MI.getFlags());
193
194	auto MinBoundary = std::min(a: MatchInfo.Cmp1, b: MatchInfo.Cmp2);
195	auto MaxBoundary = std::max(a: MatchInfo.Cmp1, b: MatchInfo.Cmp2);
196	auto MinBoundaryDst = B.buildConstant(Res: S32, Val: MinBoundary);
197	auto MaxBoundaryDst = B.buildConstant(Res: S32, Val: MaxBoundary);
198
199	auto Bitcast = B.buildBitcast(Dst: {S32}, Src: CvtPk);
200
201	auto Med3 = B.buildInstr(
202	Opc: AMDGPU::G_AMDGPU_SMED3, DstOps: {S32},
203	SrcOps: {MinBoundaryDst.getReg(Idx: `0`), Bitcast.getReg(Idx: `0`), MaxBoundaryDst.getReg(Idx: `0`)},
204	Flags: MI.getFlags());
205
206	B.buildTrunc(Res: MI.getOperand(i: `0`).getReg(), Op: Med3);
207
208	MI.eraseFromParent();
209	}
210
211	// Pass boilerplate
212	// ================
213
214	class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
215	public:
216	static char ID;
217
218	AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
219
220	StringRef getPassName() const override {
221	return "AMDGPUPreLegalizerCombiner";
222	}
223
224	bool runOnMachineFunction(MachineFunction &MF) override;
225
226	void getAnalysisUsage(AnalysisUsage &AU) const override;
227
228	private:
229	bool IsOptNone;
230	AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
231	};
232	} // end anonymous namespace
233
234	void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
235	AU.addRequired<TargetPassConfig>();
236	AU.setPreservesCFG();
237	getSelectionDAGFallbackAnalysisUsage(AU);
238	AU.addRequired<GISelKnownBitsAnalysis>();
239	AU.addPreserved<GISelKnownBitsAnalysis>();
240	if (!IsOptNone) {
241	AU.addRequired<MachineDominatorTreeWrapperPass>();
242	AU.addPreserved<MachineDominatorTreeWrapperPass>();
243	}
244
245	AU.addRequired<GISelCSEAnalysisWrapperPass>();
246	AU.addPreserved<GISelCSEAnalysisWrapperPass>();
247	MachineFunctionPass::getAnalysisUsage(AU);
248	}
249
250	AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
251	: MachineFunctionPass (ID), IsOptNone(IsOptNone) {
252	initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
253
254	if (!RuleConfig.parseCommandLineOption())
255	report_fatal_error(reason: "Invalid rule identifier");
256	}
257
258	bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
259	if (MF.getProperties().hasProperty(
260	P: MachineFunctionProperties::Property::FailedISel))
261	return false;
262	auto *TPC = &getAnalysis<TargetPassConfig>();
263	const Function &F = MF.getFunction();
264	bool EnableOpt =
265	MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
266	GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
267
268	// Enable CSE.
269	GISelCSEAnalysisWrapper &Wrapper =
270	getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
271	auto *CSEInfo = &Wrapper.get(CSEOpt: TPC->getCSEConfig());
272
273	const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
274	MachineDominatorTree *MDT =
275	IsOptNone ? nullptr
276	: &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
277	CombinerInfo CInfo(/AllowIllegalOps/ true, /ShouldLegalizeIllegal/ false,
278	nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
279	AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
280	STI, MDT, STI.getLegalizerInfo());
281	return Impl.combineMachineInstrs();
282	}
283
284	char AMDGPUPreLegalizerCombiner::ID = `0`;
285	INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
286	"Combine AMDGPU machine instrs before legalization",
287	false, false)
288	INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
289	INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
290	INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
291	"Combine AMDGPU machine instrs before legalization", false,
292	false)
293
294	namespace llvm {
295	FunctionPass createAMDGPUPreLegalizeCombiner(bool* IsOptNone) {
296	return new AMDGPUPreLegalizerCombiner (IsOptNone);
297	}
298	} // end namespace llvm
299

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp