AMDGPULowerIntrinsics.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp]

1	//===-- AMDGPULowerIntrinsics.cpp -------------------------------------------=//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Lower intrinsics that would otherwise require separate handling in both
10	// SelectionDAG and GlobalISel.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "AMDGPU.h"
15	#include "AMDGPUTargetMachine.h"
16	#include "GCNSubtarget.h"
17	#include "llvm/IR/IRBuilder.h"
18	#include "llvm/IR/IntrinsicInst.h"
19	#include "llvm/IR/IntrinsicsAMDGPU.h"
20	#include "llvm/InitializePasses.h"
21	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
22
23	#define DEBUG_TYPE "amdgpu-lower-intrinsics"
24
25	using namespace llvm;
26
27	namespace {
28
29	class AMDGPULowerIntrinsicsImpl {
30	public:
31	Module &M;
32	const AMDGPUTargetMachine &TM;
33
34	AMDGPULowerIntrinsicsImpl(Module &M, const AMDGPUTargetMachine &TM)
35	: M(M), TM(TM) {}
36
37	bool run();
38
39	private:
40	bool visitBarrier(IntrinsicInst &I);
41	};
42
43	class AMDGPULowerIntrinsicsLegacy : public ModulePass {
44	public:
45	static char ID;
46
47	AMDGPULowerIntrinsicsLegacy() : ModulePass (ID) {}
48
49	bool runOnModule(Module &M) override;
50
51	void getAnalysisUsage(AnalysisUsage &AU) const override {
52	AU.addRequired<TargetPassConfig>();
53	}
54	};
55
56	template <class T> static void forEachCall(Function &Intrin, T Callback) {
57	for (User *U : make_early_inc_range(Range: Intrin.users())) {
58	if (auto *CI = dyn_cast<IntrinsicInst>(Val: U))
59	Callback(CI);
60	}
61	}
62
63	} // anonymous namespace
64
65	bool AMDGPULowerIntrinsicsImpl::run() {
66	bool Changed = false;
67
68	for (Function &F : M) {
69	switch (F.getIntrinsicID()) {
70	default:
71	continue;
72	case Intrinsic::amdgcn_s_barrier:
73	case Intrinsic::amdgcn_s_barrier_signal:
74	case Intrinsic::amdgcn_s_barrier_signal_isfirst:
75	case Intrinsic::amdgcn_s_barrier_wait:
76	case Intrinsic::amdgcn_s_cluster_barrier:
77	forEachCall(Intrin&: F, Callback: [&](IntrinsicInst II) { Changed \|= visitBarrier(I&: II); });
78	break;
79	}
80	}
81
82	return Changed;
83	}
84
85	// Optimize barriers and lower s_(cluster_)barrier to a sequence of split
86	// barrier intrinsics.
87	bool AMDGPULowerIntrinsicsImpl::visitBarrier(IntrinsicInst &I) {
88	assert(I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier \|\|
89	I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal \|\|
90	I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal_isfirst \|\|
91	I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait \|\|
92	I.getIntrinsicID() == Intrinsic::amdgcn_s_cluster_barrier);
93
94	const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F: *I.getFunction());
95	bool IsSingleWaveWG = false;
96
97	if (TM.getOptLevel() > CodeGenOptLevel::None) {
98	unsigned WGMaxSize = ST.getFlatWorkGroupSizes(F: *I.getFunction()).second;
99	IsSingleWaveWG = WGMaxSize <= ST.getWavefrontSize();
100	}
101
102	IRBuilder<> B(&I);
103
104	// Lower the s_cluster_barrier intrinsic first. There is no corresponding
105	// hardware instruction in any subtarget.
106	if (I.getIntrinsicID() == Intrinsic::amdgcn_s_cluster_barrier) {
107	// The default cluster barrier expects one signal per workgroup. So we need
108	// a workgroup barrier first.
109	if (IsSingleWaveWG) {
110	B.CreateIntrinsic(RetTy: B.getVoidTy(), ID: Intrinsic::amdgcn_wave_barrier, Args: {});
111	} else {
112	Value *BarrierID_32 = B.getInt32(C: AMDGPU::Barrier::WORKGROUP);
113	Value *BarrierID_16 = B.getInt16(C: AMDGPU::Barrier::WORKGROUP);
114	Value *IsFirst = B.CreateIntrinsic(
115	RetTy: B.getInt1Ty(), ID: Intrinsic::amdgcn_s_barrier_signal_isfirst,
116	Args: {BarrierID_32});
117	B.CreateIntrinsic(RetTy: B.getVoidTy(), ID: Intrinsic::amdgcn_s_barrier_wait,
118	Args: {BarrierID_16});
119
120	Instruction *ThenTerm =
121	SplitBlockAndInsertIfThen(Cond: IsFirst, SplitBefore: I.getIterator(), Unreachable: false);
122	B.SetInsertPoint(ThenTerm);
123	}
124
125	// Now we can signal the cluster barrier from a single wave and wait for the
126	// barrier in all waves.
127	Value *BarrierID_32 = B.getInt32(C: AMDGPU::Barrier::CLUSTER);
128	Value *BarrierID_16 = B.getInt16(C: AMDGPU::Barrier::CLUSTER);
129	B.CreateIntrinsic(RetTy: B.getVoidTy(), ID: Intrinsic::amdgcn_s_barrier_signal,
130	Args: {BarrierID_32});
131
132	B.SetInsertPoint(&I);
133	B.CreateIntrinsic(RetTy: B.getVoidTy(), ID: Intrinsic::amdgcn_s_barrier_wait,
134	Args: {BarrierID_16});
135
136	I.eraseFromParent();
137	return true;
138	}
139
140	bool IsWorkgroupScope = false;
141
142	if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait \|\|
143	I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal \|\|
144	I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal_isfirst) {
145	int BarrierID = cast<ConstantInt>(Val: I.getArgOperand(i: `0`))->getSExtValue();
146	if (BarrierID == AMDGPU::Barrier::TRAP \|\|
147	BarrierID == AMDGPU::Barrier::WORKGROUP \|\|
148	(BarrierID >= AMDGPU::Barrier::NAMED_BARRIER_FIRST &&
149	BarrierID <= AMDGPU::Barrier::NAMED_BARRIER_LAST))
150	IsWorkgroupScope = true;
151	} else {
152	assert(I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier);
153	IsWorkgroupScope = true;
154	}
155
156	if (IsWorkgroupScope && IsSingleWaveWG) {
157	// Down-grade waits, remove split signals.
158	if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier \|\|
159	I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait) {
160	B.CreateIntrinsic(RetTy: B.getVoidTy(), ID: Intrinsic::amdgcn_wave_barrier, Args: {});
161	} else if (I.getIntrinsicID() ==
162	Intrinsic::amdgcn_s_barrier_signal_isfirst) {
163	// If we're the only wave of the workgroup, we're always first.
164	I.replaceAllUsesWith(V: B.getInt1(V: true));
165	}
166	I.eraseFromParent();
167	return true;
168	}
169
170	if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier &&
171	ST.hasSplitBarriers()) {
172	// Lower to split barriers.
173	Value *BarrierID_32 = B.getInt32(C: AMDGPU::Barrier::WORKGROUP);
174	Value *BarrierID_16 = B.getInt16(C: AMDGPU::Barrier::WORKGROUP);
175	B.CreateIntrinsic(RetTy: B.getVoidTy(), ID: Intrinsic::amdgcn_s_barrier_signal,
176	Args: {BarrierID_32});
177	B.CreateIntrinsic(RetTy: B.getVoidTy(), ID: Intrinsic::amdgcn_s_barrier_wait,
178	Args: {BarrierID_16});
179	I.eraseFromParent();
180	return true;
181	}
182
183	return false;
184	}
185
186	PreservedAnalyses AMDGPULowerIntrinsicsPass::run(Module &M,
187	ModuleAnalysisManager &MAM) {
188	AMDGPULowerIntrinsicsImpl Impl(M, TM);
189	if (!Impl.run())
190	return PreservedAnalyses::all();
191	return PreservedAnalyses::none();
192	}
193
194	bool AMDGPULowerIntrinsicsLegacy::runOnModule(Module &M) {
195	auto &TPC = getAnalysis<TargetPassConfig>();
196	const AMDGPUTargetMachine &TM = TPC.getTM<AMDGPUTargetMachine>();
197
198	AMDGPULowerIntrinsicsImpl Impl(M, TM);
199	return Impl.run();
200	}
201
202	#define PASS_DESC "AMDGPU lower intrinsics"
203	INITIALIZE_PASS_BEGIN(AMDGPULowerIntrinsicsLegacy, DEBUG_TYPE, PASS_DESC, false,
204	false)
205	INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
206	INITIALIZE_PASS_END(AMDGPULowerIntrinsicsLegacy, DEBUG_TYPE, PASS_DESC, false,
207	false)
208
209	char AMDGPULowerIntrinsicsLegacy::ID = `0`;
210
211	ModulePass *llvm::createAMDGPULowerIntrinsicsLegacyPass() {
212	return new AMDGPULowerIntrinsicsLegacy;
213	}
214

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp