AMDGPULateCodeGenPrepare.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp]

//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
/// selection.
//
//===----------------------------------------------------------------------===//
14
15	#include "AMDGPU.h"
16	#include "AMDGPUTargetMachine.h"
17	#include "llvm/Analysis/AssumptionCache.h"
18	#include "llvm/Analysis/UniformityAnalysis.h"
19	#include "llvm/Analysis/ValueTracking.h"
20	#include "llvm/CodeGen/TargetPassConfig.h"
21	#include "llvm/IR/IRBuilder.h"
22	#include "llvm/IR/InstVisitor.h"
23	#include "llvm/IR/IntrinsicsAMDGPU.h"
24	#include "llvm/Support/CommandLine.h"
25	#include "llvm/Support/KnownBits.h"
26	#include "llvm/Transforms/Utils/Local.h"
27
28	#define DEBUG_TYPE "amdgpu-late-codegenprepare"
29
30	using namespace llvm;
31
32	// Scalar load widening needs running after load-store-vectorizer as that pass
33	// doesn't handle overlapping cases. In addition, this pass enhances the
34	// widening to handle cases where scalar sub-dword loads are naturally aligned
35	// only but not dword aligned.
36	static cl::opt<bool>
37	WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
38	cl::desc ("Widen sub-dword constant address space loads in "
39	"AMDGPULateCodeGenPrepare"),
40	cl::ReallyHidden, cl::init(Val: true));
41
42	namespace {
43
44	class AMDGPULateCodeGenPrepare
45	: public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
46	Function &F;
47	const DataLayout &DL;
48	const GCNSubtarget &ST;
49
50	AssumptionCache *const AC;
51	UniformityInfo &UA;
52
53	SmallVector<WeakTrackingVH, `8`> DeadInsts;
54
55	public:
56	AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
57	AssumptionCache *AC, UniformityInfo &UA)
58	: F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
59	bool run();
60	bool visitInstruction(Instruction &) { return false; }
61
62	// Check if the specified value is at least DWORD aligned.
63	bool isDWORDAligned(const Value V) const* {
64	KnownBits Known = computeKnownBits(V, DL, AC);
65	return Known.countMinTrailingZeros() >= `2`;
66	}
67
68	bool canWidenScalarExtLoad(LoadInst &LI) const;
69	bool visitLoadInst(LoadInst &LI);
70	};
71
72	using ValueToValueMap = DenseMap<const Value , Value >;
73
74	class LiveRegOptimizer {
75	private:
76	Module &Mod;
77	const DataLayout &DL;
78	const GCNSubtarget &ST;
79
80	/// The scalar type to convert to
81	Type *const ConvertToScalar;
82	/// The set of visited Instructions
83	SmallPtrSet<Instruction *, `4`> Visited;
84	/// Map of Value -> Converted Value
85	ValueToValueMap ValMap;
86	/// Map of containing conversions from Optimal Type -> Original Type per BB.
87	DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;
88
89	public:
90	/// Calculate the and \p return the type to convert to given a problematic \p
91	/// OriginalType. In some instances, we may widen the type (e.g. v2i8 -> i32).
92	Type calculateConvertType(Type OriginalType);
93	/// Convert the virtual register defined by \p V to the compatible vector of
94	/// legal type
95	Value convertToOptType(Instruction V, BasicBlock::iterator &InstPt);
96	/// Convert the virtual register defined by \p V back to the original type \p
97	/// ConvertType, stripping away the MSBs in cases where there was an imperfect
98	/// fit (e.g. v2i32 -> v7i8)
99	Value convertFromOptType(Type ConvertType, Instruction *V,
100	BasicBlock::iterator &InstPt,
101	BasicBlock *InsertBlock);
102	/// Check for problematic PHI nodes or cross-bb values based on the value
103	/// defined by \p I, and coerce to legal types if necessary. For problematic
104	/// PHI node, we coerce all incoming values in a single invocation.
105	bool optimizeLiveType(Instruction *I,
106	SmallVectorImpl<WeakTrackingVH> &DeadInsts);
107
108	// Whether or not the type should be replaced to avoid inefficient
109	// legalization code
110	bool shouldReplace(Type *ITy) {
111	FixedVectorType *VTy = dyn_cast<FixedVectorType>(Val: ITy);
112	if (!VTy)
113	return false;
114
115	const auto *TLI = ST.getTargetLowering();
116
117	Type *EltTy = VTy->getElementType();
118	// If the element size is not less than the convert to scalar size, then we
119	// can't do any bit packing
120	if (!EltTy->isIntegerTy() \|\|
121	EltTy->getScalarSizeInBits() > ConvertToScalar->getScalarSizeInBits())
122	return false;
123
124	// Only coerce illegal types
125	TargetLoweringBase::LegalizeKind LK =
126	TLI->getTypeConversion(Context&: EltTy->getContext(), VT: EVT::getEVT(Ty: EltTy, HandleUnknown: false));
127	return LK.first != TargetLoweringBase::TypeLegal;
128	}
129
130	bool isOpLegal(Instruction I) { return* isa<StoreInst, IntrinsicInst>(Val: I); }
131
132	bool isCoercionProfitable(Instruction *II) {
133	SmallPtrSet<Instruction *, `4`> CVisited;
134	SmallVector<Instruction *, `4`> UserList;
135
136	// Check users for profitable conditions (across block user which can
137	// natively handle the illegal vector).
138	for (User *V : II->users())
139	if (auto *UseInst = dyn_cast<Instruction>(Val: V))
140	UserList.push_back(Elt: UseInst);
141
142	auto IsLookThru = [](Instruction *II) {
143	if (const auto *Intr = dyn_cast<IntrinsicInst>(Val: II))
144	return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
145	return isa<PHINode, ShuffleVectorInst, InsertElementInst,
146	ExtractElementInst, CastInst>(Val: II);
147	};
148
149	while (!UserList.empty()) {
150	auto CII = UserList.pop_back_val();
151	if (!CVisited.insert(Ptr: CII).second)
152	continue;
153
154	if (CII->getParent() == II->getParent() && !IsLookThru(II))
155	continue;
156
157	if (isOpLegal(I: CII))
158	return true;
159
160	if (IsLookThru(CII))
161	for (User *V : CII->users())
162	if (auto *UseInst = dyn_cast<Instruction>(Val: V))
163	UserList.push_back(Elt: UseInst);
164	}
165	return false;
166	}
167
168	LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
169	: Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
170	ConvertToScalar(Type::getInt32Ty(C&: Mod.getContext())) {}
171	};
172
173	} // end anonymous namespace
174
175	bool AMDGPULateCodeGenPrepare::run() {
176	// "Optimize" the virtual regs that cross basic block boundaries. When
177	// building the SelectionDAG, vectors of illegal types that cross basic blocks
178	// will be scalarized and widened, with each scalar living in its
179	// own register. To work around this, this optimization converts the
180	// vectors to equivalent vectors of legal type (which are converted back
181	// before uses in subsequent blocks), to pack the bits into fewer physical
182	// registers (used in CopyToReg/CopyFromReg pairs).
183	LiveRegOptimizer LRO(*F.getParent(), ST);
184
185	bool Changed = false;
186
187	bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();
188
189	for (auto &BB : reverse(C&: F))
190	for (Instruction &I : make_early_inc_range(Range: reverse(C&: BB))) {
191	Changed \|= !HasScalarSubwordLoads && visit(I);
192	Changed \|= LRO.optimizeLiveType(I: &I, DeadInsts);
193	}
194
195	RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
196	return Changed;
197	}
198
199	Type LiveRegOptimizer::calculateConvertType(Type OriginalType) {
200	assert(OriginalType->getScalarSizeInBits() <=
201	ConvertToScalar->getScalarSizeInBits());
202
203	FixedVectorType *VTy = cast<FixedVectorType>(Val: OriginalType);
204
205	TypeSize OriginalSize = DL.getTypeSizeInBits(Ty: VTy);
206	TypeSize ConvertScalarSize = DL.getTypeSizeInBits(Ty: ConvertToScalar);
207	unsigned ConvertEltCount =
208	(OriginalSize + ConvertScalarSize - `1`) / ConvertScalarSize;
209
210	if (OriginalSize <= ConvertScalarSize)
211	return IntegerType::get(C&: Mod.getContext(), NumBits: ConvertScalarSize);
212
213	return VectorType::get(ElementType: Type::getIntNTy(C&: Mod.getContext(), N: ConvertScalarSize),
214	NumElements: ConvertEltCount, Scalable: false);
215	}
216
217	Value LiveRegOptimizer::convertToOptType(Instruction V,
218	BasicBlock::iterator &InsertPt) {
219	FixedVectorType *VTy = cast<FixedVectorType>(Val: V->getType());
220	Type *NewTy = calculateConvertType(OriginalType: V->getType());
221
222	TypeSize OriginalSize = DL.getTypeSizeInBits(Ty: VTy);
223	TypeSize NewSize = DL.getTypeSizeInBits(Ty: NewTy);
224
225	IRBuilder<> Builder(V->getParent(), InsertPt);
226	// If there is a bitsize match, we can fit the old vector into a new vector of
227	// desired type.
228	if (OriginalSize == NewSize)
229	return Builder.CreateBitCast(V, DestTy: NewTy, Name: V->getName() + ".bc");
230
231	// If there is a bitsize mismatch, we must use a wider vector.
232	assert(NewSize > OriginalSize);
233	uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();
234
235	SmallVector<int, `8`> ShuffleMask;
236	uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
237	for (unsigned I = `0`; I < OriginalElementCount; I++)
238	ShuffleMask.push_back(Elt: I);
239
240	for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
241	ShuffleMask.push_back(Elt: OriginalElementCount);
242
243	Value *ExpandedVec = Builder.CreateShuffleVector(V, Mask: ShuffleMask);
244	return Builder.CreateBitCast(V: ExpandedVec, DestTy: NewTy, Name: V->getName() + ".bc");
245	}
246
247	Value LiveRegOptimizer::convertFromOptType(Type ConvertType, Instruction *V,
248	BasicBlock::iterator &InsertPt,
249	BasicBlock *InsertBB) {
250	FixedVectorType *NewVTy = cast<FixedVectorType>(Val: ConvertType);
251
252	TypeSize OriginalSize = DL.getTypeSizeInBits(Ty: V->getType());
253	TypeSize NewSize = DL.getTypeSizeInBits(Ty: NewVTy);
254
255	IRBuilder<> Builder(InsertBB, InsertPt);
256	// If there is a bitsize match, we simply convert back to the original type.
257	if (OriginalSize == NewSize)
258	return Builder.CreateBitCast(V, DestTy: NewVTy, Name: V->getName() + ".bc");
259
260	// If there is a bitsize mismatch, then we must have used a wider value to
261	// hold the bits.
262	assert(OriginalSize > NewSize);
263	// For wide scalars, we can just truncate the value.
264	if (!V->getType()->isVectorTy()) {
265	Instruction *Trunc = cast<Instruction>(
266	Val: Builder.CreateTrunc(V, DestTy: IntegerType::get(C&: Mod.getContext(), NumBits: NewSize)));
267	return cast<Instruction>(Val: Builder.CreateBitCast(V: Trunc, DestTy: NewVTy));
268	}
269
270	// For wider vectors, we must strip the MSBs to convert back to the original
271	// type.
272	VectorType *ExpandedVT = VectorType::get(
273	ElementType: Type::getIntNTy(C&: Mod.getContext(), N: NewVTy->getScalarSizeInBits()),
274	NumElements: (OriginalSize / NewVTy->getScalarSizeInBits()), Scalable: false);
275	Instruction *Converted =
276	cast<Instruction>(Val: Builder.CreateBitCast(V, DestTy: ExpandedVT));
277
278	unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
279	SmallVector<int, `8`> ShuffleMask(NarrowElementCount);
280	std::iota(first: ShuffleMask.begin(), last: ShuffleMask.end(), value: `0`);
281
282	return Builder.CreateShuffleVector(V: Converted, Mask: ShuffleMask);
283	}
284
285	bool LiveRegOptimizer::optimizeLiveType(
286	Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
287	SmallVector<Instruction *, `4`> Worklist;
288	SmallPtrSet<PHINode *, `4`> PhiNodes;
289	SmallPtrSet<Instruction *, `4`> Defs;
290	SmallPtrSet<Instruction *, `4`> Uses;
291
292	Worklist.push_back(Elt: cast<Instruction>(Val: I));
293	while (!Worklist.empty()) {
294	Instruction *II = Worklist.pop_back_val();
295
296	if (!Visited.insert(Ptr: II).second)
297	continue;
298
299	if (!shouldReplace(ITy: II->getType()))
300	continue;
301
302	if (!isCoercionProfitable(II))
303	continue;
304
305	if (PHINode *Phi = dyn_cast<PHINode>(Val: II)) {
306	PhiNodes.insert(Ptr: Phi);
307	// Collect all the incoming values of problematic PHI nodes.
308	for (Value *V : Phi->incoming_values()) {
309	// Repeat the collection process for newly found PHI nodes.
310	if (PHINode *OpPhi = dyn_cast<PHINode>(Val: V)) {
311	if (!PhiNodes.count(Ptr: OpPhi) && !Visited.count(Ptr: OpPhi))
312	Worklist.push_back(Elt: OpPhi);
313	continue;
314	}
315
316	Instruction *IncInst = dyn_cast<Instruction>(Val: V);
317	// Other incoming value types (e.g. vector literals) are unhandled
318	if (!IncInst && !isa<ConstantAggregateZero>(Val: V))
319	return false;
320
321	// Collect all other incoming values for coercion.
322	if (IncInst)
323	Defs.insert(Ptr: IncInst);
324	}
325	}
326
327	// Collect all relevant uses.
328	for (User *V : II->users()) {
329	// Repeat the collection process for problematic PHI nodes.
330	if (PHINode *OpPhi = dyn_cast<PHINode>(Val: V)) {
331	if (!PhiNodes.count(Ptr: OpPhi) && !Visited.count(Ptr: OpPhi))
332	Worklist.push_back(Elt: OpPhi);
333	continue;
334	}
335
336	Instruction *UseInst = cast<Instruction>(Val: V);
337	// Collect all uses of PHINodes and any use the crosses BB boundaries.
338	if (UseInst->getParent() != II->getParent() \|\| isa<PHINode>(Val: II)) {
339	Uses.insert(Ptr: UseInst);
340	if (!isa<PHINode>(Val: II))
341	Defs.insert(Ptr: II);
342	}
343	}
344	}
345
346	// Coerce and track the defs.
347	for (Instruction *D : Defs) {
348	if (!ValMap.contains(Val: D)) {
349	BasicBlock::iterator InsertPt = std::next(x: D->getIterator());
350	Value *ConvertVal = convertToOptType(V: D, InsertPt);
351	assert(ConvertVal);
352	ValMap [D] = ConvertVal;
353	}
354	}
355
356	// Construct new-typed PHI nodes.
357	for (PHINode *Phi : PhiNodes) {
358	ValMap [Phi] = PHINode::Create(Ty: calculateConvertType(OriginalType: Phi->getType()),
359	NumReservedValues: Phi->getNumIncomingValues(),
360	NameStr: Phi->getName() + ".tc", InsertBefore: Phi->getIterator());
361	}
362
363	// Connect all the PHI nodes with their new incoming values.
364	for (PHINode *Phi : PhiNodes) {
365	PHINode *NewPhi = cast<PHINode>(Val: ValMap [Phi]);
366	bool MissingIncVal = false;
367	for (int I = `0`, E = Phi->getNumIncomingValues(); I < E; I++) {
368	Value *IncVal = Phi->getIncomingValue(i: I);
369	if (isa<ConstantAggregateZero>(Val: IncVal)) {
370	Type *NewType = calculateConvertType(OriginalType: Phi->getType());
371	NewPhi->addIncoming(V: ConstantInt::get(Ty: NewType, V: `0`, IsSigned: false),
372	BB: Phi->getIncomingBlock(i: I));
373	} else if (Value *Val = ValMap.lookup(Val: IncVal))
374	NewPhi->addIncoming(V: Val, BB: Phi->getIncomingBlock(i: I));
375	else
376	MissingIncVal = true;
377	}
378	if (MissingIncVal) {
379	Value *DeadVal = ValMap [Phi];
380	// The coercion chain of the PHI is broken. Delete the Phi
381	// from the ValMap and any connected / user Phis.
382	SmallVector<Value *, `4`> PHIWorklist;
383	SmallPtrSet<Value *, `4`> VisitedPhis;
384	PHIWorklist.push_back(Elt: DeadVal);
385	while (!PHIWorklist.empty()) {
386	Value *NextDeadValue = PHIWorklist.pop_back_val();
387	VisitedPhis.insert(Ptr: NextDeadValue);
388	auto OriginalPhi =
389	llvm::find_if(Range&: PhiNodes, P: [this, &NextDeadValue](PHINode *CandPhi) {
390	return ValMap [CandPhi] == NextDeadValue;
391	});
392	// This PHI may have already been removed from maps when
393	// unwinding a previous Phi
394	if (OriginalPhi != PhiNodes.end())
395	ValMap.erase(Val: *OriginalPhi);
396
397	DeadInsts.emplace_back(Args: cast<Instruction>(Val: NextDeadValue));
398
399	for (User *U : NextDeadValue->users()) {
400	if (!VisitedPhis.contains(Ptr: cast<PHINode>(Val: U)))
401	PHIWorklist.push_back(Elt: U);
402	}
403	}
404	} else {
405	DeadInsts.emplace_back(Args: cast<Instruction>(Val: Phi));
406	}
407	}
408	// Coerce back to the original type and replace the uses.
409	for (Instruction *U : Uses) {
410	// Replace all converted operands for a use.
411	for (auto [OpIdx, Op] : enumerate(First: U->operands())) {
412	if (Value *Val = ValMap.lookup(Val: Op)) {
413	Value NewVal = nullptr*;
414	if (BBUseValMap.contains(Val: U->getParent()) &&
415	BBUseValMap [U->getParent()].contains(Val))
416	NewVal = BBUseValMap [U->getParent()][Val];
417	else {
418	BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
419	// We may pick up ops that were previously converted for users in
420	// other blocks. If there is an originally typed definition of the Op
421	// already in this block, simply reuse it.
422	if (isa<Instruction>(Val: Op) && !isa<PHINode>(Val: Op) &&
423	U->getParent() == cast<Instruction>(Val&: Op)->getParent()) {
424	NewVal = Op;
425	} else {
426	NewVal =
427	convertFromOptType(ConvertType: Op ->getType(), V: cast<Instruction>(Val: ValMap [Op]),
428	InsertPt, InsertBB: U->getParent());
429	BBUseValMap [U->getParent()][ValMap [Op]] = NewVal;
430	}
431	}
432	assert(NewVal);
433	U->setOperand(i: OpIdx, Val: NewVal);
434	}
435	}
436	}
437
438	return true;
439	}
440
441	bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
442	unsigned AS = LI.getPointerAddressSpace();
443	// Skip non-constant address space.
444	if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
445	AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
446	return false;
447	// Skip non-simple loads.
448	if (!LI.isSimple())
449	return false;
450	Type *Ty = LI.getType();
451	// Skip aggregate types.
452	if (Ty->isAggregateType())
453	return false;
454	unsigned TySize = DL.getTypeStoreSize(Ty);
455	// Only handle sub-DWORD loads.
456	if (TySize >= `4`)
457	return false;
458	// That load must be at least naturally aligned.
459	if (LI.getAlign() < DL.getABITypeAlign(Ty))
460	return false;
461	// It should be uniform, i.e. a scalar load.
462	return UA.isUniform(I: &LI);
463	}
464
465	bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
466	if (!WidenLoads)
467	return false;
468
469	// Skip if that load is already aligned on DWORD at least as it's handled in
470	// SDAG.
471	if (LI.getAlign() >= `4`)
472	return false;
473
474	if (!canWidenScalarExtLoad(LI))
475	return false;
476
477	int64_t Offset = `0`;
478	auto *Base =
479	GetPointerBaseWithConstantOffset(Ptr: LI.getPointerOperand(), Offset, DL);
480	// If that base is not DWORD aligned, it's not safe to perform the following
481	// transforms.
482	if (!isDWORDAligned(V: Base))
483	return false;
484
485	int64_t Adjust = Offset & `0x3`;
486	if (Adjust == `0`) {
487	// With a zero adjust, the original alignment could be promoted with a
488	// better one.
489	LI.setAlignment(Align (`4`));
490	return true;
491	}
492
493	IRBuilder<> IRB(&LI);
494	IRB.SetCurrentDebugLocation(LI.getDebugLoc());
495
496	unsigned LdBits = DL.getTypeStoreSizeInBits(Ty: LI.getType());
497	auto *IntNTy = Type::getIntNTy(C&: LI.getContext(), N: LdBits);
498
499	auto *NewPtr = IRB.CreateConstGEP1_64(
500	Ty: IRB.getInt8Ty(),
501	Ptr: IRB.CreateAddrSpaceCast(V: Base, DestTy: LI.getPointerOperand()->getType()),
502	Idx0: Offset - Adjust);
503
504	LoadInst *NewLd = IRB.CreateAlignedLoad(Ty: IRB.getInt32Ty(), Ptr: NewPtr, Align: Align (`4`));
505	NewLd->copyMetadata(SrcInst: LI);
506	NewLd->setMetadata(KindID: LLVMContext::MD_range, Node: nullptr);
507
508	unsigned ShAmt = Adjust * `8`;
509	Value *NewVal = IRB.CreateBitCast(
510	V: IRB.CreateTrunc(V: IRB.CreateLShr(LHS: NewLd, RHS: ShAmt),
511	DestTy: DL.typeSizeEqualsStoreSize(Ty: LI.getType()) ? IntNTy
512	: LI.getType()),
513	DestTy: LI.getType());
514	LI.replaceAllUsesWith(V: NewVal);
515	DeadInsts.emplace_back(Args: &LI);
516
517	return true;
518	}
519
520	PreservedAnalyses
521	AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
522	const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
523	AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(IR&: F);
524	UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(IR&: F);
525
526	bool Changed = AMDGPULateCodeGenPrepare (F, ST, &AC, UI).run();
527
528	if (!Changed)
529	return PreservedAnalyses::all();
530	PreservedAnalyses PA = PreservedAnalyses::none();
531	PA.preserveSet<CFGAnalyses>();
532	return PA;
533	}
534
535	class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
536	public:
537	static char ID;
538
539	AMDGPULateCodeGenPrepareLegacy() : FunctionPass (ID) {}
540
541	StringRef getPassName() const override {
542	return "AMDGPU IR late optimizations";
543	}
544
545	void getAnalysisUsage(AnalysisUsage &AU) const override {
546	AU.addRequired<TargetPassConfig>();
547	AU.addRequired<AssumptionCacheTracker>();
548	AU.addRequired<UniformityInfoWrapperPass>();
549	AU.setPreservesAll();
550	}
551
552	bool runOnFunction(Function &F) override;
553	};
554
555	bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
556	if (skipFunction(F))
557	return false;
558
559	const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
560	const TargetMachine &TM = TPC.getTM<TargetMachine>();
561	const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
562
563	AssumptionCache &AC =
564	getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
565	UniformityInfo &UI =
566	getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
567
568	return AMDGPULateCodeGenPrepare (F, ST, &AC, UI).run();
569	}
570
571	INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
572	"AMDGPU IR late optimizations", false, false)
573	INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
574	INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
575	INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
576	INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
577	"AMDGPU IR late optimizations", false, false)
578
579	char AMDGPULateCodeGenPrepareLegacy::ID = `0`;
580
581	FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
582	return new AMDGPULateCodeGenPrepareLegacy ();
583	}
584

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp