AMDGPULateCodeGenPrepare.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp]

1	//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
11	/// selection.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "AMDGPU.h"
16	#include "AMDGPUMemoryUtils.h"
17	#include "AMDGPUTargetMachine.h"
18	#include "llvm/Analysis/AssumptionCache.h"
19	#include "llvm/Analysis/UniformityAnalysis.h"
20	#include "llvm/Analysis/ValueTracking.h"
21	#include "llvm/CodeGen/TargetPassConfig.h"
22	#include "llvm/IR/IRBuilder.h"
23	#include "llvm/IR/InstVisitor.h"
24	#include "llvm/IR/IntrinsicsAMDGPU.h"
25	#include "llvm/InitializePasses.h"
26	#include "llvm/Support/CommandLine.h"
27	#include "llvm/Support/KnownBits.h"
28	#include "llvm/Transforms/Utils/Local.h"
29
30	#define DEBUG_TYPE "amdgpu-late-codegenprepare"
31
32	using namespace llvm;
33
34	// Scalar load widening needs running after load-store-vectorizer as that pass
35	// doesn't handle overlapping cases. In addition, this pass enhances the
36	// widening to handle cases where scalar sub-dword loads are naturally aligned
37	// only but not dword aligned.
38	static cl::opt<bool>
39	WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
40	cl::desc("Widen sub-dword constant address space loads in "
41	"AMDGPULateCodeGenPrepare"),
42	cl::ReallyHidden, cl::init(Val: true));
43
44	namespace {
45
46	class AMDGPULateCodeGenPrepare
47	: public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
48	Function &F;
49	const DataLayout &DL;
50	const GCNSubtarget &ST;
51
52	AssumptionCache *const AC;
53	UniformityInfo &UA;
54
55	SmallVector<WeakTrackingVH, `8`> DeadInsts;
56
57	public:
58	AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
59	AssumptionCache *AC, UniformityInfo &UA)
60	: F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
61	bool run();
62	bool visitInstruction(Instruction &) { return false; }
63
64	// Check if the specified value is at least DWORD aligned.
65	bool isDWORDAligned(const Value V) const* {
66	KnownBits Known = computeKnownBits(V, DL, AC);
67	return Known.countMinTrailingZeros() >= `2`;
68	}
69
70	bool canWidenScalarExtLoad(LoadInst &LI) const;
71	bool visitLoadInst(LoadInst &LI);
72	};
73
74	using ValueToValueMap = DenseMap<const Value , Value >;
75
76	class LiveRegOptimizer {
77	private:
78	Module &Mod;
79	const DataLayout &DL;
80	const GCNSubtarget &ST;
81
82	/// The scalar type to convert to
83	Type *const ConvertToScalar;
84	/// Map of Value -> Converted Value
85	ValueToValueMap ValMap;
86	/// Map of containing conversions from Optimal Type -> Original Type per BB.
87	DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;
88
89	public:
90	/// Calculate the and \p return the type to convert to given a problematic \p
91	/// OriginalType. In some instances, we may widen the type (e.g. v2i8 -> i32).
92	Type calculateConvertType(Type OriginalType);
93	/// Convert the virtual register defined by \p V to the compatible vector of
94	/// legal type
95	Value convertToOptType(Instruction V, BasicBlock::iterator &InstPt);
96	/// Convert the virtual register defined by \p V back to the original type \p
97	/// ConvertType, stripping away the MSBs in cases where there was an imperfect
98	/// fit (e.g. v2i32 -> v7i8)
99	Value convertFromOptType(Type ConvertType, Instruction *V,
100	BasicBlock::iterator &InstPt,
101	BasicBlock *InsertBlock);
102	/// Check for problematic PHI nodes or cross-bb values based on the value
103	/// defined by \p I, and coerce to legal types if necessary. For problematic
104	/// PHI node, we coerce all incoming values in a single invocation.
105	bool optimizeLiveType(Instruction *I,
106	SmallVectorImpl<WeakTrackingVH> &DeadInsts);
107
108	// Whether or not the type should be replaced to avoid inefficient
109	// legalization code
110	bool shouldReplace(Type *ITy) {
111	FixedVectorType *VTy = dyn_cast<FixedVectorType>(Val: ITy);
112	if (!VTy)
113	return false;
114
115	const auto *TLI = ST.getTargetLowering();
116
117	Type *EltTy = VTy->getElementType();
118	// If the element size is not is not a multiple scalar size, then we can't
119	// do any bit packing
120	if (!EltTy->isIntegerTy() \|\|
121	ConvertToScalar->getScalarSizeInBits() % EltTy->getScalarSizeInBits())
122	return false;
123
124	// Only coerce illegal types
125	TargetLoweringBase::LegalizeKind LK =
126	TLI->getTypeConversion(Context&: EltTy->getContext(), VT: EVT::getEVT(Ty: EltTy, HandleUnknown: false));
127	return LK.first != TargetLoweringBase::TypeLegal;
128	}
129
130	bool isOpLegal(const Instruction *I) {
131	if (isa<IntrinsicInst>(Val: I))
132	return true;
133
134	// Any store is a profitable sink (prevents flip-flopping)
135	if (isa<StoreInst>(Val: I))
136	return true;
137
138	if (auto *BO = dyn_cast<BinaryOperator>(Val: I)) {
139	if (auto *VT = dyn_cast<FixedVectorType>(Val: BO->getType())) {
140	if (const auto *IT = dyn_cast<IntegerType>(Val: VT->getElementType())) {
141	unsigned EB = IT->getBitWidth();
142	unsigned EC = VT->getNumElements();
143	// Check for SDWA-compatible operation
144	if ((EB == `8` \|\| EB == `16`) && ST.hasSDWA() && EC * EB <= `32`) {
145	switch (BO->getOpcode()) {
146	case Instruction::Add:
147	case Instruction::Sub:
148	case Instruction::And:
149	case Instruction::Or:
150	case Instruction::Xor:
151	return true;
152	default:
153	break;
154	}
155	}
156	}
157	}
158	}
159
160	return false;
161	}
162
163	bool isCoercionProfitable(Instruction *II) {
164	SmallPtrSet<Instruction *, `4`> CVisited;
165	SmallVector<Instruction *, `4`> UserList;
166
167	// Check users for profitable conditions (across block user which can
168	// natively handle the illegal vector).
169	for (User *V : II->users())
170	if (auto *UseInst = dyn_cast<Instruction>(Val: V))
171	UserList.push_back(Elt: UseInst);
172
173	auto IsLookThru = [](Instruction *II) {
174	if (const auto *Intr = dyn_cast<IntrinsicInst>(Val: II))
175	return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
176	return isa<PHINode, ShuffleVectorInst, InsertElementInst,
177	ExtractElementInst, CastInst>(Val: II);
178	};
179
180	while (!UserList.empty()) {
181	auto CII = UserList.pop_back_val();
182	if (!CVisited.insert(Ptr: CII).second)
183	continue;
184
185	// Same-BB filter must look at the user; and allow non-lookthrough
186	// users when the def is a PHI (loop-header pattern).
187	if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
188	!isa<PHINode>(Val: II))
189	continue;
190
191	if (isOpLegal(I: CII))
192	return true;
193
194	if (IsLookThru(CII))
195	for (User *V : CII->users())
196	if (auto *UseInst = dyn_cast<Instruction>(Val: V))
197	UserList.push_back(Elt: UseInst);
198	}
199	return false;
200	}
201
202	LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
203	: Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
204	ConvertToScalar(Type::getInt32Ty(C&: Mod.getContext())) {}
205	};
206
207	} // end anonymous namespace
208
209	bool AMDGPULateCodeGenPrepare::run() {
210	// "Optimize" the virtual regs that cross basic block boundaries. When
211	// building the SelectionDAG, vectors of illegal types that cross basic blocks
212	// will be scalarized and widened, with each scalar living in its
213	// own register. To work around this, this optimization converts the
214	// vectors to equivalent vectors of legal type (which are converted back
215	// before uses in subsequent blocks), to pack the bits into fewer physical
216	// registers (used in CopyToReg/CopyFromReg pairs).
217	LiveRegOptimizer LRO(*F.getParent(), ST);
218
219	bool Changed = false;
220
221	bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();
222
223	for (auto &BB : reverse(C&: F))
224	for (Instruction &I : make_early_inc_range(Range: reverse(C&: BB))) {
225	Changed \|= !HasScalarSubwordLoads && visit(I);
226	Changed \|= LRO.optimizeLiveType(I: &I, DeadInsts);
227	}
228
229	RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
230	return Changed;
231	}
232
233	Type LiveRegOptimizer::calculateConvertType(Type OriginalType) {
234	assert(OriginalType->getScalarSizeInBits() <=
235	ConvertToScalar->getScalarSizeInBits());
236
237	FixedVectorType *VTy = cast<FixedVectorType>(Val: OriginalType);
238
239	TypeSize OriginalSize = DL.getTypeSizeInBits(Ty: VTy);
240	TypeSize ConvertScalarSize = DL.getTypeSizeInBits(Ty: ConvertToScalar);
241	unsigned ConvertEltCount =
242	(OriginalSize + ConvertScalarSize - `1`) / ConvertScalarSize;
243
244	if (OriginalSize <= ConvertScalarSize)
245	return IntegerType::get(C&: Mod.getContext(), NumBits: ConvertScalarSize);
246
247	return VectorType::get(ElementType: Type::getIntNTy(C&: Mod.getContext(), N: ConvertScalarSize),
248	NumElements: ConvertEltCount, Scalable: false);
249	}
250
251	Value LiveRegOptimizer::convertToOptType(Instruction V,
252	BasicBlock::iterator &InsertPt) {
253	FixedVectorType *VTy = cast<FixedVectorType>(Val: V->getType());
254	Type *NewTy = calculateConvertType(OriginalType: V->getType());
255
256	TypeSize OriginalSize = DL.getTypeSizeInBits(Ty: VTy);
257	TypeSize NewSize = DL.getTypeSizeInBits(Ty: NewTy);
258
259	IRBuilder<> Builder(V->getParent(), InsertPt);
260	// If there is a bitsize match, we can fit the old vector into a new vector of
261	// desired type.
262	if (OriginalSize == NewSize)
263	return Builder.CreateBitCast(V, DestTy: NewTy, Name: V->getName() + ".bc");
264
265	// If there is a bitsize mismatch, we must use a wider vector.
266	assert(NewSize > OriginalSize);
267	uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();
268
269	SmallVector<int, `8`> ShuffleMask;
270	uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
271	for (unsigned I = `0`; I < OriginalElementCount; I++)
272	ShuffleMask.push_back(Elt: I);
273
274	for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
275	ShuffleMask.push_back(Elt: OriginalElementCount);
276
277	Value *ExpandedVec = Builder.CreateShuffleVector(V, Mask: ShuffleMask);
278	return Builder.CreateBitCast(V: ExpandedVec, DestTy: NewTy, Name: V->getName() + ".bc");
279	}
280
281	Value LiveRegOptimizer::convertFromOptType(Type ConvertType, Instruction *V,
282	BasicBlock::iterator &InsertPt,
283	BasicBlock *InsertBB) {
284	FixedVectorType *NewVTy = cast<FixedVectorType>(Val: ConvertType);
285
286	TypeSize OriginalSize = DL.getTypeSizeInBits(Ty: V->getType());
287	TypeSize NewSize = DL.getTypeSizeInBits(Ty: NewVTy);
288
289	IRBuilder<> Builder(InsertBB, InsertPt);
290	// If there is a bitsize match, we simply convert back to the original type.
291	if (OriginalSize == NewSize)
292	return Builder.CreateBitCast(V, DestTy: NewVTy, Name: V->getName() + ".bc");
293
294	// If there is a bitsize mismatch, then we must have used a wider value to
295	// hold the bits.
296	assert(OriginalSize > NewSize);
297	// For wide scalars, we can just truncate the value.
298	if (!V->getType()->isVectorTy()) {
299	Instruction *Trunc = cast<Instruction>(
300	Val: Builder.CreateTrunc(V, DestTy: IntegerType::get(C&: Mod.getContext(), NumBits: NewSize)));
301	return cast<Instruction>(Val: Builder.CreateBitCast(V: Trunc, DestTy: NewVTy));
302	}
303
304	// For wider vectors, we must strip the MSBs to convert back to the original
305	// type.
306	VectorType *ExpandedVT = VectorType::get(
307	ElementType: Type::getIntNTy(C&: Mod.getContext(), N: NewVTy->getScalarSizeInBits()),
308	NumElements: (OriginalSize / NewVTy->getScalarSizeInBits()), Scalable: false);
309	Instruction *Converted =
310	cast<Instruction>(Val: Builder.CreateBitCast(V, DestTy: ExpandedVT));
311
312	unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
313	SmallVector<int, `8`> ShuffleMask(NarrowElementCount);
314	std::iota(first: ShuffleMask.begin(), last: ShuffleMask.end(), value: `0`);
315
316	return Builder.CreateShuffleVector(V: Converted, Mask: ShuffleMask);
317	}
318
319	bool LiveRegOptimizer::optimizeLiveType(
320	Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
321	SmallVector<Instruction *, `4`> Worklist;
322	SmallPtrSet<PHINode *, `4`> PhiNodes;
323	SmallPtrSet<Instruction *, `4`> Defs;
324	SmallPtrSet<Instruction *, `4`> Uses;
325	SmallPtrSet<Instruction *, `4`> Visited;
326
327	Worklist.push_back(Elt: cast<Instruction>(Val: I));
328	while (!Worklist.empty()) {
329	Instruction *II = Worklist.pop_back_val();
330
331	if (!Visited.insert(Ptr: II).second)
332	continue;
333
334	if (!shouldReplace(ITy: II->getType()))
335	continue;
336
337	if (!isCoercionProfitable(II))
338	continue;
339
340	if (PHINode *Phi = dyn_cast<PHINode>(Val: II)) {
341	PhiNodes.insert(Ptr: Phi);
342	// Collect all the incoming values of problematic PHI nodes.
343	for (Value *V : Phi->incoming_values()) {
344	// Repeat the collection process for newly found PHI nodes.
345	if (PHINode *OpPhi = dyn_cast<PHINode>(Val: V)) {
346	if (!PhiNodes.count(Ptr: OpPhi) && !Visited.count(Ptr: OpPhi))
347	Worklist.push_back(Elt: OpPhi);
348	continue;
349	}
350
351	Instruction *IncInst = dyn_cast<Instruction>(Val: V);
352	// Other incoming value types (e.g. vector literals) are unhandled
353	if (!IncInst && !isa<ConstantAggregateZero>(Val: V))
354	return false;
355
356	// Collect all other incoming values for coercion.
357	if (IncInst)
358	Defs.insert(Ptr: IncInst);
359	}
360	}
361
362	// Collect all relevant uses.
363	for (User *V : II->users()) {
364	// Repeat the collection process for problematic PHI nodes.
365	if (PHINode *OpPhi = dyn_cast<PHINode>(Val: V)) {
366	if (!PhiNodes.count(Ptr: OpPhi) && !Visited.count(Ptr: OpPhi))
367	Worklist.push_back(Elt: OpPhi);
368	continue;
369	}
370
371	Instruction *UseInst = cast<Instruction>(Val: V);
372	// Collect all uses of PHINodes and any use the crosses BB boundaries.
373	if (UseInst->getParent() != II->getParent() \|\| isa<PHINode>(Val: II)) {
374	Uses.insert(Ptr: UseInst);
375	if (!isa<PHINode>(Val: II))
376	Defs.insert(Ptr: II);
377	}
378	}
379	}
380
381	// Coerce and track the defs.
382	for (Instruction *D : Defs) {
383	if (!ValMap.contains(Val: D)) {
384	BasicBlock::iterator InsertPt = std::next(x: D->getIterator());
385	Value *ConvertVal = convertToOptType(V: D, InsertPt);
386	assert(ConvertVal);
387	ValMap [D] = ConvertVal;
388	}
389	}
390
391	// Construct new-typed PHI nodes.
392	for (PHINode *Phi : PhiNodes) {
393	ValMap [Phi] = PHINode::Create(Ty: calculateConvertType(OriginalType: Phi->getType()),
394	NumReservedValues: Phi->getNumIncomingValues(),
395	NameStr: Phi->getName() + ".tc", InsertBefore: Phi->getIterator());
396	}
397
398	// Connect all the PHI nodes with their new incoming values.
399	for (PHINode *Phi : PhiNodes) {
400	PHINode *NewPhi = cast<PHINode>(Val: ValMap [Phi]);
401	bool MissingIncVal = false;
402	for (int I = `0`, E = Phi->getNumIncomingValues(); I < E; I++) {
403	Value *IncVal = Phi->getIncomingValue(i: I);
404	if (isa<ConstantAggregateZero>(Val: IncVal)) {
405	Type *NewType = calculateConvertType(OriginalType: Phi->getType());
406	NewPhi->addIncoming(V: ConstantInt::get(Ty: NewType, V: `0`, IsSigned: false),
407	BB: Phi->getIncomingBlock(i: I));
408	} else if (Value *Val = ValMap.lookup(Val: IncVal))
409	NewPhi->addIncoming(V: Val, BB: Phi->getIncomingBlock(i: I));
410	else
411	MissingIncVal = true;
412	}
413	if (MissingIncVal) {
414	Value *DeadVal = ValMap [Phi];
415	// The coercion chain of the PHI is broken. Delete the Phi
416	// from the ValMap and any connected / user Phis.
417	SmallVector<Value *, `4`> PHIWorklist;
418	SmallPtrSet<Value *, `4`> VisitedPhis;
419	PHIWorklist.push_back(Elt: DeadVal);
420	while (!PHIWorklist.empty()) {
421	Value *NextDeadValue = PHIWorklist.pop_back_val();
422	VisitedPhis.insert(Ptr: NextDeadValue);
423	auto OriginalPhi =
424	llvm::find_if(Range&: PhiNodes, P: [this, &NextDeadValue](PHINode *CandPhi) {
425	return ValMap [CandPhi] == NextDeadValue;
426	});
427	// This PHI may have already been removed from maps when
428	// unwinding a previous Phi
429	if (OriginalPhi != PhiNodes.end())
430	ValMap.erase(Val: *OriginalPhi);
431
432	DeadInsts.emplace_back(Args: cast<Instruction>(Val: NextDeadValue));
433
434	for (User *U : NextDeadValue->users()) {
435	if (!VisitedPhis.contains(Ptr: cast<PHINode>(Val: U)))
436	PHIWorklist.push_back(Elt: U);
437	}
438	}
439	} else {
440	DeadInsts.emplace_back(Args: cast<Instruction>(Val: Phi));
441	}
442	}
443	// Coerce back to the original type and replace the uses.
444	for (Instruction *U : Uses) {
445	// Replace all converted operands for a use.
446	for (auto [OpIdx, Op] : enumerate(First: U->operands())) {
447	if (Value *Val = ValMap.lookup(Val: Op)) {
448	Value NewVal = nullptr*;
449	if (BBUseValMap.contains(Val: U->getParent()) &&
450	BBUseValMap [U->getParent()].contains(Val))
451	NewVal = BBUseValMap [U->getParent()][Val];
452	else {
453	BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
454	// We may pick up ops that were previously converted for users in
455	// other blocks. If there is an originally typed definition of the Op
456	// already in this block, simply reuse it.
457	if (isa<Instruction>(Val: Op) && !isa<PHINode>(Val: Op) &&
458	U->getParent() == cast<Instruction>(Val&: Op)->getParent()) {
459	NewVal = Op;
460	} else {
461	NewVal =
462	convertFromOptType(ConvertType: Op ->getType(), V: cast<Instruction>(Val: ValMap [Op]),
463	InsertPt, InsertBB: U->getParent());
464	BBUseValMap [U->getParent()][ValMap [Op]] = NewVal;
465	}
466	}
467	assert(NewVal);
468	U->setOperand(i: OpIdx, Val: NewVal);
469	}
470	}
471	}
472
473	return true;
474	}
475
476	bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
477	unsigned AS = LI.getPointerAddressSpace();
478	// Skip non-constant address space.
479	if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
480	AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
481	return false;
482	// Skip non-simple loads.
483	if (!LI.isSimple())
484	return false;
485	Type *Ty = LI.getType();
486	// Skip aggregate types.
487	if (Ty->isAggregateType())
488	return false;
489	unsigned TySize = DL.getTypeStoreSize(Ty);
490	// Only handle sub-DWORD loads.
491	if (TySize >= `4`)
492	return false;
493	// That load must be at least naturally aligned.
494	if (LI.getAlign() < DL.getABITypeAlign(Ty))
495	return false;
496	// It should be uniform, i.e. a scalar load.
497	return UA.isUniformAtDef(V: &LI);
498	}
499
500	bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
501	if (!WidenLoads)
502	return false;
503
504	// Skip if that load is already aligned on DWORD at least as it's handled in
505	// SDAG.
506	if (LI.getAlign() >= `4`)
507	return false;
508
509	if (!canWidenScalarExtLoad(LI))
510	return false;
511
512	int64_t Offset = `0`;
513	auto *Base =
514	GetPointerBaseWithConstantOffset(Ptr: LI.getPointerOperand(), Offset, DL);
515	// If that base is not DWORD aligned, it's not safe to perform the following
516	// transforms.
517	if (!isDWORDAligned(V: Base))
518	return false;
519
520	int64_t Adjust = Offset & `0x3`;
521	if (Adjust == `0`) {
522	// With a zero adjust, the original alignment could be promoted with a
523	// better one.
524	LI.setAlignment(Align (`4`));
525	return true;
526	}
527
528	IRBuilder<> IRB(&LI);
529	IRB.SetCurrentDebugLocation(LI.getDebugLoc());
530
531	unsigned LdBits = DL.getTypeStoreSizeInBits(Ty: LI.getType());
532	auto *IntNTy = Type::getIntNTy(C&: LI.getContext(), N: LdBits);
533
534	auto *NewPtr = IRB.CreateConstGEP1_64(
535	Ty: IRB.getInt8Ty(),
536	Ptr: IRB.CreateAddrSpaceCast(V: Base, DestTy: LI.getPointerOperand()->getType()),
537	Idx0: Offset - Adjust);
538
539	LoadInst *NewLd = IRB.CreateAlignedLoad(Ty: IRB.getInt32Ty(), Ptr: NewPtr, Align: Align (`4`));
540	AMDGPU::copyMetadataForWidenedLoad(Dest&: *NewLd, Source: LI);
541
542	unsigned ShAmt = Adjust * `8`;
543	Value *NewVal = IRB.CreateBitCast(
544	V: IRB.CreateTrunc(V: IRB.CreateLShr(LHS: NewLd, RHS: ShAmt),
545	DestTy: DL.typeSizeEqualsStoreSize(Ty: LI.getType()) ? IntNTy
546	: LI.getType()),
547	DestTy: LI.getType());
548	LI.replaceAllUsesWith(V: NewVal);
549	DeadInsts.emplace_back(Args: &LI);
550
551	return true;
552	}
553
554	PreservedAnalyses
555	AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
556	const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
557	AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(IR&: F);
558	UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(IR&: F);
559
560	bool Changed = AMDGPULateCodeGenPrepare (F, ST, &AC, UI).run();
561
562	if (!Changed)
563	return PreservedAnalyses::all();
564	PreservedAnalyses PA = PreservedAnalyses::none();
565	PA.preserveSet<CFGAnalyses>();
566	return PA;
567	}
568
569	class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
570	public:
571	static char ID;
572
573	AMDGPULateCodeGenPrepareLegacy() : FunctionPass (ID) {}
574
575	StringRef getPassName() const override {
576	return "AMDGPU IR late optimizations";
577	}
578
579	void getAnalysisUsage(AnalysisUsage &AU) const override {
580	AU.addRequired<TargetPassConfig>();
581	AU.addRequired<AssumptionCacheTracker>();
582	AU.addRequired<UniformityInfoWrapperPass>();
583	// Invalidates UniformityInfo
584	AU.setPreservesCFG();
585	}
586
587	bool runOnFunction(Function &F) override;
588	};
589
590	bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
591	if (skipFunction(F))
592	return false;
593
594	const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
595	const TargetMachine &TM = TPC.getTM<TargetMachine>();
596	const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
597
598	AssumptionCache &AC =
599	getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
600	UniformityInfo &UI =
601	getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
602
603	return AMDGPULateCodeGenPrepare (F, ST, &AC, UI).run();
604	}
605
606	INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
607	"AMDGPU IR late optimizations", false, false)
608	INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
609	INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
610	INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
611	INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
612	"AMDGPU IR late optimizations", false, false)
613
614	char AMDGPULateCodeGenPrepareLegacy::ID = `0`;
615
616	FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
617	return new AMDGPULateCodeGenPrepareLegacy ();
618	}
619

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp