//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
11/// selection.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPU.h"
16#include "AMDGPUMemoryUtils.h"
17#include "AMDGPUTargetMachine.h"
18#include "llvm/Analysis/AssumptionCache.h"
19#include "llvm/Analysis/UniformityAnalysis.h"
20#include "llvm/Analysis/ValueTracking.h"
21#include "llvm/CodeGen/TargetPassConfig.h"
22#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/InstVisitor.h"
24#include "llvm/IR/IntrinsicsAMDGPU.h"
25#include "llvm/InitializePasses.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/KnownBits.h"
28#include "llvm/Transforms/Utils/Local.h"
29
30#define DEBUG_TYPE "amdgpu-late-codegenprepare"
31
32using namespace llvm;
33
34// Scalar load widening needs running after load-store-vectorizer as that pass
35// doesn't handle overlapping cases. In addition, this pass enhances the
36// widening to handle cases where scalar sub-dword loads are naturally aligned
37// only but not dword aligned.
38static cl::opt<bool>
39 WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
40 cl::desc("Widen sub-dword constant address space loads in "
41 "AMDGPULateCodeGenPrepare"),
42 cl::ReallyHidden, cl::init(Val: true));
43
44namespace {
45
46class AMDGPULateCodeGenPrepare
47 : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
48 Function &F;
49 const DataLayout &DL;
50 const GCNSubtarget &ST;
51
52 AssumptionCache *const AC;
53 UniformityInfo &UA;
54
55 SmallVector<WeakTrackingVH, 8> DeadInsts;
56
57public:
58 AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
59 AssumptionCache *AC, UniformityInfo &UA)
60 : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
61 bool run();
62 bool visitInstruction(Instruction &) { return false; }
63
64 // Check if the specified value is at least DWORD aligned.
65 bool isDWORDAligned(const Value *V) const {
66 KnownBits Known = computeKnownBits(V, DL, AC);
67 return Known.countMinTrailingZeros() >= 2;
68 }
69
70 bool canWidenScalarExtLoad(LoadInst &LI) const;
71 bool visitLoadInst(LoadInst &LI);
72};
73
74using ValueToValueMap = DenseMap<const Value *, Value *>;
75
76class LiveRegOptimizer {
77private:
78 Module &Mod;
79 const DataLayout &DL;
80 const GCNSubtarget &ST;
81
82 /// The scalar type to convert to
83 Type *const ConvertToScalar;
84 /// Map of Value -> Converted Value
85 ValueToValueMap ValMap;
86 /// Map of containing conversions from Optimal Type -> Original Type per BB.
87 DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;
88
89public:
90 /// Calculate the and \p return the type to convert to given a problematic \p
91 /// OriginalType. In some instances, we may widen the type (e.g. v2i8 -> i32).
92 Type *calculateConvertType(Type *OriginalType);
93 /// Convert the virtual register defined by \p V to the compatible vector of
94 /// legal type
95 Value *convertToOptType(Instruction *V, BasicBlock::iterator &InstPt);
96 /// Convert the virtual register defined by \p V back to the original type \p
97 /// ConvertType, stripping away the MSBs in cases where there was an imperfect
98 /// fit (e.g. v2i32 -> v7i8)
99 Value *convertFromOptType(Type *ConvertType, Instruction *V,
100 BasicBlock::iterator &InstPt,
101 BasicBlock *InsertBlock);
102 /// Check for problematic PHI nodes or cross-bb values based on the value
103 /// defined by \p I, and coerce to legal types if necessary. For problematic
104 /// PHI node, we coerce all incoming values in a single invocation.
105 bool optimizeLiveType(Instruction *I,
106 SmallVectorImpl<WeakTrackingVH> &DeadInsts);
107
108 // Whether or not the type should be replaced to avoid inefficient
109 // legalization code
110 bool shouldReplace(Type *ITy) {
111 FixedVectorType *VTy = dyn_cast<FixedVectorType>(Val: ITy);
112 if (!VTy)
113 return false;
114
115 const auto *TLI = ST.getTargetLowering();
116
117 Type *EltTy = VTy->getElementType();
118 // If the element size is not is not a multiple scalar size, then we can't
119 // do any bit packing
120 if (!EltTy->isIntegerTy() ||
121 ConvertToScalar->getScalarSizeInBits() % EltTy->getScalarSizeInBits())
122 return false;
123
124 // Only coerce illegal types
125 TargetLoweringBase::LegalizeKind LK =
126 TLI->getTypeConversion(Context&: EltTy->getContext(), VT: EVT::getEVT(Ty: EltTy, HandleUnknown: false));
127 return LK.first != TargetLoweringBase::TypeLegal;
128 }
129
130 bool isOpLegal(const Instruction *I) {
131 if (isa<IntrinsicInst>(Val: I))
132 return true;
133
134 // Any store is a profitable sink (prevents flip-flopping)
135 if (isa<StoreInst>(Val: I))
136 return true;
137
138 if (auto *BO = dyn_cast<BinaryOperator>(Val: I)) {
139 if (auto *VT = dyn_cast<FixedVectorType>(Val: BO->getType())) {
140 if (const auto *IT = dyn_cast<IntegerType>(Val: VT->getElementType())) {
141 unsigned EB = IT->getBitWidth();
142 unsigned EC = VT->getNumElements();
143 // Check for SDWA-compatible operation
144 if ((EB == 8 || EB == 16) && ST.hasSDWA() && EC * EB <= 32) {
145 switch (BO->getOpcode()) {
146 case Instruction::Add:
147 case Instruction::Sub:
148 case Instruction::And:
149 case Instruction::Or:
150 case Instruction::Xor:
151 return true;
152 default:
153 break;
154 }
155 }
156 }
157 }
158 }
159
160 return false;
161 }
162
163 bool isCoercionProfitable(Instruction *II) {
164 SmallPtrSet<Instruction *, 4> CVisited;
165 SmallVector<Instruction *, 4> UserList;
166
167 // Check users for profitable conditions (across block user which can
168 // natively handle the illegal vector).
169 for (User *V : II->users())
170 if (auto *UseInst = dyn_cast<Instruction>(Val: V))
171 UserList.push_back(Elt: UseInst);
172
173 auto IsLookThru = [](Instruction *II) {
174 if (const auto *Intr = dyn_cast<IntrinsicInst>(Val: II))
175 return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
176 return isa<PHINode, ShuffleVectorInst, InsertElementInst,
177 ExtractElementInst, CastInst>(Val: II);
178 };
179
180 while (!UserList.empty()) {
181 auto CII = UserList.pop_back_val();
182 if (!CVisited.insert(Ptr: CII).second)
183 continue;
184
185 // Same-BB filter must look at the *user*; and allow non-lookthrough
186 // users when the def is a PHI (loop-header pattern).
187 if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
188 !isa<PHINode>(Val: II))
189 continue;
190
191 if (isOpLegal(I: CII))
192 return true;
193
194 if (IsLookThru(CII))
195 for (User *V : CII->users())
196 if (auto *UseInst = dyn_cast<Instruction>(Val: V))
197 UserList.push_back(Elt: UseInst);
198 }
199 return false;
200 }
201
202 LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
203 : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
204 ConvertToScalar(Type::getInt32Ty(C&: Mod.getContext())) {}
205};
206
207} // end anonymous namespace
208
209bool AMDGPULateCodeGenPrepare::run() {
210 // "Optimize" the virtual regs that cross basic block boundaries. When
211 // building the SelectionDAG, vectors of illegal types that cross basic blocks
212 // will be scalarized and widened, with each scalar living in its
213 // own register. To work around this, this optimization converts the
214 // vectors to equivalent vectors of legal type (which are converted back
215 // before uses in subsequent blocks), to pack the bits into fewer physical
216 // registers (used in CopyToReg/CopyFromReg pairs).
217 LiveRegOptimizer LRO(*F.getParent(), ST);
218
219 bool Changed = false;
220
221 bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();
222
223 for (auto &BB : reverse(C&: F))
224 for (Instruction &I : make_early_inc_range(Range: reverse(C&: BB))) {
225 Changed |= !HasScalarSubwordLoads && visit(I);
226 Changed |= LRO.optimizeLiveType(I: &I, DeadInsts);
227 }
228
229 RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
230 return Changed;
231}
232
233Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
234 assert(OriginalType->getScalarSizeInBits() <=
235 ConvertToScalar->getScalarSizeInBits());
236
237 FixedVectorType *VTy = cast<FixedVectorType>(Val: OriginalType);
238
239 TypeSize OriginalSize = DL.getTypeSizeInBits(Ty: VTy);
240 TypeSize ConvertScalarSize = DL.getTypeSizeInBits(Ty: ConvertToScalar);
241 unsigned ConvertEltCount =
242 (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;
243
244 if (OriginalSize <= ConvertScalarSize)
245 return IntegerType::get(C&: Mod.getContext(), NumBits: ConvertScalarSize);
246
247 return VectorType::get(ElementType: Type::getIntNTy(C&: Mod.getContext(), N: ConvertScalarSize),
248 NumElements: ConvertEltCount, Scalable: false);
249}
250
251Value *LiveRegOptimizer::convertToOptType(Instruction *V,
252 BasicBlock::iterator &InsertPt) {
253 FixedVectorType *VTy = cast<FixedVectorType>(Val: V->getType());
254 Type *NewTy = calculateConvertType(OriginalType: V->getType());
255
256 TypeSize OriginalSize = DL.getTypeSizeInBits(Ty: VTy);
257 TypeSize NewSize = DL.getTypeSizeInBits(Ty: NewTy);
258
259 IRBuilder<> Builder(V->getParent(), InsertPt);
260 // If there is a bitsize match, we can fit the old vector into a new vector of
261 // desired type.
262 if (OriginalSize == NewSize)
263 return Builder.CreateBitCast(V, DestTy: NewTy, Name: V->getName() + ".bc");
264
265 // If there is a bitsize mismatch, we must use a wider vector.
266 assert(NewSize > OriginalSize);
267 uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();
268
269 SmallVector<int, 8> ShuffleMask;
270 uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
271 for (unsigned I = 0; I < OriginalElementCount; I++)
272 ShuffleMask.push_back(Elt: I);
273
274 for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
275 ShuffleMask.push_back(Elt: OriginalElementCount);
276
277 Value *ExpandedVec = Builder.CreateShuffleVector(V, Mask: ShuffleMask);
278 return Builder.CreateBitCast(V: ExpandedVec, DestTy: NewTy, Name: V->getName() + ".bc");
279}
280
281Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
282 BasicBlock::iterator &InsertPt,
283 BasicBlock *InsertBB) {
284 FixedVectorType *NewVTy = cast<FixedVectorType>(Val: ConvertType);
285
286 TypeSize OriginalSize = DL.getTypeSizeInBits(Ty: V->getType());
287 TypeSize NewSize = DL.getTypeSizeInBits(Ty: NewVTy);
288
289 IRBuilder<> Builder(InsertBB, InsertPt);
290 // If there is a bitsize match, we simply convert back to the original type.
291 if (OriginalSize == NewSize)
292 return Builder.CreateBitCast(V, DestTy: NewVTy, Name: V->getName() + ".bc");
293
294 // If there is a bitsize mismatch, then we must have used a wider value to
295 // hold the bits.
296 assert(OriginalSize > NewSize);
297 // For wide scalars, we can just truncate the value.
298 if (!V->getType()->isVectorTy()) {
299 Instruction *Trunc = cast<Instruction>(
300 Val: Builder.CreateTrunc(V, DestTy: IntegerType::get(C&: Mod.getContext(), NumBits: NewSize)));
301 return cast<Instruction>(Val: Builder.CreateBitCast(V: Trunc, DestTy: NewVTy));
302 }
303
304 // For wider vectors, we must strip the MSBs to convert back to the original
305 // type.
306 VectorType *ExpandedVT = VectorType::get(
307 ElementType: Type::getIntNTy(C&: Mod.getContext(), N: NewVTy->getScalarSizeInBits()),
308 NumElements: (OriginalSize / NewVTy->getScalarSizeInBits()), Scalable: false);
309 Instruction *Converted =
310 cast<Instruction>(Val: Builder.CreateBitCast(V, DestTy: ExpandedVT));
311
312 unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
313 SmallVector<int, 8> ShuffleMask(NarrowElementCount);
314 std::iota(first: ShuffleMask.begin(), last: ShuffleMask.end(), value: 0);
315
316 return Builder.CreateShuffleVector(V: Converted, Mask: ShuffleMask);
317}
318
319bool LiveRegOptimizer::optimizeLiveType(
320 Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
321 SmallVector<Instruction *, 4> Worklist;
322 SmallPtrSet<PHINode *, 4> PhiNodes;
323 SmallPtrSet<Instruction *, 4> Defs;
324 SmallPtrSet<Instruction *, 4> Uses;
325 SmallPtrSet<Instruction *, 4> Visited;
326
327 Worklist.push_back(Elt: cast<Instruction>(Val: I));
328 while (!Worklist.empty()) {
329 Instruction *II = Worklist.pop_back_val();
330
331 if (!Visited.insert(Ptr: II).second)
332 continue;
333
334 if (!shouldReplace(ITy: II->getType()))
335 continue;
336
337 if (!isCoercionProfitable(II))
338 continue;
339
340 if (PHINode *Phi = dyn_cast<PHINode>(Val: II)) {
341 PhiNodes.insert(Ptr: Phi);
342 // Collect all the incoming values of problematic PHI nodes.
343 for (Value *V : Phi->incoming_values()) {
344 // Repeat the collection process for newly found PHI nodes.
345 if (PHINode *OpPhi = dyn_cast<PHINode>(Val: V)) {
346 if (!PhiNodes.count(Ptr: OpPhi) && !Visited.count(Ptr: OpPhi))
347 Worklist.push_back(Elt: OpPhi);
348 continue;
349 }
350
351 Instruction *IncInst = dyn_cast<Instruction>(Val: V);
352 // Other incoming value types (e.g. vector literals) are unhandled
353 if (!IncInst && !isa<ConstantAggregateZero>(Val: V))
354 return false;
355
356 // Collect all other incoming values for coercion.
357 if (IncInst)
358 Defs.insert(Ptr: IncInst);
359 }
360 }
361
362 // Collect all relevant uses.
363 for (User *V : II->users()) {
364 // Repeat the collection process for problematic PHI nodes.
365 if (PHINode *OpPhi = dyn_cast<PHINode>(Val: V)) {
366 if (!PhiNodes.count(Ptr: OpPhi) && !Visited.count(Ptr: OpPhi))
367 Worklist.push_back(Elt: OpPhi);
368 continue;
369 }
370
371 Instruction *UseInst = cast<Instruction>(Val: V);
372 // Collect all uses of PHINodes and any use the crosses BB boundaries.
373 if (UseInst->getParent() != II->getParent() || isa<PHINode>(Val: II)) {
374 Uses.insert(Ptr: UseInst);
375 if (!isa<PHINode>(Val: II))
376 Defs.insert(Ptr: II);
377 }
378 }
379 }
380
381 // Coerce and track the defs.
382 for (Instruction *D : Defs) {
383 if (!ValMap.contains(Val: D)) {
384 BasicBlock::iterator InsertPt = std::next(x: D->getIterator());
385 Value *ConvertVal = convertToOptType(V: D, InsertPt);
386 assert(ConvertVal);
387 ValMap[D] = ConvertVal;
388 }
389 }
390
391 // Construct new-typed PHI nodes.
392 for (PHINode *Phi : PhiNodes) {
393 ValMap[Phi] = PHINode::Create(Ty: calculateConvertType(OriginalType: Phi->getType()),
394 NumReservedValues: Phi->getNumIncomingValues(),
395 NameStr: Phi->getName() + ".tc", InsertBefore: Phi->getIterator());
396 }
397
398 // Connect all the PHI nodes with their new incoming values.
399 for (PHINode *Phi : PhiNodes) {
400 PHINode *NewPhi = cast<PHINode>(Val: ValMap[Phi]);
401 bool MissingIncVal = false;
402 for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {
403 Value *IncVal = Phi->getIncomingValue(i: I);
404 if (isa<ConstantAggregateZero>(Val: IncVal)) {
405 Type *NewType = calculateConvertType(OriginalType: Phi->getType());
406 NewPhi->addIncoming(V: ConstantInt::get(Ty: NewType, V: 0, IsSigned: false),
407 BB: Phi->getIncomingBlock(i: I));
408 } else if (Value *Val = ValMap.lookup(Val: IncVal))
409 NewPhi->addIncoming(V: Val, BB: Phi->getIncomingBlock(i: I));
410 else
411 MissingIncVal = true;
412 }
413 if (MissingIncVal) {
414 Value *DeadVal = ValMap[Phi];
415 // The coercion chain of the PHI is broken. Delete the Phi
416 // from the ValMap and any connected / user Phis.
417 SmallVector<Value *, 4> PHIWorklist;
418 SmallPtrSet<Value *, 4> VisitedPhis;
419 PHIWorklist.push_back(Elt: DeadVal);
420 while (!PHIWorklist.empty()) {
421 Value *NextDeadValue = PHIWorklist.pop_back_val();
422 VisitedPhis.insert(Ptr: NextDeadValue);
423 auto OriginalPhi =
424 llvm::find_if(Range&: PhiNodes, P: [this, &NextDeadValue](PHINode *CandPhi) {
425 return ValMap[CandPhi] == NextDeadValue;
426 });
427 // This PHI may have already been removed from maps when
428 // unwinding a previous Phi
429 if (OriginalPhi != PhiNodes.end())
430 ValMap.erase(Val: *OriginalPhi);
431
432 DeadInsts.emplace_back(Args: cast<Instruction>(Val: NextDeadValue));
433
434 for (User *U : NextDeadValue->users()) {
435 if (!VisitedPhis.contains(Ptr: cast<PHINode>(Val: U)))
436 PHIWorklist.push_back(Elt: U);
437 }
438 }
439 } else {
440 DeadInsts.emplace_back(Args: cast<Instruction>(Val: Phi));
441 }
442 }
443 // Coerce back to the original type and replace the uses.
444 for (Instruction *U : Uses) {
445 // Replace all converted operands for a use.
446 for (auto [OpIdx, Op] : enumerate(First: U->operands())) {
447 if (Value *Val = ValMap.lookup(Val: Op)) {
448 Value *NewVal = nullptr;
449 if (BBUseValMap.contains(Val: U->getParent()) &&
450 BBUseValMap[U->getParent()].contains(Val))
451 NewVal = BBUseValMap[U->getParent()][Val];
452 else {
453 BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
454 // We may pick up ops that were previously converted for users in
455 // other blocks. If there is an originally typed definition of the Op
456 // already in this block, simply reuse it.
457 if (isa<Instruction>(Val: Op) && !isa<PHINode>(Val: Op) &&
458 U->getParent() == cast<Instruction>(Val&: Op)->getParent()) {
459 NewVal = Op;
460 } else {
461 NewVal =
462 convertFromOptType(ConvertType: Op->getType(), V: cast<Instruction>(Val: ValMap[Op]),
463 InsertPt, InsertBB: U->getParent());
464 BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
465 }
466 }
467 assert(NewVal);
468 U->setOperand(i: OpIdx, Val: NewVal);
469 }
470 }
471 }
472
473 return true;
474}
475
476bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
477 unsigned AS = LI.getPointerAddressSpace();
478 // Skip non-constant address space.
479 if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
480 AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
481 return false;
482 // Skip non-simple loads.
483 if (!LI.isSimple())
484 return false;
485 Type *Ty = LI.getType();
486 // Skip aggregate types.
487 if (Ty->isAggregateType())
488 return false;
489 unsigned TySize = DL.getTypeStoreSize(Ty);
490 // Only handle sub-DWORD loads.
491 if (TySize >= 4)
492 return false;
493 // That load must be at least naturally aligned.
494 if (LI.getAlign() < DL.getABITypeAlign(Ty))
495 return false;
496 // It should be uniform, i.e. a scalar load.
497 return UA.isUniformAtDef(V: &LI);
498}
499
500bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
501 if (!WidenLoads)
502 return false;
503
504 // Skip if that load is already aligned on DWORD at least as it's handled in
505 // SDAG.
506 if (LI.getAlign() >= 4)
507 return false;
508
509 if (!canWidenScalarExtLoad(LI))
510 return false;
511
512 int64_t Offset = 0;
513 auto *Base =
514 GetPointerBaseWithConstantOffset(Ptr: LI.getPointerOperand(), Offset, DL);
515 // If that base is not DWORD aligned, it's not safe to perform the following
516 // transforms.
517 if (!isDWORDAligned(V: Base))
518 return false;
519
520 int64_t Adjust = Offset & 0x3;
521 if (Adjust == 0) {
522 // With a zero adjust, the original alignment could be promoted with a
523 // better one.
524 LI.setAlignment(Align(4));
525 return true;
526 }
527
528 IRBuilder<> IRB(&LI);
529 IRB.SetCurrentDebugLocation(LI.getDebugLoc());
530
531 unsigned LdBits = DL.getTypeStoreSizeInBits(Ty: LI.getType());
532 auto *IntNTy = Type::getIntNTy(C&: LI.getContext(), N: LdBits);
533
534 auto *NewPtr = IRB.CreateConstGEP1_64(
535 Ty: IRB.getInt8Ty(),
536 Ptr: IRB.CreateAddrSpaceCast(V: Base, DestTy: LI.getPointerOperand()->getType()),
537 Idx0: Offset - Adjust);
538
539 LoadInst *NewLd = IRB.CreateAlignedLoad(Ty: IRB.getInt32Ty(), Ptr: NewPtr, Align: Align(4));
540 AMDGPU::copyMetadataForWidenedLoad(Dest&: *NewLd, Source: LI);
541
542 unsigned ShAmt = Adjust * 8;
543 Value *NewVal = IRB.CreateBitCast(
544 V: IRB.CreateTrunc(V: IRB.CreateLShr(LHS: NewLd, RHS: ShAmt),
545 DestTy: DL.typeSizeEqualsStoreSize(Ty: LI.getType()) ? IntNTy
546 : LI.getType()),
547 DestTy: LI.getType());
548 LI.replaceAllUsesWith(V: NewVal);
549 DeadInsts.emplace_back(Args: &LI);
550
551 return true;
552}
553
554PreservedAnalyses
555AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
556 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
557 AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(IR&: F);
558 UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(IR&: F);
559
560 bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
561
562 if (!Changed)
563 return PreservedAnalyses::all();
564 PreservedAnalyses PA = PreservedAnalyses::none();
565 PA.preserveSet<CFGAnalyses>();
566 return PA;
567}
568
569class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
570public:
571 static char ID;
572
573 AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}
574
575 StringRef getPassName() const override {
576 return "AMDGPU IR late optimizations";
577 }
578
579 void getAnalysisUsage(AnalysisUsage &AU) const override {
580 AU.addRequired<TargetPassConfig>();
581 AU.addRequired<AssumptionCacheTracker>();
582 AU.addRequired<UniformityInfoWrapperPass>();
583 // Invalidates UniformityInfo
584 AU.setPreservesCFG();
585 }
586
587 bool runOnFunction(Function &F) override;
588};
589
590bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
591 if (skipFunction(F))
592 return false;
593
594 const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
595 const TargetMachine &TM = TPC.getTM<TargetMachine>();
596 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
597
598 AssumptionCache &AC =
599 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
600 UniformityInfo &UI =
601 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
602
603 return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
604}
605
606INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
607 "AMDGPU IR late optimizations", false, false)
608INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
609INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
610INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
611INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
612 "AMDGPU IR late optimizations", false, false)
613
614char AMDGPULateCodeGenPrepareLegacy::ID = 0;
615
616FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
617 return new AMDGPULateCodeGenPrepareLegacy();
618}
619