//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"

#include <numeric> // std::iota, used when building shuffle masks below.

#define DEBUG_TYPE "amdgpu-late-codegenprepare"

using namespace llvm;

// Scalar load widening needs to run after the load/store vectorizer, as that
// pass doesn't handle overlapping cases. In addition, this pass enhances the
// widening to handle cases where scalar sub-dword loads are only naturally
// aligned rather than dword aligned.
static cl::opt<bool>
    WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
               cl::desc("Widen sub-dword constant address space loads in "
                        "AMDGPULateCodeGenPrepare"),
               cl::ReallyHidden, cl::init(true));

namespace {

class AMDGPULateCodeGenPrepare
    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
  Function &F;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  AssumptionCache *const AC;
  UniformityInfo &UA;

  SmallVector<WeakTrackingVH, 8> DeadInsts;

public:
  AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
                           AssumptionCache *AC, UniformityInfo &UA)
      : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
  bool run();
  bool visitInstruction(Instruction &) { return false; }

  // Check if the specified value is at least DWORD aligned.
  bool isDWORDAligned(const Value *V) const {
    KnownBits Known = computeKnownBits(V, DL, AC);
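    // At least two known-zero low bits mean the value is a multiple of 4
    // bytes, i.e. DWORD aligned.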
    return Known.countMinTrailingZeros() >= 2;
  }

  bool canWidenScalarExtLoad(LoadInst &LI) const;
  bool visitLoadInst(LoadInst &LI);
};

using ValueToValueMap = DenseMap<const Value *, Value *>;

class LiveRegOptimizer {
private:
  Module &Mod;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  /// The scalar type to convert to.
  Type *const ConvertToScalar;
  /// The set of visited Instructions.
  SmallPtrSet<Instruction *, 4> Visited;
  /// Map of Value -> Converted Value.
  ValueToValueMap ValMap;
  /// Map containing the conversions from Optimal Type -> Original Type per BB.
  DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;

public:
  /// Calculate and return the type to convert to given a problematic \p
  /// OriginalType. In some instances, we may widen the type (e.g. v2i8 -> i32).
  Type *calculateConvertType(Type *OriginalType);
  /// Convert the virtual register defined by \p V to the compatible vector of
  /// legal type.
  Value *convertToOptType(Instruction *V, BasicBlock::iterator &InstPt);
  /// Convert the virtual register defined by \p V back to the original type \p
  /// ConvertType, stripping away the MSBs in cases where there was an imperfect
  /// fit (e.g. v2i32 -> v7i8).
  Value *convertFromOptType(Type *ConvertType, Instruction *V,
                            BasicBlock::iterator &InstPt,
                            BasicBlock *InsertBlock);
  /// Check for problematic PHI nodes or cross-BB values based on the value
  /// defined by \p I, and coerce to legal types if necessary. For a problematic
  /// PHI node, we coerce all incoming values in a single invocation.
  bool optimizeLiveType(Instruction *I,
                        SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  // Whether or not the type should be replaced to avoid inefficient
  // legalization code.
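  // (For example, a <4 x i8>, whose i8 elements get promoted during
  // legalization, is a candidate for coercion, while an already-legal
  // <2 x i32> is left untouched.)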
  bool shouldReplace(Type *ITy) {
    FixedVectorType *VTy = dyn_cast<FixedVectorType>(ITy);
    if (!VTy)
      return false;

    const auto *TLI = ST.getTargetLowering();

    Type *EltTy = VTy->getElementType();
    // If the element type is wider than the scalar type we convert to, then we
    // can't do any bit packing.
    if (!EltTy->isIntegerTy() ||
        EltTy->getScalarSizeInBits() > ConvertToScalar->getScalarSizeInBits())
      return false;

    // Only coerce illegal types.
    TargetLoweringBase::LegalizeKind LK =
        TLI->getTypeConversion(EltTy->getContext(), EVT::getEVT(EltTy, false));
    return LK.first != TargetLoweringBase::TypeLegal;
  }

  bool isOpLegal(Instruction *I) { return isa<StoreInst, IntrinsicInst>(I); }

  bool isCoercionProfitable(Instruction *II) {
    SmallPtrSet<Instruction *, 4> CVisited;
    SmallVector<Instruction *, 4> UserList;

    // Check users for profitable conditions (a cross-block user that can
    // natively handle the illegal vector).
    for (User *V : II->users())
      if (auto *UseInst = dyn_cast<Instruction>(V))
        UserList.push_back(UseInst);

    auto IsLookThru = [](Instruction *II) {
      if (const auto *Intr = dyn_cast<IntrinsicInst>(II))
        return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
      return isa<PHINode, ShuffleVectorInst, InsertElementInst,
                 ExtractElementInst, CastInst>(II);
    };

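    // Walk the transitive users through "look-through" operations; coercion is
    // only considered profitable if the walk reaches an operation (a store or
    // an intrinsic call) that consumes the value.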
    while (!UserList.empty()) {
      auto CII = UserList.pop_back_val();
      if (!CVisited.insert(CII).second)
        continue;

      if (CII->getParent() == II->getParent() && !IsLookThru(II))
        continue;

      if (isOpLegal(CII))
        return true;

      if (IsLookThru(CII))
        for (User *V : CII->users())
          if (auto *UseInst = dyn_cast<Instruction>(V))
            UserList.push_back(UseInst);
    }
    return false;
  }

  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
        ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}
};

} // end anonymous namespace

bool AMDGPULateCodeGenPrepare::run() {
  // "Optimize" the virtual regs that cross basic block boundaries. When
  // building the SelectionDAG, vectors of illegal types that cross basic
  // blocks are scalarized and widened, with each scalar living in its own
  // register. To work around this, this optimization converts the vectors to
  // equivalent vectors of legal type (which are converted back before uses in
  // subsequent blocks) to pack the bits into fewer physical registers (used in
  // CopyToReg/CopyFromReg pairs).
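  //
  // A minimal hand-written sketch (names and types illustrative, not taken
  // from a real test) of the kind of rewrite LiveRegOptimizer performs on a
  // <4 x i8> value that is only used in a later block:
  //
  //   bb0:
  //     %v = load <4 x i8>, ptr addrspace(1) %p
  //     br label %bb1
  //   bb1:
  //     store <4 x i8> %v, ptr addrspace(1) %q
  //
  // becomes (so that only a single i32 crosses the block boundary):
  //
  //   bb0:
  //     %v = load <4 x i8>, ptr addrspace(1) %p
  //     %v.bc = bitcast <4 x i8> %v to i32
  //     br label %bb1
  //   bb1:
  //     %v.unpack = bitcast i32 %v.bc to <4 x i8>
  //     store <4 x i8> %v.unpack, ptr addrspace(1) %q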
  LiveRegOptimizer LRO(*F.getParent(), ST);

  bool Changed = false;

  bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();

  for (auto &BB : reverse(F))
    for (Instruction &I : make_early_inc_range(reverse(BB))) {
      Changed |= !HasScalarSubwordLoads && visit(I);
      Changed |= LRO.optimizeLiveType(&I, DeadInsts);
    }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
  return Changed;
}

Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
  assert(OriginalType->getScalarSizeInBits() <=
         ConvertToScalar->getScalarSizeInBits());

  FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize ConvertScalarSize = DL.getTypeSizeInBits(ConvertToScalar);
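  // Round up to the number of 32-bit words needed to hold all the original
  // bits, e.g. v7i8 (56 bits) needs two i32 words.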
  unsigned ConvertEltCount =
      (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;

  if (OriginalSize <= ConvertScalarSize)
    return IntegerType::get(Mod.getContext(), ConvertScalarSize);

  return VectorType::get(Type::getIntNTy(Mod.getContext(), ConvertScalarSize),
                         ConvertEltCount, false);
}

Value *LiveRegOptimizer::convertToOptType(Instruction *V,
                                          BasicBlock::iterator &InsertPt) {
  FixedVectorType *VTy = cast<FixedVectorType>(V->getType());
  Type *NewTy = calculateConvertType(V->getType());

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);

  IRBuilder<> Builder(V->getParent(), InsertPt);
  // If the bit widths match, we can simply bitcast the old vector to the
  // desired type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, we must use a wider vector.
  assert(NewSize > OriginalSize);
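  // For example, a v3i8 (24 bits) source is first widened to v4i8 by appending
  // one extra lane (a shuffle index past the end of the source selects a lane
  // of the implicit poison operand) and the result is then bitcast to i32.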
  uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();

  SmallVector<int, 8> ShuffleMask;
  uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
  for (unsigned I = 0; I < OriginalElementCount; I++)
    ShuffleMask.push_back(I);

  for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
    ShuffleMask.push_back(OriginalElementCount);

  Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);
  return Builder.CreateBitCast(ExpandedVec, NewTy, V->getName() + ".bc");
}

Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
                                            BasicBlock::iterator &InsertPt,
                                            BasicBlock *InsertBB) {
  FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(V->getType());
  TypeSize NewSize = DL.getTypeSizeInBits(NewVTy);

  IRBuilder<> Builder(InsertBB, InsertPt);
  // If there is a bitsize match, we simply convert back to the original type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewVTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, then we must have used a wider value to
  // hold the bits.
  assert(OriginalSize > NewSize);
  // For wide scalars, we can just truncate the value.
  if (!V->getType()->isVectorTy()) {
    Instruction *Trunc = cast<Instruction>(
        Builder.CreateTrunc(V, IntegerType::get(Mod.getContext(), NewSize)));
    return cast<Instruction>(Builder.CreateBitCast(Trunc, NewVTy));
  }

  // For wider vectors, we must strip the MSBs to convert back to the original
  // type.
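  // For example, a value carried as v2i32 (64 bits) is bitcast to v8i8 and the
  // shuffle below keeps only the low seven lanes to recover the original v7i8.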
  VectorType *ExpandedVT = VectorType::get(
      Type::getIntNTy(Mod.getContext(), NewVTy->getScalarSizeInBits()),
      (OriginalSize / NewVTy->getScalarSizeInBits()), false);
  Instruction *Converted =
      cast<Instruction>(Builder.CreateBitCast(V, ExpandedVT));

  unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
  SmallVector<int, 8> ShuffleMask(NarrowElementCount);
  std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);

  return Builder.CreateShuffleVector(Converted, ShuffleMask);
}

bool LiveRegOptimizer::optimizeLiveType(
    Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  SmallVector<Instruction *, 4> Worklist;
  SmallPtrSet<PHINode *, 4> PhiNodes;
  SmallPtrSet<Instruction *, 4> Defs;
  SmallPtrSet<Instruction *, 4> Uses;
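  // Defs collects the definitions whose values cross a block boundary
  // (including the incoming values of rewritten PHIs); Uses collects their
  // users in other blocks plus all users of those PHIs. Each def is packed
  // into the legal type right after its definition, and each use gets an
  // unpacking conversion inserted in its own block before the operand is
  // rewritten.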

  Worklist.push_back(cast<Instruction>(I));
  while (!Worklist.empty()) {
    Instruction *II = Worklist.pop_back_val();

    if (!Visited.insert(II).second)
      continue;

    if (!shouldReplace(II->getType()))
      continue;

    if (!isCoercionProfitable(II))
      continue;

    if (PHINode *Phi = dyn_cast<PHINode>(II)) {
      PhiNodes.insert(Phi);
      // Collect all the incoming values of problematic PHI nodes.
      for (Value *V : Phi->incoming_values()) {
        // Repeat the collection process for newly found PHI nodes.
        if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
          if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
            Worklist.push_back(OpPhi);
          continue;
        }

        Instruction *IncInst = dyn_cast<Instruction>(V);
        // Other incoming value types (e.g. vector literals) are unhandled.
        if (!IncInst && !isa<ConstantAggregateZero>(V))
          return false;

        // Collect all other incoming values for coercion.
        if (IncInst)
          Defs.insert(IncInst);
      }
    }

    // Collect all relevant uses.
    for (User *V : II->users()) {
      // Repeat the collection process for problematic PHI nodes.
      if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
        if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
          Worklist.push_back(OpPhi);
        continue;
      }

      Instruction *UseInst = cast<Instruction>(V);
      // Collect all uses of PHI nodes and any use that crosses BB boundaries.
      if (UseInst->getParent() != II->getParent() || isa<PHINode>(II)) {
        Uses.insert(UseInst);
        if (!isa<PHINode>(II))
          Defs.insert(II);
      }
    }
  }

  // Coerce and track the defs.
  for (Instruction *D : Defs) {
    if (!ValMap.contains(D)) {
      BasicBlock::iterator InsertPt = std::next(D->getIterator());
      Value *ConvertVal = convertToOptType(D, InsertPt);
      assert(ConvertVal);
      ValMap[D] = ConvertVal;
    }
  }

  // Construct new-typed PHI nodes.
  for (PHINode *Phi : PhiNodes) {
    ValMap[Phi] = PHINode::Create(calculateConvertType(Phi->getType()),
                                  Phi->getNumIncomingValues(),
                                  Phi->getName() + ".tc", Phi->getIterator());
  }

  // Connect all the PHI nodes with their new incoming values.
  for (PHINode *Phi : PhiNodes) {
    PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
    bool MissingIncVal = false;
    for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {
      Value *IncVal = Phi->getIncomingValue(I);
      if (isa<ConstantAggregateZero>(IncVal)) {
        Type *NewType = calculateConvertType(Phi->getType());
        NewPhi->addIncoming(ConstantInt::get(NewType, 0, false),
                            Phi->getIncomingBlock(I));
      } else if (Value *Val = ValMap.lookup(IncVal))
        NewPhi->addIncoming(Val, Phi->getIncomingBlock(I));
      else
        MissingIncVal = true;
    }
    if (MissingIncVal) {
      Value *DeadVal = ValMap[Phi];
      // The coercion chain of the PHI is broken. Delete the Phi
      // from the ValMap and any connected / user Phis.
      SmallVector<Value *, 4> PHIWorklist;
      SmallPtrSet<Value *, 4> VisitedPhis;
      PHIWorklist.push_back(DeadVal);
      while (!PHIWorklist.empty()) {
        Value *NextDeadValue = PHIWorklist.pop_back_val();
        VisitedPhis.insert(NextDeadValue);
        auto OriginalPhi =
            llvm::find_if(PhiNodes, [this, &NextDeadValue](PHINode *CandPhi) {
              return ValMap[CandPhi] == NextDeadValue;
            });
        // This PHI may have already been removed from maps when
        // unwinding a previous Phi.
        if (OriginalPhi != PhiNodes.end())
          ValMap.erase(*OriginalPhi);

        DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));

        for (User *U : NextDeadValue->users()) {
          if (!VisitedPhis.contains(cast<PHINode>(U)))
            PHIWorklist.push_back(U);
        }
      }
    } else {
      DeadInsts.emplace_back(cast<Instruction>(Phi));
    }
  }
  // Coerce back to the original type and replace the uses.
  for (Instruction *U : Uses) {
    // Replace all converted operands for a use.
    for (auto [OpIdx, Op] : enumerate(U->operands())) {
      if (Value *Val = ValMap.lookup(Op)) {
        Value *NewVal = nullptr;
        if (BBUseValMap.contains(U->getParent()) &&
            BBUseValMap[U->getParent()].contains(Val))
          NewVal = BBUseValMap[U->getParent()][Val];
        else {
          BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
          // We may pick up ops that were previously converted for users in
          // other blocks. If there is an originally typed definition of the Op
          // already in this block, simply reuse it.
          if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
              U->getParent() == cast<Instruction>(Op)->getParent()) {
            NewVal = Op;
          } else {
            NewVal =
                convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
                                   InsertPt, U->getParent());
            BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
          }
        }
        assert(NewVal);
        U->setOperand(OpIdx, NewVal);
      }
    }
  }

  return true;
}

bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
  unsigned AS = LI.getPointerAddressSpace();
  // Skip non-constant address spaces.
  if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
      AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;
  // Skip non-simple loads.
  if (!LI.isSimple())
    return false;
  Type *Ty = LI.getType();
  // Skip aggregate types.
  if (Ty->isAggregateType())
    return false;
  unsigned TySize = DL.getTypeStoreSize(Ty);
  // Only handle sub-DWORD loads.
  if (TySize >= 4)
    return false;
  // The load must be at least naturally aligned.
  if (LI.getAlign() < DL.getABITypeAlign(Ty))
    return false;
  // It should be uniform, i.e. a scalar load.
  return UA.isUniform(&LI);
}

bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
  if (!WidenLoads)
    return false;

  // Skip if the load is already at least DWORD aligned, as that case is
  // handled in SDAG.
  if (LI.getAlign() >= 4)
    return false;

  if (!canWidenScalarExtLoad(LI))
    return false;

  int64_t Offset = 0;
  auto *Base =
      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, DL);
  // If the base is not DWORD aligned, it's not safe to perform the following
  // transforms.
  if (!isDWORDAligned(Base))
    return false;

  int64_t Adjust = Offset & 0x3;
  if (Adjust == 0) {
    // With a zero adjust, the original alignment can simply be promoted to
    // DWORD alignment.
    LI.setAlignment(Align(4));
    return true;
  }

  IRBuilder<> IRB(&LI);
  IRB.SetCurrentDebugLocation(LI.getDebugLoc());

  unsigned LdBits = DL.getTypeStoreSizeInBits(LI.getType());
  auto *IntNTy = Type::getIntNTy(LI.getContext(), LdBits);

  auto *NewPtr = IRB.CreateConstGEP1_64(
      IRB.getInt8Ty(),
      IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
      Offset - Adjust);

  LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
  NewLd->copyMetadata(LI);
  NewLd->setMetadata(LLVMContext::MD_range, nullptr);

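  // For example, an i16 load at byte offset 6 from a DWORD-aligned base is
  // rewritten as an i32 load at offset 4 followed by a 16-bit logical shift
  // right and a truncate back to i16.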
  unsigned ShAmt = Adjust * 8;
  Value *NewVal = IRB.CreateBitCast(
      IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt),
                      DL.typeSizeEqualsStoreSize(LI.getType()) ? IntNTy
                                                               : LI.getType()),
      LI.getType());
  LI.replaceAllUsesWith(NewVal);
  DeadInsts.emplace_back(&LI);

  return true;
}

PreservedAnalyses
AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);

  bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();

  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
public:
  static char ID;

  AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override {
    return "AMDGPU IR late optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override;
};

bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  UniformityInfo &UI =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                      "AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                    "AMDGPU IR late optimizations", false, false)

char AMDGPULateCodeGenPrepareLegacy::ID = 0;

FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
  return new AMDGPULateCodeGenPrepareLegacy();
}