//===- AMDGPUWaitSGPRHazards.cpp - Insert waits for SGPR read hazards -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_wait_alu instructions to mitigate SGPR read hazards on GFX12.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUWaitSGPRHazards.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/TargetParser/TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-wait-sgpr-hazards"

static cl::opt<bool> GlobalEnableSGPRHazardWaits(
    "amdgpu-sgpr-hazard-wait", cl::init(true), cl::Hidden,
    cl::desc("Enable required s_wait_alu on SGPR hazards"));

static cl::opt<bool> GlobalCullSGPRHazardsOnFunctionBoundary(
    "amdgpu-sgpr-hazard-boundary-cull", cl::init(false), cl::Hidden,
    cl::desc("Cull hazards on function boundaries"));

static cl::opt<bool>
    GlobalCullSGPRHazardsAtMemWait("amdgpu-sgpr-hazard-mem-wait-cull",
                                   cl::init(false), cl::Hidden,
                                   cl::desc("Cull hazards on memory waits"));

static cl::opt<unsigned> GlobalCullSGPRHazardsMemWaitThreshold(
    "amdgpu-sgpr-hazard-mem-wait-cull-threshold", cl::init(8), cl::Hidden,
    cl::desc("Number of tracked SGPRs before initiating hazard cull on memory "
             "wait"));

namespace {

class AMDGPUWaitSGPRHazards {
public:
  const GCNSubtarget *ST;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  const MachineRegisterInfo *MRI;
  unsigned DsNopCount;

  bool EnableSGPRHazardWaits;
  bool CullSGPRHazardsOnFunctionBoundary;
  bool CullSGPRHazardsAtMemWait;
  unsigned CullSGPRHazardsMemWaitThreshold;

  AMDGPUWaitSGPRHazards() = default;

  // Return the numeric ID 0-127 for a given SGPR.
  static std::optional<unsigned> sgprNumber(Register Reg,
                                            const SIRegisterInfo &TRI) {
    switch (Reg) {
    case AMDGPU::M0:
    case AMDGPU::EXEC:
    case AMDGPU::EXEC_LO:
    case AMDGPU::EXEC_HI:
    case AMDGPU::SGPR_NULL:
    case AMDGPU::SGPR_NULL64:
      return {};
    default:
      break;
    }
    unsigned RegN = TRI.getHWRegIndex(Reg);
    if (RegN > 127)
      return {};
    return RegN;
  }

  static inline bool isVCC(Register Reg) {
    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::VCC_HI;
  }

  // Adjust global offsets for instructions bundled with S_GETPC_B64 after
  // insertion of a new instruction.
  static void updateGetPCBundle(MachineInstr *NewMI) {
    if (!NewMI->isBundled())
      return;

    // Find start of bundle.
    auto I = NewMI->getIterator();
    while (I->isBundledWithPred())
      I--;
    if (I->isBundle())
      I++;

    // Bail if this is not an S_GETPC bundle.
    if (I->getOpcode() != AMDGPU::S_GETPC_B64)
      return;

    // Update offsets of any references in the bundle.
    const unsigned NewBytes = 4;
    assert(NewMI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
           "Unexpected instruction insertion in bundle");
    auto NextMI = std::next(NewMI->getIterator());
    auto End = NewMI->getParent()->end();
    while (NextMI != End && NextMI->isBundledWithPred()) {
      for (auto &Operand : NextMI->operands()) {
        if (Operand.isGlobal())
          Operand.setOffset(Operand.getOffset() + NewBytes);
      }
      NextMI++;
    }
  }

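  // Hazard facts tracked through the block dataflow: which SGPR pairs have
  // ever been read by a VALU, and which SGPRs (and VCC) currently hold
  // uncommitted SALU/VALU writes that a later reader may have to wait on.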
  struct HazardState {
    static constexpr unsigned None = 0;
    static constexpr unsigned SALU = (1 << 0);
    static constexpr unsigned VALU = (1 << 1);

    std::bitset<64> Tracked;      // SGPR banks ever read by VALU
    std::bitset<128> SALUHazards; // SGPRs with uncommitted values from SALU
    std::bitset<128> VALUHazards; // SGPRs with uncommitted values from VALU
    unsigned VCCHazard = None;    // Source of current VCC writes
    bool ActiveFlat = false;      // Has unwaited flat instructions

    bool merge(const HazardState &RHS) {
      HazardState Orig(*this);
      *this |= RHS;
      return (*this != Orig);
    }

    bool operator==(const HazardState &RHS) const {
      return Tracked == RHS.Tracked && SALUHazards == RHS.SALUHazards &&
             VALUHazards == RHS.VALUHazards && VCCHazard == RHS.VCCHazard &&
             ActiveFlat == RHS.ActiveFlat;
    }

    bool operator!=(const HazardState &RHS) const { return !(*this == RHS); }

    void operator|=(const HazardState &RHS) {
      Tracked |= RHS.Tracked;
      SALUHazards |= RHS.SALUHazards;
      VALUHazards |= RHS.VALUHazards;
      VCCHazard |= RHS.VCCHazard;
      ActiveFlat |= RHS.ActiveFlat;
    }
  };

  struct BlockHazardState {
    HazardState In;
    HazardState Out;
  };

  DenseMap<const MachineBasicBlock *, BlockHazardState> BlockState;

  static constexpr unsigned WAVE32_NOPS = 4;
  static constexpr unsigned WAVE64_NOPS = 8;

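  // Emit DsNopCount DS_NOPs ahead of MI; a full run of DS_NOPs clears all
  // tracked SGPRs (see the DS_NOP handling in runOnMachineBasicBlock).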
  void insertHazardCull(MachineBasicBlock &MBB,
                        MachineBasicBlock::instr_iterator &MI) {
    assert(!MI->isBundled());
    unsigned Count = DsNopCount;
    while (Count--)
      BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
  }

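  // Combine two S_WAITCNT_DEPCTR masks by taking the minimum of each counter
  // field, so the merged wait is at least as strict as both inputs.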
  unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
    unsigned Mask = AMDGPU::DepCtr::getDefaultDepCtrEncoding(*ST);
    Mask = AMDGPU::DepCtr::encodeFieldSaSdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
    Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
    const AMDGPU::IsaVersion &Version = AMDGPU::getIsaVersion(ST->getCPU());
    Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
        Mask,
        std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1, Version),
                 AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2, Version)),
        Version);
    Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
        Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
                       AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
    return Mask;
  }

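  // If the previous instruction is already an S_WAITCNT_DEPCTR, fold the new
  // wait requirements into it instead of emitting a second wait.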
  bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI,
                                unsigned Mask) {
    auto MBB = MI->getParent();
    if (MI == MBB->instr_begin())
      return false;

    auto It = prev_nodbg(MI, MBB->instr_begin());
    if (It->getOpcode() != AMDGPU::S_WAITCNT_DEPCTR)
      return false;

    It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm()));
    return true;
  }

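  // Scan one block, updating the hazard state instruction by instruction.
  // With Emit == false this only computes the block's outgoing state for the
  // dataflow fixed point; with Emit == true it inserts the required
  // s_wait_alu (S_WAITCNT_DEPCTR) instructions and DS_NOP culls.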
  bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
    enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };

    HazardState State = BlockState[&MBB].In;
    SmallSet<Register, 8> SeenRegs;
    bool Emitted = false;
    unsigned DsNops = 0;

    for (MachineBasicBlock::instr_iterator MI = MBB.instr_begin(),
                                           E = MBB.instr_end();
         MI != E; ++MI) {
      if (MI->isMetaInstruction())
        continue;

      // Clear tracked SGPRs if sufficient DS_NOPs occur
      if (MI->getOpcode() == AMDGPU::DS_NOP) {
        if (++DsNops >= DsNopCount)
          State.Tracked.reset();
        continue;
      }
      DsNops = 0;

      // Snoop FLAT instructions to avoid adding culls before scratch/lds
      // loads. Culls could be disproportionate in cost to load time.
      if (SIInstrInfo::isFLAT(*MI) && !SIInstrInfo::isFLATGlobal(*MI))
        State.ActiveFlat = true;

      // SMEM or VMEM clears hazards
      // FIXME: adapt to add FLAT without VALU (so !isLDSDMA())?
      if ((SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI)) ||
          SIInstrInfo::isSMRD(*MI)) {
        State.VCCHazard = HazardState::None;
        State.SALUHazards.reset();
        State.VALUHazards.reset();
        continue;
      }

      // Existing S_WAITALU can clear hazards
      if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
        unsigned int Mask = MI->getOperand(0).getImm();
        if (AMDGPU::DepCtr::decodeFieldVaVcc(Mask) == 0)
          State.VCCHazard &= ~HazardState::VALU;
        if (AMDGPU::DepCtr::decodeFieldSaSdst(Mask) == 0) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
        }
        if (AMDGPU::DepCtr::decodeFieldVaSdst(Mask) == 0)
          State.VALUHazards.reset();
        continue;
      }

      // Snoop counter waits to insert culls
      if (CullSGPRHazardsAtMemWait &&
          (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_SAMPLECNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_BVHCNT) &&
          (MI->getOperand(0).isImm() && MI->getOperand(0).getImm() == 0) &&
          (State.Tracked.count() >= CullSGPRHazardsMemWaitThreshold)) {
        if (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT && State.ActiveFlat) {
          State.ActiveFlat = false;
        } else {
          State.Tracked.reset();
          if (Emit)
            insertHazardCull(MBB, MI);
          continue;
        }
      }

      // Process only VALUs and SALUs
      bool IsVALU = SIInstrInfo::isVALU(*MI);
      bool IsSALU = SIInstrInfo::isSALU(*MI);
      if (!IsVALU && !IsSALU)
        continue;

      unsigned Wait = 0;

      auto processOperand = [&](const MachineOperand &Op, bool IsUse) {
        if (!Op.isReg())
          return;
        Register Reg = Op.getReg();
        assert(!Op.getSubReg());
        if (!TRI->isSGPRReg(*MRI, Reg))
          return;

        // Only visit each register once
        if (!SeenRegs.insert(Reg).second)
          return;

        auto RegNumber = sgprNumber(Reg, *TRI);
        if (!RegNumber)
          return;

        // Track SGPRs by pair -- numeric ID of a 64-bit SGPR pair,
        // i.e. SGPR0 = SGPR0_SGPR1 = 0, SGPR3 = SGPR2_SGPR3 = 1, etc.
        unsigned RegN = *RegNumber;
        unsigned PairN = (RegN >> 1) & 0x3f;

        // A read or write of an untracked register is safe, but any new
        // VALU reads must be recorded.
        if (!State.Tracked[PairN]) {
          if (IsVALU && IsUse)
            State.Tracked.set(PairN);
          return;
        }

        uint8_t SGPRCount =
            AMDGPU::getRegBitWidth(*TRI->getRegClassForReg(*MRI, Reg)) / 32;

        if (IsUse) {
          // SALU reading SGPR clears VALU hazards
          if (IsSALU) {
            if (isVCC(Reg)) {
              if (State.VCCHazard & HazardState::VALU)
                State.VCCHazard = HazardState::None;
            } else {
              State.VALUHazards.reset();
            }
          }
          // Compute required waits
          for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
            Wait |= State.SALUHazards[RegN + RegIdx] ? WA_SALU : 0;
            Wait |= IsVALU && State.VALUHazards[RegN + RegIdx] ? WA_VALU : 0;
          }
          if (isVCC(Reg) && State.VCCHazard) {
            // Note: it's possible for both SALU and VALU to exist if VCC
            // was updated differently by merged predecessors.
            if (State.VCCHazard & HazardState::SALU)
              Wait |= WA_SALU;
            if (State.VCCHazard & HazardState::VALU)
              Wait |= WA_VCC;
          }
        } else {
          // Update hazards
          if (isVCC(Reg)) {
            State.VCCHazard = IsSALU ? HazardState::SALU : HazardState::VALU;
          } else {
            for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
              if (IsSALU)
                State.SALUHazards.set(RegN + RegIdx);
              else
                State.VALUHazards.set(RegN + RegIdx);
            }
          }
        }
      };

      const bool IsSetPC =
          (MI->isCall() || MI->isReturn() || MI->isIndirectBranch()) &&
          MI->getOpcode() != AMDGPU::S_ENDPGM &&
          MI->getOpcode() != AMDGPU::S_ENDPGM_SAVED;

      // Only consider implicit VCC specified by instruction descriptor.
      const bool HasImplicitVCC =
          llvm::any_of(MI->getDesc().implicit_uses(), isVCC) ||
          llvm::any_of(MI->getDesc().implicit_defs(), isVCC);

      if (IsSetPC) {
        // All SGPR writes before a call/return must be flushed as the
        // callee/caller will not see the hazard chain.
        if (State.VCCHazard & HazardState::VALU)
          Wait |= WA_VCC;
        if (State.SALUHazards.any() || (State.VCCHazard & HazardState::SALU))
          Wait |= WA_SALU;
        if (State.VALUHazards.any())
          Wait |= WA_VALU;
        if (CullSGPRHazardsOnFunctionBoundary && State.Tracked.any()) {
          State.Tracked.reset();
          if (Emit)
            insertHazardCull(MBB, MI);
        }
      } else {
        // Process uses to determine required wait.
        SeenRegs.clear();
        for (const MachineOperand &Op : MI->all_uses()) {
          if (Op.isImplicit() &&
              (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
            continue;
          processOperand(Op, true);
        }
      }

      // Apply wait
      if (Wait) {
        unsigned Mask = AMDGPU::DepCtr::getDefaultDepCtrEncoding(*ST);
        if (Wait & WA_VCC) {
          State.VCCHazard &= ~HazardState::VALU;
          Mask = AMDGPU::DepCtr::encodeFieldVaVcc(Mask, 0);
        }
        if (Wait & WA_SALU) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
          Mask = AMDGPU::DepCtr::encodeFieldSaSdst(Mask, 0);
        }
        if (Wait & WA_VALU) {
          State.VALUHazards.reset();
          Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
        }
        if (Emit) {
          if (!mergeConsecutiveWaitAlus(MI, Mask)) {
            auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
                                 TII->get(AMDGPU::S_WAITCNT_DEPCTR))
                             .addImm(Mask);
            updateGetPCBundle(NewMI);
          }
          Emitted = true;
        }
      }

      // On return from a call the SGPR state is unknown, so treat all SGPRs
      // as potential hazards.
      if (MI->isCall() && !CullSGPRHazardsOnFunctionBoundary)
        State.Tracked.set();

      // Update hazards based on defs.
      SeenRegs.clear();
      for (const MachineOperand &Op : MI->all_defs()) {
        if (Op.isImplicit() &&
            (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
          continue;
        processOperand(Op, false);
      }
    }

    BlockHazardState &BS = BlockState[&MBB];
    bool Changed = State != BS.Out;
    if (Emit) {
      assert(!Changed && "Hazard state should not change on emit pass");
      return Emitted;
    }
    if (Changed)
      BS.Out = State;
    return Changed;
  }

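  // Compute the incoming/outgoing hazard state of every block with a
  // worklist-driven fixed-point iteration, then make a final pass over each
  // block to emit the required waits.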
  bool run(MachineFunction &MF) {
    ST = &MF.getSubtarget<GCNSubtarget>();
    if (!ST->hasVALUReadSGPRHazard())
      return false;

    // Parse settings
    EnableSGPRHazardWaits = GlobalEnableSGPRHazardWaits;
    CullSGPRHazardsOnFunctionBoundary = GlobalCullSGPRHazardsOnFunctionBoundary;
    CullSGPRHazardsAtMemWait = GlobalCullSGPRHazardsAtMemWait;
    CullSGPRHazardsMemWaitThreshold = GlobalCullSGPRHazardsMemWaitThreshold;

    if (!GlobalEnableSGPRHazardWaits.getNumOccurrences())
      EnableSGPRHazardWaits = MF.getFunction().getFnAttributeAsParsedInteger(
          "amdgpu-sgpr-hazard-wait", EnableSGPRHazardWaits);
    if (!GlobalCullSGPRHazardsOnFunctionBoundary.getNumOccurrences())
      CullSGPRHazardsOnFunctionBoundary =
          MF.getFunction().hasFnAttribute("amdgpu-sgpr-hazard-boundary-cull");
    if (!GlobalCullSGPRHazardsAtMemWait.getNumOccurrences())
      CullSGPRHazardsAtMemWait =
          MF.getFunction().hasFnAttribute("amdgpu-sgpr-hazard-mem-wait-cull");
    if (!GlobalCullSGPRHazardsMemWaitThreshold.getNumOccurrences())
      CullSGPRHazardsMemWaitThreshold =
          MF.getFunction().getFnAttributeAsParsedInteger(
              "amdgpu-sgpr-hazard-mem-wait-cull-threshold",
              CullSGPRHazardsMemWaitThreshold);

    // Bail if disabled
    if (!EnableSGPRHazardWaits)
      return false;

    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();
    MRI = &MF.getRegInfo();
    DsNopCount = ST->isWave64() ? WAVE64_NOPS : WAVE32_NOPS;

    auto CallingConv = MF.getFunction().getCallingConv();
    if (!AMDGPU::isEntryFunctionCC(CallingConv) &&
        !CullSGPRHazardsOnFunctionBoundary) {
      // Callee must consider all SGPRs as tracked.
      LLVM_DEBUG(dbgs() << "Is called function, track all SGPRs.\n");
      MachineBasicBlock &EntryBlock = MF.front();
      BlockState[&EntryBlock].In.Tracked.set();
    }

    // Calculate the hazard state for each basic block.
    // Iterate until a fixed point is reached.
    // Fixed point is guaranteed as merge function only ever increases
    // the hazard set, and all backedges will cause a merge.
    //
    // Note: we have to take care of the entry block as this technically
    // has an edge from outside the function. Failure to treat this as
    // a merge could prevent fixed point being reached.
    SetVector<MachineBasicBlock *> Worklist;
    for (auto &MBB : reverse(MF))
      Worklist.insert(&MBB);
    while (!Worklist.empty()) {
      auto &MBB = *Worklist.pop_back_val();
      bool Changed = runOnMachineBasicBlock(MBB, false);
      if (Changed) {
        // Note: take a copy of state here in case it is reallocated by map
        HazardState NewState = BlockState[&MBB].Out;
        // Propagate to all successor blocks
        for (auto Succ : MBB.successors()) {
          // We only need to merge hazards at CFG merge points.
          auto &SuccState = BlockState[Succ];
          if (Succ->getSinglePredecessor() && !Succ->isEntryBlock()) {
            if (SuccState.In != NewState) {
              SuccState.In = NewState;
              Worklist.insert(Succ);
            }
          } else if (SuccState.In.merge(NewState)) {
            Worklist.insert(Succ);
          }
        }
      }
    }

    LLVM_DEBUG(dbgs() << "Emit s_wait_alu instructions\n");

    // Final pass to emit wait instructions.
    bool Changed = false;
    for (auto &MBB : MF)
      Changed |= runOnMachineBasicBlock(MBB, true);

    BlockState.clear();
    return Changed;
  }
};

class AMDGPUWaitSGPRHazardsLegacy : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUWaitSGPRHazardsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    return AMDGPUWaitSGPRHazards().run(MF);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // namespace

char AMDGPUWaitSGPRHazardsLegacy::ID = 0;

char &llvm::AMDGPUWaitSGPRHazardsLegacyID = AMDGPUWaitSGPRHazardsLegacy::ID;

INITIALIZE_PASS(AMDGPUWaitSGPRHazardsLegacy, DEBUG_TYPE,
                "AMDGPU Insert waits for SGPR read hazards", false, false)

PreservedAnalyses
AMDGPUWaitSGPRHazardsPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  if (AMDGPUWaitSGPRHazards().run(MF))
    return getMachineFunctionPassPreservedAnalyses();
  return PreservedAnalyses::all();
}