1//===-- GCNHazardRecognizers.h - GCN Hazard Recognizers ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines hazard recognizers for scheduling on GCN processors.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
14#define LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
15
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/CodeGen/MachineLoopInfo.h"
19#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
20#include "llvm/CodeGen/TargetSchedule.h"
21#include <list>
22
23namespace llvm {
24
// Forward declarations. This header only refers to these types through
// pointers, references, and function signatures, so complete definitions are
// not needed here.
class GCNSubtarget;
class MachineFunction;
class MachineInstr;
class MachineOperand;
class MachineRegisterInfo;
class SIInstrInfo;
class SIRegisterInfo;
32
33class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
34public:
35 typedef function_ref<bool(const MachineInstr &)> IsHazardFn;
36 typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
37 typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
38
39private:
40 // Distinguish if we are called from scheduler or hazard recognizer
41 bool IsHazardRecognizerMode;
42
43 // This variable stores the instruction that has been emitted this cycle. It
44 // will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
45 // called.
46 MachineInstr *CurrCycleInstr;
47 std::list<MachineInstr*> EmittedInstrs;
48 const MachineFunction &MF;
49 const GCNSubtarget &ST;
50 const SIInstrInfo &TII;
51 const SIRegisterInfo &TRI;
52 const TargetSchedModel &TSchedModel;
53
54 // Loop info for V_NOP hoisting, passed from the pass manager.
55 MachineLoopInfo *MLI = nullptr;
56
57 bool RunLdsBranchVmemWARHazardFixup;
58
59 /// RegUnits of uses in the current soft memory clause.
60 mutable BitVector ClauseUses;
61
62 /// RegUnits of defs in the current soft memory clause.
63 mutable BitVector ClauseDefs;
64
65 void resetClause() const {
66 ClauseUses.reset();
67 ClauseDefs.reset();
68 }
69
70 void addClauseInst(const MachineInstr &MI) const;
71
72 /// \returns the number of wait states before another MFMA instruction can be
73 /// issued after \p MI.
74 unsigned getMFMAPipelineWaitStates(const MachineInstr &MI) const;
75
76 // Advance over a MachineInstr bundle. Look for hazards in the bundled
77 // instructions.
78 void processBundle();
79
80 // Run on an individual instruction in hazard recognizer mode. This can be
81 // used on a newly inserted instruction before returning from PreEmitNoops.
82 void runOnInstruction(MachineInstr *MI);
83
84 int getWaitStatesSince(IsHazardFn IsHazard, int Limit,
85 GetNumWaitStatesFn GetNumWaitStates) const;
86 int getWaitStatesSince(IsHazardFn IsHazard, int Limit) const;
87 int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef,
88 int Limit) const;
89 int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit) const;
90
91 int checkSoftClauseHazards(MachineInstr *SMEM) const;
92 int checkSMRDHazards(MachineInstr *SMRD) const;
93 int checkVMEMHazards(MachineInstr *VMEM) const;
94 int checkDPPHazards(MachineInstr *DPP) const;
95 int checkDivFMasHazards(MachineInstr *DivFMas) const;
96 int checkGetRegHazards(MachineInstr *GetRegInstr) const;
97 int checkSetRegHazards(MachineInstr *SetRegInstr) const;
98 int createsVALUHazard(const MachineInstr &MI) const;
99 int checkVALUHazards(MachineInstr *VALU) const;
100 int checkVALUHazardsHelper(const MachineOperand &Def,
101 const MachineRegisterInfo &MRI) const;
102 int checkRWLaneHazards(MachineInstr *RWLane) const;
103 int checkRFEHazards(MachineInstr *RFE) const;
104 int checkInlineAsmHazards(MachineInstr *IA) const;
105 int checkReadM0Hazards(MachineInstr *SMovRel) const;
106 int checkNSAtoVMEMHazard(MachineInstr *MI) const;
107 int checkFPAtomicToDenormModeHazard(MachineInstr *MI) const;
108 // Emit \p WaitStatesNeeded V_NOP instructions before \p InsertPt.
109 // If IsHoisting is true, uses empty DebugLoc for compiler-inserted NOPs.
110 void emitVNops(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
111 int WaitStatesNeeded, bool IsHoisting = false);
112 void fixHazards(MachineInstr *MI);
113 bool fixVcmpxPermlaneHazards(MachineInstr *MI);
114 bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
115 bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
116 bool fixVcmpxExecWARHazard(MachineInstr *MI);
117 bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
118 bool fixLdsDirectVALUHazard(MachineInstr *MI);
119 bool fixLdsDirectVMEMHazard(MachineInstr *MI);
120 bool fixVALUPartialForwardingHazard(MachineInstr *MI);
121 bool fixVALUTransUseHazard(MachineInstr *MI);
122 bool fixVALUTransCoexecutionHazards(MachineInstr *MI);
123 bool fixWMMAHazards(MachineInstr *MI);
124 int checkWMMACoexecutionHazards(MachineInstr *MI) const;
125 bool fixWMMACoexecutionHazards(MachineInstr *MI);
126 bool tryHoistWMMAVnopsFromLoop(MachineInstr *MI, int WaitStatesNeeded);
127 bool hasWMMAHazardInLoop(MachineLoop *L, MachineInstr *MI,
128 bool IncludeSubloops = true);
129 bool hasWMMAToWMMARegOverlap(const MachineInstr &WMMA,
130 const MachineInstr &MI) const;
131 bool hasWMMAToVALURegOverlap(const MachineInstr &WMMA,
132 const MachineInstr &MI) const;
133 bool isCoexecutionHazardFor(const MachineInstr &I,
134 const MachineInstr &MI) const;
135 bool fixShift64HighRegBug(MachineInstr *MI);
136 bool fixVALUMaskWriteHazard(MachineInstr *MI);
137 bool fixRequiredExportPriority(MachineInstr *MI);
138 bool fixGetRegWaitIdle(MachineInstr *MI);
139 bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
140 bool fixScratchBaseForwardingHazard(MachineInstr *MI);
141 bool fixSetRegMode(MachineInstr *MI);
142
143 int checkMAIHazards(MachineInstr *MI) const;
144 int checkMAIHazards908(MachineInstr *MI) const;
145 int checkMAIHazards90A(MachineInstr *MI) const;
146 /// Pad the latency between neighboring MFMA instructions with s_nops. The
147 /// percentage of wait states to fill with s_nops is specified by the command
148 /// line option '-amdgpu-mfma-padding-ratio'.
149 ///
150 /// For example, with '-amdgpu-mfma-padding-ratio=100':
151 ///
152 /// 2 pass MFMA instructions have a latency of 2 wait states. Therefore, a
153 /// 'S_NOP 1' will be added between sequential MFMA instructions.
154 ///
155 /// V_MFMA_F32_4X4X1F32
156 /// V_MFMA_F32_4X4X1F32
157 ///-->
158 /// V_MFMA_F32_4X4X1F32
159 /// S_NOP 1
160 /// V_MFMA_F32_4X4X1F32
161 int checkMFMAPadding(MachineInstr *MI) const;
162 int checkMAIVALUHazards(MachineInstr *MI) const;
163 int checkMAILdStHazards(MachineInstr *MI) const;
164 int checkPermlaneHazards(MachineInstr *MI) const;
165
166public:
167 GCNHazardRecognizer(const MachineFunction &MF,
168 MachineLoopInfo *MLI = nullptr);
169 // We can only issue one instruction per cycle.
170 bool atIssueLimit() const override { return true; }
171 void EmitInstruction(SUnit *SU) override;
172 void EmitInstruction(MachineInstr *MI) override;
173 HazardType getHazardType(SUnit *SU, int Stalls) override;
174 void EmitNoop() override;
175 unsigned PreEmitNoops(MachineInstr *) override;
176 unsigned PreEmitNoopsCommon(MachineInstr *) const;
177 void AdvanceCycle() override;
178 void RecedeCycle() override;
179 bool ShouldPreferAnother(SUnit *SU) const override;
180 void Reset() override;
181};
182
183} // end namespace llvm
184
185#endif //LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
186