| 1 | //===-- GCNHazardRecognizers.h - GCN Hazard Recognizers ---------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines hazard recognizers for scheduling on GCN processors. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H |
| 14 | #define LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H |
| 15 | |
| 16 | #include "llvm/ADT/BitVector.h" |
| 17 | #include "llvm/ADT/STLExtras.h" |
| 18 | #include "llvm/CodeGen/MachineLoopInfo.h" |
| 19 | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" |
| 20 | #include "llvm/CodeGen/TargetSchedule.h" |
| 21 | #include <list> |
| 22 | |
| 23 | namespace llvm { |
| 24 | |
| 25 | class MachineFunction; |
| 26 | class MachineInstr; |
| 27 | class MachineOperand; |
| 28 | class MachineRegisterInfo; |
| 29 | class SIInstrInfo; |
| 30 | class SIRegisterInfo; |
| 31 | class GCNSubtarget; |
| 32 | |
| 33 | class GCNHazardRecognizer final : public ScheduleHazardRecognizer { |
| 34 | public: |
| 35 | typedef function_ref<bool(const MachineInstr &)> IsHazardFn; |
| 36 | typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn; |
| 37 | typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn; |
| 38 | |
| 39 | private: |
| 40 | // Distinguish if we are called from scheduler or hazard recognizer |
| 41 | bool IsHazardRecognizerMode; |
| 42 | |
| 43 | // This variable stores the instruction that has been emitted this cycle. It |
| 44 | // will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is |
| 45 | // called. |
| 46 | MachineInstr *CurrCycleInstr; |
| 47 | std::list<MachineInstr*> EmittedInstrs; |
| 48 | const MachineFunction &MF; |
| 49 | const GCNSubtarget &ST; |
| 50 | const SIInstrInfo &TII; |
| 51 | const SIRegisterInfo &TRI; |
| 52 | const TargetSchedModel &TSchedModel; |
| 53 | |
| 54 | // Loop info for V_NOP hoisting, passed from the pass manager. |
| 55 | MachineLoopInfo *MLI = nullptr; |
| 56 | |
| 57 | bool RunLdsBranchVmemWARHazardFixup; |
| 58 | |
| 59 | /// RegUnits of uses in the current soft memory clause. |
| 60 | mutable BitVector ClauseUses; |
| 61 | |
| 62 | /// RegUnits of defs in the current soft memory clause. |
| 63 | mutable BitVector ClauseDefs; |
| 64 | |
| 65 | void resetClause() const { |
| 66 | ClauseUses.reset(); |
| 67 | ClauseDefs.reset(); |
| 68 | } |
| 69 | |
| 70 | void addClauseInst(const MachineInstr &MI) const; |
| 71 | |
| 72 | /// \returns the number of wait states before another MFMA instruction can be |
| 73 | /// issued after \p MI. |
| 74 | unsigned getMFMAPipelineWaitStates(const MachineInstr &MI) const; |
| 75 | |
| 76 | // Advance over a MachineInstr bundle. Look for hazards in the bundled |
| 77 | // instructions. |
| 78 | void processBundle(); |
| 79 | |
| 80 | // Run on an individual instruction in hazard recognizer mode. This can be |
| 81 | // used on a newly inserted instruction before returning from PreEmitNoops. |
| 82 | void runOnInstruction(MachineInstr *MI); |
| 83 | |
| 84 | int getWaitStatesSince(IsHazardFn IsHazard, int Limit, |
| 85 | GetNumWaitStatesFn GetNumWaitStates) const; |
| 86 | int getWaitStatesSince(IsHazardFn IsHazard, int Limit) const; |
| 87 | int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, |
| 88 | int Limit) const; |
| 89 | int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit) const; |
| 90 | |
| 91 | int checkSoftClauseHazards(MachineInstr *SMEM) const; |
| 92 | int checkSMRDHazards(MachineInstr *SMRD) const; |
| 93 | int checkVMEMHazards(MachineInstr *VMEM) const; |
| 94 | int checkDPPHazards(MachineInstr *DPP) const; |
| 95 | int checkDivFMasHazards(MachineInstr *DivFMas) const; |
| 96 | int checkGetRegHazards(MachineInstr *GetRegInstr) const; |
| 97 | int checkSetRegHazards(MachineInstr *SetRegInstr) const; |
| 98 | int createsVALUHazard(const MachineInstr &MI) const; |
| 99 | int checkVALUHazards(MachineInstr *VALU) const; |
| 100 | int checkVALUHazardsHelper(const MachineOperand &Def, |
| 101 | const MachineRegisterInfo &MRI) const; |
| 102 | int checkRWLaneHazards(MachineInstr *RWLane) const; |
| 103 | int checkRFEHazards(MachineInstr *RFE) const; |
| 104 | int checkInlineAsmHazards(MachineInstr *IA) const; |
| 105 | int checkReadM0Hazards(MachineInstr *SMovRel) const; |
| 106 | int checkNSAtoVMEMHazard(MachineInstr *MI) const; |
| 107 | int checkFPAtomicToDenormModeHazard(MachineInstr *MI) const; |
| 108 | // Emit \p WaitStatesNeeded V_NOP instructions before \p InsertPt. |
| 109 | // If IsHoisting is true, uses empty DebugLoc for compiler-inserted NOPs. |
| 110 | void emitVNops(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, |
| 111 | int WaitStatesNeeded, bool IsHoisting = false); |
| 112 | void fixHazards(MachineInstr *MI); |
| 113 | bool fixVcmpxPermlaneHazards(MachineInstr *MI); |
| 114 | bool fixVMEMtoScalarWriteHazards(MachineInstr *MI); |
| 115 | bool fixSMEMtoVectorWriteHazards(MachineInstr *MI); |
| 116 | bool fixVcmpxExecWARHazard(MachineInstr *MI); |
| 117 | bool fixLdsBranchVmemWARHazard(MachineInstr *MI); |
| 118 | bool fixLdsDirectVALUHazard(MachineInstr *MI); |
| 119 | bool fixLdsDirectVMEMHazard(MachineInstr *MI); |
| 120 | bool fixVALUPartialForwardingHazard(MachineInstr *MI); |
| 121 | bool fixVALUTransUseHazard(MachineInstr *MI); |
| 122 | bool fixVALUTransCoexecutionHazards(MachineInstr *MI); |
| 123 | bool fixWMMAHazards(MachineInstr *MI); |
| 124 | int checkWMMACoexecutionHazards(MachineInstr *MI) const; |
| 125 | bool fixWMMACoexecutionHazards(MachineInstr *MI); |
| 126 | bool tryHoistWMMAVnopsFromLoop(MachineInstr *MI, int WaitStatesNeeded); |
| 127 | bool hasWMMAHazardInLoop(MachineLoop *L, MachineInstr *MI, |
| 128 | bool IncludeSubloops = true); |
| 129 | bool hasWMMAToWMMARegOverlap(const MachineInstr &WMMA, |
| 130 | const MachineInstr &MI) const; |
| 131 | bool hasWMMAToVALURegOverlap(const MachineInstr &WMMA, |
| 132 | const MachineInstr &MI) const; |
| 133 | bool isCoexecutionHazardFor(const MachineInstr &I, |
| 134 | const MachineInstr &MI) const; |
| 135 | bool fixShift64HighRegBug(MachineInstr *MI); |
| 136 | bool fixVALUMaskWriteHazard(MachineInstr *MI); |
| 137 | bool fixRequiredExportPriority(MachineInstr *MI); |
| 138 | bool fixGetRegWaitIdle(MachineInstr *MI); |
| 139 | bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI); |
| 140 | bool fixScratchBaseForwardingHazard(MachineInstr *MI); |
| 141 | bool fixSetRegMode(MachineInstr *MI); |
| 142 | |
| 143 | int checkMAIHazards(MachineInstr *MI) const; |
| 144 | int checkMAIHazards908(MachineInstr *MI) const; |
| 145 | int checkMAIHazards90A(MachineInstr *MI) const; |
| 146 | /// Pad the latency between neighboring MFMA instructions with s_nops. The |
| 147 | /// percentage of wait states to fill with s_nops is specified by the command |
| 148 | /// line option '-amdgpu-mfma-padding-ratio'. |
| 149 | /// |
| 150 | /// For example, with '-amdgpu-mfma-padding-ratio=100': |
| 151 | /// |
| 152 | /// 2 pass MFMA instructions have a latency of 2 wait states. Therefore, a |
| 153 | /// 'S_NOP 1' will be added between sequential MFMA instructions. |
| 154 | /// |
| 155 | /// V_MFMA_F32_4X4X1F32 |
| 156 | /// V_MFMA_F32_4X4X1F32 |
| 157 | ///--> |
| 158 | /// V_MFMA_F32_4X4X1F32 |
| 159 | /// S_NOP 1 |
| 160 | /// V_MFMA_F32_4X4X1F32 |
| 161 | int checkMFMAPadding(MachineInstr *MI) const; |
| 162 | int checkMAIVALUHazards(MachineInstr *MI) const; |
| 163 | int checkMAILdStHazards(MachineInstr *MI) const; |
| 164 | int checkPermlaneHazards(MachineInstr *MI) const; |
| 165 | |
| 166 | public: |
| 167 | GCNHazardRecognizer(const MachineFunction &MF, |
| 168 | MachineLoopInfo *MLI = nullptr); |
| 169 | // We can only issue one instruction per cycle. |
| 170 | bool atIssueLimit() const override { return true; } |
| 171 | void EmitInstruction(SUnit *SU) override; |
| 172 | void EmitInstruction(MachineInstr *MI) override; |
| 173 | HazardType getHazardType(SUnit *SU, int Stalls) override; |
| 174 | void EmitNoop() override; |
| 175 | unsigned PreEmitNoops(MachineInstr *) override; |
| 176 | unsigned PreEmitNoopsCommon(MachineInstr *) const; |
| 177 | void AdvanceCycle() override; |
| 178 | void RecedeCycle() override; |
| 179 | bool ShouldPreferAnother(SUnit *SU) const override; |
| 180 | void Reset() override; |
| 181 | }; |
| 182 | |
| 183 | } // end namespace llvm |
| 184 | |
| 185 | #endif //LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H |
| 186 | |