| 1 | //===- MCSchedule.cpp - Scheduling ------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the default scheduling model. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/MC/MCSchedule.h" |
| 14 | #include "llvm/ADT/APFloat.h" |
| 15 | #include "llvm/ADT/APSInt.h" |
| 16 | #include "llvm/MC/MCInst.h" |
| 17 | #include "llvm/MC/MCInstrDesc.h" |
| 18 | #include "llvm/MC/MCInstrInfo.h" |
| 19 | #include "llvm/MC/MCSubtargetInfo.h" |
| 20 | #include "llvm/Support/CommandLine.h" |
| 21 | #include <optional> |
| 22 | #include <type_traits> |
| 23 | |
| 24 | using namespace llvm; |
| 25 | |
| 26 | cl::OptionCategory llvm::MCScheduleOptions("Machine scheduling model options" ); |
| 27 | |
| 28 | static constexpr float DefaultReservationStationScaleFactor = 1.0f; |
| 29 | |
| 30 | static cl::opt<float> ReservationStationScaleFactor( |
| 31 | "sched-model-reservation-station-scale-factor" , cl::Hidden, |
| 32 | cl::init(Val: DefaultReservationStationScaleFactor), cl::cat(MCScheduleOptions), |
| 33 | cl::desc("Scale the buffer size of all reservation stations by a positive " |
| 34 | "factor. Buffer sizes of -1/0/1 (unlimited/unbuffered/in-order) " |
| 35 | "are preserved. Likewise, if the scaled result is <= 1, the " |
| 36 | "original size is kept. Computed sizes " |
| 37 | "are truncated towards zero." )); |
| 38 | |
| 39 | static_assert(std::is_trivial_v<MCSchedModel>, |
| 40 | "MCSchedModel is required to be a trivial type" ); |
| 41 | const MCSchedModel MCSchedModel::Default = {.IssueWidth: DefaultIssueWidth, |
| 42 | .MicroOpBufferSize: DefaultMicroOpBufferSize, |
| 43 | .LoopMicroOpBufferSize: DefaultLoopMicroOpBufferSize, |
| 44 | .LoadLatency: DefaultLoadLatency, |
| 45 | .HighLatency: DefaultHighLatency, |
| 46 | .MispredictPenalty: DefaultMispredictPenalty, |
| 47 | .PostRAScheduler: false, |
| 48 | .CompleteModel: true, |
| 49 | /*EnableIntervals=*/false, |
| 50 | .ProcID: 0, |
| 51 | .ProcResourceTable: nullptr, |
| 52 | .SchedClassTable: nullptr, |
| 53 | .NumProcResourceKinds: 0, |
| 54 | .NumSchedClasses: 0, |
| 55 | .SchedClassNames: nullptr, |
| 56 | .InstrItineraries: nullptr, |
| 57 | .ExtraProcessorInfo: nullptr}; |
| 58 | |
| 59 | int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI, |
| 60 | const MCSchedClassDesc &SCDesc) { |
| 61 | int Latency = 0; |
| 62 | for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries; |
| 63 | DefIdx != DefEnd; ++DefIdx) { |
| 64 | // Lookup the definition's write latency in SubtargetInfo. |
| 65 | const MCWriteLatencyEntry *WLEntry = |
| 66 | STI.getWriteLatencyEntry(SC: &SCDesc, DefIdx); |
| 67 | // Early exit if we found an invalid latency. |
| 68 | if (WLEntry->Cycles < 0) |
| 69 | return WLEntry->Cycles; |
| 70 | Latency = std::max(a: Latency, b: static_cast<int>(WLEntry->Cycles)); |
| 71 | } |
| 72 | return Latency; |
| 73 | } |
| 74 | |
| 75 | int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI, |
| 76 | unsigned SchedClass) const { |
| 77 | const MCSchedClassDesc &SCDesc = *getSchedClassDesc(SchedClassIdx: SchedClass); |
| 78 | if (!SCDesc.isValid()) |
| 79 | return 0; |
| 80 | if (!SCDesc.isVariant()) |
| 81 | return MCSchedModel::computeInstrLatency(STI, SCDesc); |
| 82 | |
| 83 | llvm_unreachable("unsupported variant scheduling class" ); |
| 84 | } |
| 85 | |
| 86 | int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI, |
| 87 | const MCInstrInfo &MCII, |
| 88 | const MCInst &Inst) const { |
| 89 | return MCSchedModel::computeInstrLatency<MCSubtargetInfo, MCInstrInfo, |
| 90 | InstrItineraryData, MCInst>( |
| 91 | STI, MCII, Inst, |
| 92 | ResolveVariantSchedClass: [&](const MCSchedClassDesc *SCDesc) -> const MCSchedClassDesc * { |
| 93 | if (!SCDesc->isValid()) |
| 94 | return nullptr; |
| 95 | |
| 96 | unsigned CPUID = getProcessorID(); |
| 97 | unsigned SchedClass = 0; |
| 98 | while (SCDesc->isVariant()) { |
| 99 | SchedClass = |
| 100 | STI.resolveVariantSchedClass(SchedClass, MI: &Inst, MCII: &MCII, CPUID); |
| 101 | SCDesc = getSchedClassDesc(SchedClassIdx: SchedClass); |
| 102 | } |
| 103 | |
| 104 | if (!SchedClass) { |
| 105 | assert(false && "unsupported variant scheduling class" ); |
| 106 | return nullptr; |
| 107 | } |
| 108 | |
| 109 | return SCDesc; |
| 110 | }); |
| 111 | } |
| 112 | |
| 113 | double |
| 114 | MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI, |
| 115 | const MCSchedClassDesc &SCDesc) { |
| 116 | std::optional<double> MinThroughput; |
| 117 | const MCSchedModel &SM = STI.getSchedModel(); |
| 118 | const MCWriteProcResEntry *I = STI.getWriteProcResBegin(SC: &SCDesc); |
| 119 | const MCWriteProcResEntry *E = STI.getWriteProcResEnd(SC: &SCDesc); |
| 120 | for (; I != E; ++I) { |
| 121 | if (!I->ReleaseAtCycle || I->ReleaseAtCycle == I->AcquireAtCycle) |
| 122 | continue; |
| 123 | assert(I->ReleaseAtCycle > I->AcquireAtCycle && "invalid resource segment" ); |
| 124 | unsigned NumUnits = SM.getProcResource(ProcResourceIdx: I->ProcResourceIdx)->NumUnits; |
| 125 | double Throughput = |
| 126 | double(NumUnits) / double(I->ReleaseAtCycle - I->AcquireAtCycle); |
| 127 | MinThroughput = |
| 128 | MinThroughput ? std::min(a: *MinThroughput, b: Throughput) : Throughput; |
| 129 | } |
| 130 | if (MinThroughput) |
| 131 | return 1.0 / *MinThroughput; |
| 132 | |
| 133 | // If no throughput value was calculated, assume that we can execute at the |
| 134 | // maximum issue width scaled by number of micro-ops for the schedule class. |
| 135 | return ((double)SCDesc.NumMicroOps) / SM.IssueWidth; |
| 136 | } |
| 137 | |
| 138 | double |
| 139 | MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI, |
| 140 | const MCInstrInfo &MCII, |
| 141 | const MCInst &Inst) const { |
| 142 | unsigned SchedClass = MCII.get(Opcode: Inst.getOpcode()).getSchedClass(); |
| 143 | const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClassIdx: SchedClass); |
| 144 | |
| 145 | // If there's no valid class, assume that the instruction executes/completes |
| 146 | // at the maximum issue width. |
| 147 | if (!SCDesc->isValid()) |
| 148 | return 1.0 / IssueWidth; |
| 149 | |
| 150 | unsigned CPUID = getProcessorID(); |
| 151 | while (SCDesc->isVariant()) { |
| 152 | SchedClass = STI.resolveVariantSchedClass(SchedClass, MI: &Inst, MCII: &MCII, CPUID); |
| 153 | SCDesc = getSchedClassDesc(SchedClassIdx: SchedClass); |
| 154 | } |
| 155 | |
| 156 | if (SchedClass) |
| 157 | return MCSchedModel::getReciprocalThroughput(STI, SCDesc: *SCDesc); |
| 158 | |
| 159 | llvm_unreachable("unsupported variant scheduling class" ); |
| 160 | } |
| 161 | |
| 162 | double |
| 163 | MCSchedModel::getReciprocalThroughput(unsigned SchedClass, |
| 164 | const InstrItineraryData &IID) { |
| 165 | std::optional<double> Throughput; |
| 166 | const InstrStage *I = IID.beginStage(ItinClassIndx: SchedClass); |
| 167 | const InstrStage *E = IID.endStage(ItinClassIndx: SchedClass); |
| 168 | for (; I != E; ++I) { |
| 169 | if (!I->getCycles()) |
| 170 | continue; |
| 171 | double Temp = llvm::popcount(Value: I->getUnits()) * 1.0 / I->getCycles(); |
| 172 | Throughput = Throughput ? std::min(a: *Throughput, b: Temp) : Temp; |
| 173 | } |
| 174 | if (Throughput) |
| 175 | return 1.0 / *Throughput; |
| 176 | |
| 177 | // If there are no execution resources specified for this class, then assume |
| 178 | // that it can execute at the maximum default issue width. |
| 179 | return 1.0 / DefaultIssueWidth; |
| 180 | } |
| 181 | |
| 182 | unsigned |
| 183 | MCSchedModel::getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries, |
| 184 | unsigned WriteResourceID) { |
| 185 | if (Entries.empty()) |
| 186 | return 0; |
| 187 | |
| 188 | int DelayCycles = 0; |
| 189 | for (const MCReadAdvanceEntry &E : Entries) { |
| 190 | if (E.WriteResourceID != WriteResourceID) |
| 191 | continue; |
| 192 | DelayCycles = std::min(a: DelayCycles, b: E.Cycles); |
| 193 | } |
| 194 | |
| 195 | return std::abs(x: DelayCycles); |
| 196 | } |
| 197 | |
| 198 | unsigned MCSchedModel::getBypassDelayCycles(const MCSubtargetInfo &STI, |
| 199 | const MCSchedClassDesc &SCDesc) { |
| 200 | |
| 201 | ArrayRef<MCReadAdvanceEntry> Entries = STI.getReadAdvanceEntries(SC: SCDesc); |
| 202 | if (Entries.empty()) |
| 203 | return 0; |
| 204 | |
| 205 | unsigned MaxLatency = 0; |
| 206 | unsigned WriteResourceID = 0; |
| 207 | unsigned DefEnd = SCDesc.NumWriteLatencyEntries; |
| 208 | |
| 209 | for (unsigned DefIdx = 0; DefIdx != DefEnd; ++DefIdx) { |
| 210 | // Lookup the definition's write latency in SubtargetInfo. |
| 211 | const MCWriteLatencyEntry *WLEntry = |
| 212 | STI.getWriteLatencyEntry(SC: &SCDesc, DefIdx); |
| 213 | unsigned Cycles = 0; |
| 214 | // If latency is Invalid (<0), consider 0 cycle latency |
| 215 | if (WLEntry->Cycles > 0) |
| 216 | Cycles = (unsigned)WLEntry->Cycles; |
| 217 | if (Cycles > MaxLatency) { |
| 218 | MaxLatency = Cycles; |
| 219 | WriteResourceID = WLEntry->WriteResourceID; |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | for (const MCReadAdvanceEntry &E : Entries) { |
| 224 | if (E.WriteResourceID == WriteResourceID) |
| 225 | return E.Cycles; |
| 226 | } |
| 227 | |
| 228 | // Unable to find WriteResourceID in MCReadAdvanceEntry Entries |
| 229 | return 0; |
| 230 | } |
| 231 | |
| 232 | /// Return the buffer size of the resource. If a positive scale factor |
| 233 | /// is provided and the original buffer size is > 1, the size is scaled |
| 234 | /// accordingly. |
| 235 | int MCSchedModel::getResourceBufferSize(unsigned ProcResourceIdx) const { |
| 236 | int BufferSize = getProcResource(ProcResourceIdx)->BufferSize; |
| 237 | |
| 238 | // Skip scaling when factor is 1 (the default). |
| 239 | // Use native float comparison to avoid overhead on the hot fast |
| 240 | // path, as 1.0f is exactly representable |
| 241 | if (LLVM_LIKELY(ReservationStationScaleFactor == |
| 242 | DefaultReservationStationScaleFactor)) |
| 243 | return BufferSize; |
| 244 | |
| 245 | // Skip scaling for special buffer sizes (-1,0,1) |
| 246 | if (BufferSize <= 1) |
| 247 | return BufferSize; |
| 248 | |
| 249 | // Skip invalid (non-positive) scale factors |
| 250 | APFloat Scale(ReservationStationScaleFactor); |
| 251 | if (Scale.isNegative() || Scale.isZero()) |
| 252 | return BufferSize; |
| 253 | |
| 254 | // Scale and truncate the positive computed size towards zero |
| 255 | APFloat Product(static_cast<float>(BufferSize)); |
| 256 | Product.multiply(RHS: Scale, RM: APFloat::rmTowardZero); |
| 257 | APSInt Result(32, /*IsUnsigned=*/false); |
| 258 | bool IsExact; |
| 259 | if (Product.convertToInteger(Result, RM: APFloat::rmTowardZero, IsExact: &IsExact) & |
| 260 | APFloat::opInvalidOp) |
| 261 | return BufferSize; |
| 262 | int Scaled = static_cast<int>(Result.getExtValue()); |
| 263 | |
| 264 | // Avoid producing special buffer sizes (-1,0,1) |
| 265 | if (Scaled <= 1) |
| 266 | return BufferSize; |
| 267 | |
| 268 | return Scaled; |
| 269 | } |
| 270 | |