| 1 | //===--------------------- TimelineView.cpp ---------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
/// \file
///
/// This file implements the TimelineView interface.
///
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "Views/TimelineView.h" |
| 15 | #include <numeric> |
| 16 | |
| 17 | namespace llvm { |
| 18 | namespace mca { |
| 19 | |
| 20 | TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer, |
| 21 | llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations, |
| 22 | unsigned Cycles) |
| 23 | : InstructionView(sti, Printer, S), CurrentCycle(0), |
| 24 | MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles), |
| 25 | LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) { |
| 26 | unsigned NumInstructions = getSource().size(); |
| 27 | assert(Iterations && "Invalid number of iterations specified!" ); |
| 28 | NumInstructions *= Iterations; |
| 29 | Timeline.resize(new_size: NumInstructions); |
| 30 | TimelineViewEntry InvalidTVEntry = {.CycleDispatched: -1, .CycleReady: 0, .CycleIssued: 0, .CycleExecuted: 0, .CycleRetired: 0}; |
| 31 | llvm::fill(Range&: Timeline, Value&: InvalidTVEntry); |
| 32 | |
| 33 | WaitTimeEntry NullWTEntry = {.CyclesSpentInSchedulerQueue: 0, .CyclesSpentInSQWhileReady: 0, .CyclesSpentAfterWBAndBeforeRetire: 0}; |
| 34 | llvm::fill(Range&: WaitTime, Value&: NullWTEntry); |
| 35 | |
| 36 | std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0, |
| 37 | /* unknown buffer size */ -1}; |
| 38 | llvm::fill(Range&: UsedBuffer, Value&: NullUsedBufferEntry); |
| 39 | } |
| 40 | |
| 41 | void TimelineView::onReservedBuffers(const InstRef &IR, |
| 42 | ArrayRef<unsigned> Buffers) { |
| 43 | if (IR.getSourceIndex() >= getSource().size()) |
| 44 | return; |
| 45 | |
| 46 | const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); |
| 47 | std::pair<unsigned, int> BufferInfo = {0, -1}; |
| 48 | for (const unsigned Buffer : Buffers) { |
| 49 | const MCProcResourceDesc &MCDesc = *SM.getProcResource(ProcResourceIdx: Buffer); |
| 50 | if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) { |
| 51 | BufferInfo.first = Buffer; |
| 52 | BufferInfo.second = MCDesc.BufferSize; |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | UsedBuffer[IR.getSourceIndex()] = BufferInfo; |
| 57 | } |
| 58 | |
| 59 | void TimelineView::onEvent(const HWInstructionEvent &Event) { |
| 60 | const unsigned Index = Event.IR.getSourceIndex(); |
| 61 | if (Index >= Timeline.size()) |
| 62 | return; |
| 63 | |
| 64 | switch (Event.Type) { |
| 65 | case HWInstructionEvent::Retired: { |
| 66 | TimelineViewEntry &TVEntry = Timeline[Index]; |
| 67 | if (CurrentCycle < MaxCycle) |
| 68 | TVEntry.CycleRetired = CurrentCycle; |
| 69 | |
| 70 | // Update the WaitTime entry which corresponds to this Index. |
| 71 | assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!" ); |
| 72 | unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched); |
| 73 | WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()]; |
| 74 | WTEntry.CyclesSpentInSchedulerQueue += |
| 75 | TVEntry.CycleIssued - CycleDispatched; |
| 76 | assert(CycleDispatched <= TVEntry.CycleReady && |
| 77 | "Instruction cannot be ready if it hasn't been dispatched yet!" ); |
| 78 | WTEntry.CyclesSpentInSQWhileReady += |
| 79 | TVEntry.CycleIssued - TVEntry.CycleReady; |
| 80 | if (CurrentCycle > TVEntry.CycleExecuted) { |
| 81 | WTEntry.CyclesSpentAfterWBAndBeforeRetire += |
| 82 | (CurrentCycle - 1) - TVEntry.CycleExecuted; |
| 83 | } |
| 84 | break; |
| 85 | } |
| 86 | case HWInstructionEvent::Ready: |
| 87 | Timeline[Index].CycleReady = CurrentCycle; |
| 88 | break; |
| 89 | case HWInstructionEvent::Issued: |
| 90 | Timeline[Index].CycleIssued = CurrentCycle; |
| 91 | break; |
| 92 | case HWInstructionEvent::Executed: |
| 93 | Timeline[Index].CycleExecuted = CurrentCycle; |
| 94 | break; |
| 95 | case HWInstructionEvent::Dispatched: |
| 96 | // There may be multiple dispatch events. Microcoded instructions that are |
| 97 | // expanded into multiple uOps may require multiple dispatch cycles. Here, |
| 98 | // we want to capture the first dispatch cycle. |
| 99 | if (Timeline[Index].CycleDispatched == -1) |
| 100 | Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle); |
| 101 | break; |
| 102 | default: |
| 103 | return; |
| 104 | } |
| 105 | if (CurrentCycle < MaxCycle) |
| 106 | LastCycle = std::max(a: LastCycle, b: CurrentCycle); |
| 107 | } |
| 108 | |
| 109 | static raw_ostream::Colors chooseColor(unsigned CumulativeCycles, |
| 110 | unsigned Executions, int BufferSize) { |
| 111 | if (CumulativeCycles && BufferSize < 0) |
| 112 | return raw_ostream::MAGENTA; |
| 113 | unsigned Size = static_cast<unsigned>(BufferSize); |
| 114 | if (CumulativeCycles >= Size * Executions) |
| 115 | return raw_ostream::RED; |
| 116 | if ((CumulativeCycles * 2) >= Size * Executions) |
| 117 | return raw_ostream::YELLOW; |
| 118 | return raw_ostream::SAVEDCOLOR; |
| 119 | } |
| 120 | |
| 121 | static void tryChangeColor(raw_ostream &OS, unsigned Cycles, |
| 122 | unsigned Executions, int BufferSize) { |
| 123 | if (!OS.has_colors()) |
| 124 | return; |
| 125 | |
| 126 | raw_ostream::Colors Color = chooseColor(CumulativeCycles: Cycles, Executions, BufferSize); |
| 127 | if (Color == raw_ostream::SAVEDCOLOR) { |
| 128 | OS.resetColor(); |
| 129 | return; |
| 130 | } |
| 131 | OS.changeColor(Color, /* bold */ Bold: true, /* BG */ false); |
| 132 | } |
| 133 | |
| 134 | void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, |
| 135 | const WaitTimeEntry &Entry, |
| 136 | unsigned SourceIndex, |
| 137 | unsigned Executions) const { |
| 138 | bool PrintingTotals = SourceIndex == getSource().size(); |
| 139 | unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; |
| 140 | |
| 141 | if (!PrintingTotals) |
| 142 | OS << SourceIndex << '.'; |
| 143 | |
| 144 | OS.PadToColumn(NewCol: 7); |
| 145 | |
| 146 | double AverageTime1, AverageTime2, AverageTime3; |
| 147 | AverageTime1 = |
| 148 | (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions; |
| 149 | AverageTime2 = |
| 150 | (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions; |
| 151 | AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) / |
| 152 | CumulativeExecutions; |
| 153 | |
| 154 | OS << Executions; |
| 155 | OS.PadToColumn(NewCol: 13); |
| 156 | |
| 157 | int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second; |
| 158 | if (!PrintingTotals) |
| 159 | tryChangeColor(OS, Cycles: Entry.CyclesSpentInSchedulerQueue, Executions: CumulativeExecutions, |
| 160 | BufferSize); |
| 161 | OS << format(Fmt: "%.1f" , Vals: floor(x: AverageTime1 + 0.5) / 10); |
| 162 | OS.PadToColumn(NewCol: 20); |
| 163 | if (!PrintingTotals) |
| 164 | tryChangeColor(OS, Cycles: Entry.CyclesSpentInSQWhileReady, Executions: CumulativeExecutions, |
| 165 | BufferSize); |
| 166 | OS << format(Fmt: "%.1f" , Vals: floor(x: AverageTime2 + 0.5) / 10); |
| 167 | OS.PadToColumn(NewCol: 27); |
| 168 | if (!PrintingTotals) |
| 169 | tryChangeColor(OS, Cycles: Entry.CyclesSpentAfterWBAndBeforeRetire, |
| 170 | Executions: CumulativeExecutions, |
| 171 | BufferSize: getSubTargetInfo().getSchedModel().MicroOpBufferSize); |
| 172 | OS << format(Fmt: "%.1f" , Vals: floor(x: AverageTime3 + 0.5) / 10); |
| 173 | |
| 174 | if (OS.has_colors()) |
| 175 | OS.resetColor(); |
| 176 | OS.PadToColumn(NewCol: 34); |
| 177 | } |
| 178 | |
| 179 | void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { |
| 180 | std::string = |
| 181 | "\n\nAverage Wait times (based on the timeline view):\n" |
| 182 | "[0]: Executions\n" |
| 183 | "[1]: Average time spent waiting in a scheduler's queue\n" |
| 184 | "[2]: Average time spent waiting in a scheduler's queue while ready\n" |
| 185 | "[3]: Average time elapsed from WB until retire stage\n\n" |
| 186 | " [0] [1] [2] [3]\n" ; |
| 187 | OS << Header; |
| 188 | formatted_raw_ostream FOS(OS); |
| 189 | unsigned Executions = Timeline.size() / getSource().size(); |
| 190 | unsigned IID = 0; |
| 191 | for (const MCInst &Inst : getSource()) { |
| 192 | printWaitTimeEntry(OS&: FOS, Entry: WaitTime[IID], SourceIndex: IID, Executions); |
| 193 | FOS << " " << printInstructionString(MCI: Inst) << '\n'; |
| 194 | FOS.flush(); |
| 195 | ++IID; |
| 196 | } |
| 197 | |
| 198 | // If the timeline contains more than one instruction, |
| 199 | // let's also print global averages. |
| 200 | if (getSource().size() != 1) { |
| 201 | WaitTimeEntry TotalWaitTime = std::accumulate( |
| 202 | first: WaitTime.begin(), last: WaitTime.end(), init: WaitTimeEntry{.CyclesSpentInSchedulerQueue: 0, .CyclesSpentInSQWhileReady: 0, .CyclesSpentAfterWBAndBeforeRetire: 0}, |
| 203 | binary_op: [](const WaitTimeEntry &A, const WaitTimeEntry &B) { |
| 204 | return WaitTimeEntry{ |
| 205 | .CyclesSpentInSchedulerQueue: A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, |
| 206 | .CyclesSpentInSQWhileReady: A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, |
| 207 | .CyclesSpentAfterWBAndBeforeRetire: A.CyclesSpentAfterWBAndBeforeRetire + |
| 208 | B.CyclesSpentAfterWBAndBeforeRetire}; |
| 209 | }); |
| 210 | printWaitTimeEntry(OS&: FOS, Entry: TotalWaitTime, SourceIndex: IID, Executions); |
| 211 | FOS << " " |
| 212 | << "<total>" << '\n'; |
| 213 | FOS.flush(); |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, |
| 218 | const TimelineViewEntry &Entry, |
| 219 | unsigned Iteration, |
| 220 | unsigned SourceIndex) const { |
| 221 | if (Iteration == 0 && SourceIndex == 0) |
| 222 | OS << '\n'; |
| 223 | OS << '[' << Iteration << ',' << SourceIndex << ']'; |
| 224 | OS.PadToColumn(NewCol: 10); |
| 225 | assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!" ); |
| 226 | unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched); |
| 227 | for (unsigned I = 0, E = CycleDispatched; I < E; ++I) |
| 228 | OS << ((I % 5 == 0) ? '.' : ' '); |
| 229 | OS << TimelineView::DisplayChar::Dispatched; |
| 230 | if (CycleDispatched != Entry.CycleExecuted) { |
| 231 | // Zero latency instructions have the same value for CycleDispatched, |
| 232 | // CycleIssued and CycleExecuted. |
| 233 | for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I) |
| 234 | OS << TimelineView::DisplayChar::Waiting; |
| 235 | if (Entry.CycleIssued == Entry.CycleExecuted) |
| 236 | OS << TimelineView::DisplayChar::DisplayChar::Executed; |
| 237 | else { |
| 238 | if (CycleDispatched != Entry.CycleIssued) |
| 239 | OS << TimelineView::DisplayChar::Executing; |
| 240 | for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E; |
| 241 | ++I) |
| 242 | OS << TimelineView::DisplayChar::Executing; |
| 243 | OS << TimelineView::DisplayChar::Executed; |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I) |
| 248 | OS << TimelineView::DisplayChar::RetireLag; |
| 249 | if (Entry.CycleExecuted < Entry.CycleRetired) |
| 250 | OS << TimelineView::DisplayChar::Retired; |
| 251 | |
| 252 | // Skip other columns. |
| 253 | for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I) |
| 254 | OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' '); |
| 255 | } |
| 256 | |
| 257 | static void (formatted_raw_ostream &OS, unsigned Cycles) { |
| 258 | OS << "\n\nTimeline view:\n" ; |
| 259 | if (Cycles >= 10) { |
| 260 | OS.PadToColumn(NewCol: 10); |
| 261 | for (unsigned I = 0; I <= Cycles; ++I) { |
| 262 | if (((I / 10) & 1) == 0) |
| 263 | OS << ' '; |
| 264 | else |
| 265 | OS << I % 10; |
| 266 | } |
| 267 | OS << '\n'; |
| 268 | } |
| 269 | |
| 270 | OS << "Index" ; |
| 271 | OS.PadToColumn(NewCol: 10); |
| 272 | for (unsigned I = 0; I <= Cycles; ++I) { |
| 273 | if (((I / 10) & 1) == 0) |
| 274 | OS << I % 10; |
| 275 | else |
| 276 | OS << ' '; |
| 277 | } |
| 278 | OS << '\n'; |
| 279 | } |
| 280 | |
| 281 | void TimelineView::printTimeline(raw_ostream &OS) const { |
| 282 | formatted_raw_ostream FOS(OS); |
| 283 | printTimelineHeader(OS&: FOS, Cycles: LastCycle); |
| 284 | FOS.flush(); |
| 285 | |
| 286 | unsigned IID = 0; |
| 287 | ArrayRef<llvm::MCInst> Source = getSource(); |
| 288 | const unsigned Iterations = Timeline.size() / Source.size(); |
| 289 | for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) { |
| 290 | for (const MCInst &Inst : Source) { |
| 291 | const TimelineViewEntry &Entry = Timeline[IID]; |
| 292 | // When an instruction is retired after timeline-max-cycles, |
| 293 | // its CycleRetired is left at 0. However, it's possible for |
| 294 | // a 0 latency instruction to be retired during cycle 0 and we |
| 295 | // don't want to early exit in that case. The CycleExecuted |
| 296 | // attribute is set correctly whether or not it is greater |
| 297 | // than timeline-max-cycles so we can use that to ensure |
| 298 | // we don't early exit because of a 0 latency instruction. |
| 299 | if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) { |
| 300 | FOS << "Truncated display due to cycle limit\n" ; |
| 301 | return; |
| 302 | } |
| 303 | |
| 304 | unsigned SourceIndex = IID % Source.size(); |
| 305 | printTimelineViewEntry(OS&: FOS, Entry, Iteration, SourceIndex); |
| 306 | FOS << " " << printInstructionString(MCI: Inst) << '\n'; |
| 307 | FOS.flush(); |
| 308 | |
| 309 | ++IID; |
| 310 | } |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | json::Value TimelineView::toJSON() const { |
| 315 | json::Array TimelineInfo; |
| 316 | |
| 317 | for (const TimelineViewEntry &TLE : Timeline) { |
| 318 | // Check if the timeline-max-cycles has been reached. |
| 319 | if (!TLE.CycleRetired && TLE.CycleExecuted) |
| 320 | break; |
| 321 | |
| 322 | TimelineInfo.push_back( |
| 323 | E: json::Object({{.K: "CycleDispatched" , .V: TLE.CycleDispatched}, |
| 324 | {.K: "CycleReady" , .V: TLE.CycleReady}, |
| 325 | {.K: "CycleIssued" , .V: TLE.CycleIssued}, |
| 326 | {.K: "CycleExecuted" , .V: TLE.CycleExecuted}, |
| 327 | {.K: "CycleRetired" , .V: TLE.CycleRetired}})); |
| 328 | } |
| 329 | return json::Object({{.K: "TimelineInfo" , .V: std::move(TimelineInfo)}}); |
| 330 | } |
| 331 | } // namespace mca |
| 332 | } // namespace llvm |
| 333 | |