1 | //===--------------------- TimelineView.cpp ---------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
/// \file
9 | /// |
10 | /// This file implements the TimelineView interface. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "Views/TimelineView.h" |
15 | #include <numeric> |
16 | |
17 | namespace llvm { |
18 | namespace mca { |
19 | |
20 | TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer, |
21 | llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations, |
22 | unsigned Cycles) |
23 | : InstructionView(sti, Printer, S), CurrentCycle(0), |
24 | MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles), |
25 | LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) { |
26 | unsigned NumInstructions = getSource().size(); |
27 | assert(Iterations && "Invalid number of iterations specified!" ); |
28 | NumInstructions *= Iterations; |
29 | Timeline.resize(new_size: NumInstructions); |
30 | TimelineViewEntry InvalidTVEntry = {.CycleDispatched: -1, .CycleReady: 0, .CycleIssued: 0, .CycleExecuted: 0, .CycleRetired: 0}; |
31 | std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry); |
32 | |
33 | WaitTimeEntry NullWTEntry = {.CyclesSpentInSchedulerQueue: 0, .CyclesSpentInSQWhileReady: 0, .CyclesSpentAfterWBAndBeforeRetire: 0}; |
34 | std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry); |
35 | |
36 | std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0, |
37 | /* unknown buffer size */ -1}; |
38 | std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry); |
39 | } |
40 | |
41 | void TimelineView::onReservedBuffers(const InstRef &IR, |
42 | ArrayRef<unsigned> Buffers) { |
43 | if (IR.getSourceIndex() >= getSource().size()) |
44 | return; |
45 | |
46 | const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); |
47 | std::pair<unsigned, int> BufferInfo = {0, -1}; |
48 | for (const unsigned Buffer : Buffers) { |
49 | const MCProcResourceDesc &MCDesc = *SM.getProcResource(ProcResourceIdx: Buffer); |
50 | if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) { |
51 | BufferInfo.first = Buffer; |
52 | BufferInfo.second = MCDesc.BufferSize; |
53 | } |
54 | } |
55 | |
56 | UsedBuffer[IR.getSourceIndex()] = BufferInfo; |
57 | } |
58 | |
59 | void TimelineView::onEvent(const HWInstructionEvent &Event) { |
60 | const unsigned Index = Event.IR.getSourceIndex(); |
61 | if (Index >= Timeline.size()) |
62 | return; |
63 | |
64 | switch (Event.Type) { |
65 | case HWInstructionEvent::Retired: { |
66 | TimelineViewEntry &TVEntry = Timeline[Index]; |
67 | if (CurrentCycle < MaxCycle) |
68 | TVEntry.CycleRetired = CurrentCycle; |
69 | |
70 | // Update the WaitTime entry which corresponds to this Index. |
71 | assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!" ); |
72 | unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched); |
73 | WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()]; |
74 | WTEntry.CyclesSpentInSchedulerQueue += |
75 | TVEntry.CycleIssued - CycleDispatched; |
76 | assert(CycleDispatched <= TVEntry.CycleReady && |
77 | "Instruction cannot be ready if it hasn't been dispatched yet!" ); |
78 | WTEntry.CyclesSpentInSQWhileReady += |
79 | TVEntry.CycleIssued - TVEntry.CycleReady; |
80 | if (CurrentCycle > TVEntry.CycleExecuted) { |
81 | WTEntry.CyclesSpentAfterWBAndBeforeRetire += |
82 | (CurrentCycle - 1) - TVEntry.CycleExecuted; |
83 | } |
84 | break; |
85 | } |
86 | case HWInstructionEvent::Ready: |
87 | Timeline[Index].CycleReady = CurrentCycle; |
88 | break; |
89 | case HWInstructionEvent::Issued: |
90 | Timeline[Index].CycleIssued = CurrentCycle; |
91 | break; |
92 | case HWInstructionEvent::Executed: |
93 | Timeline[Index].CycleExecuted = CurrentCycle; |
94 | break; |
95 | case HWInstructionEvent::Dispatched: |
96 | // There may be multiple dispatch events. Microcoded instructions that are |
97 | // expanded into multiple uOps may require multiple dispatch cycles. Here, |
98 | // we want to capture the first dispatch cycle. |
99 | if (Timeline[Index].CycleDispatched == -1) |
100 | Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle); |
101 | break; |
102 | default: |
103 | return; |
104 | } |
105 | if (CurrentCycle < MaxCycle) |
106 | LastCycle = std::max(a: LastCycle, b: CurrentCycle); |
107 | } |
108 | |
109 | static raw_ostream::Colors chooseColor(unsigned CumulativeCycles, |
110 | unsigned Executions, int BufferSize) { |
111 | if (CumulativeCycles && BufferSize < 0) |
112 | return raw_ostream::MAGENTA; |
113 | unsigned Size = static_cast<unsigned>(BufferSize); |
114 | if (CumulativeCycles >= Size * Executions) |
115 | return raw_ostream::RED; |
116 | if ((CumulativeCycles * 2) >= Size * Executions) |
117 | return raw_ostream::YELLOW; |
118 | return raw_ostream::SAVEDCOLOR; |
119 | } |
120 | |
121 | static void tryChangeColor(raw_ostream &OS, unsigned Cycles, |
122 | unsigned Executions, int BufferSize) { |
123 | if (!OS.has_colors()) |
124 | return; |
125 | |
126 | raw_ostream::Colors Color = chooseColor(CumulativeCycles: Cycles, Executions, BufferSize); |
127 | if (Color == raw_ostream::SAVEDCOLOR) { |
128 | OS.resetColor(); |
129 | return; |
130 | } |
131 | OS.changeColor(Color, /* bold */ Bold: true, /* BG */ false); |
132 | } |
133 | |
134 | void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, |
135 | const WaitTimeEntry &Entry, |
136 | unsigned SourceIndex, |
137 | unsigned Executions) const { |
138 | bool PrintingTotals = SourceIndex == getSource().size(); |
139 | unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; |
140 | |
141 | if (!PrintingTotals) |
142 | OS << SourceIndex << '.'; |
143 | |
144 | OS.PadToColumn(NewCol: 7); |
145 | |
146 | double AverageTime1, AverageTime2, AverageTime3; |
147 | AverageTime1 = |
148 | (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions; |
149 | AverageTime2 = |
150 | (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions; |
151 | AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) / |
152 | CumulativeExecutions; |
153 | |
154 | OS << Executions; |
155 | OS.PadToColumn(NewCol: 13); |
156 | |
157 | int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second; |
158 | if (!PrintingTotals) |
159 | tryChangeColor(OS, Cycles: Entry.CyclesSpentInSchedulerQueue, Executions: CumulativeExecutions, |
160 | BufferSize); |
161 | OS << format(Fmt: "%.1f" , Vals: floor(x: AverageTime1 + 0.5) / 10); |
162 | OS.PadToColumn(NewCol: 20); |
163 | if (!PrintingTotals) |
164 | tryChangeColor(OS, Cycles: Entry.CyclesSpentInSQWhileReady, Executions: CumulativeExecutions, |
165 | BufferSize); |
166 | OS << format(Fmt: "%.1f" , Vals: floor(x: AverageTime2 + 0.5) / 10); |
167 | OS.PadToColumn(NewCol: 27); |
168 | if (!PrintingTotals) |
169 | tryChangeColor(OS, Cycles: Entry.CyclesSpentAfterWBAndBeforeRetire, |
170 | Executions: CumulativeExecutions, |
171 | BufferSize: getSubTargetInfo().getSchedModel().MicroOpBufferSize); |
172 | OS << format(Fmt: "%.1f" , Vals: floor(x: AverageTime3 + 0.5) / 10); |
173 | |
174 | if (OS.has_colors()) |
175 | OS.resetColor(); |
176 | OS.PadToColumn(NewCol: 34); |
177 | } |
178 | |
179 | void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { |
180 | std::string = |
181 | "\n\nAverage Wait times (based on the timeline view):\n" |
182 | "[0]: Executions\n" |
183 | "[1]: Average time spent waiting in a scheduler's queue\n" |
184 | "[2]: Average time spent waiting in a scheduler's queue while ready\n" |
185 | "[3]: Average time elapsed from WB until retire stage\n\n" |
186 | " [0] [1] [2] [3]\n" ; |
187 | OS << Header; |
188 | formatted_raw_ostream FOS(OS); |
189 | unsigned Executions = Timeline.size() / getSource().size(); |
190 | unsigned IID = 0; |
191 | for (const MCInst &Inst : getSource()) { |
192 | printWaitTimeEntry(OS&: FOS, Entry: WaitTime[IID], SourceIndex: IID, Executions); |
193 | FOS << " " << printInstructionString(MCI: Inst) << '\n'; |
194 | FOS.flush(); |
195 | ++IID; |
196 | } |
197 | |
198 | // If the timeline contains more than one instruction, |
199 | // let's also print global averages. |
200 | if (getSource().size() != 1) { |
201 | WaitTimeEntry TotalWaitTime = std::accumulate( |
202 | first: WaitTime.begin(), last: WaitTime.end(), init: WaitTimeEntry{.CyclesSpentInSchedulerQueue: 0, .CyclesSpentInSQWhileReady: 0, .CyclesSpentAfterWBAndBeforeRetire: 0}, |
203 | binary_op: [](const WaitTimeEntry &A, const WaitTimeEntry &B) { |
204 | return WaitTimeEntry{ |
205 | .CyclesSpentInSchedulerQueue: A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, |
206 | .CyclesSpentInSQWhileReady: A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, |
207 | .CyclesSpentAfterWBAndBeforeRetire: A.CyclesSpentAfterWBAndBeforeRetire + |
208 | B.CyclesSpentAfterWBAndBeforeRetire}; |
209 | }); |
210 | printWaitTimeEntry(OS&: FOS, Entry: TotalWaitTime, SourceIndex: IID, Executions); |
211 | FOS << " " |
212 | << "<total>" << '\n'; |
213 | FOS.flush(); |
214 | } |
215 | } |
216 | |
217 | void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, |
218 | const TimelineViewEntry &Entry, |
219 | unsigned Iteration, |
220 | unsigned SourceIndex) const { |
221 | if (Iteration == 0 && SourceIndex == 0) |
222 | OS << '\n'; |
223 | OS << '[' << Iteration << ',' << SourceIndex << ']'; |
224 | OS.PadToColumn(NewCol: 10); |
225 | assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!" ); |
226 | unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched); |
227 | for (unsigned I = 0, E = CycleDispatched; I < E; ++I) |
228 | OS << ((I % 5 == 0) ? '.' : ' '); |
229 | OS << TimelineView::DisplayChar::Dispatched; |
230 | if (CycleDispatched != Entry.CycleExecuted) { |
231 | // Zero latency instructions have the same value for CycleDispatched, |
232 | // CycleIssued and CycleExecuted. |
233 | for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I) |
234 | OS << TimelineView::DisplayChar::Waiting; |
235 | if (Entry.CycleIssued == Entry.CycleExecuted) |
236 | OS << TimelineView::DisplayChar::DisplayChar::Executed; |
237 | else { |
238 | if (CycleDispatched != Entry.CycleIssued) |
239 | OS << TimelineView::DisplayChar::Executing; |
240 | for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E; |
241 | ++I) |
242 | OS << TimelineView::DisplayChar::Executing; |
243 | OS << TimelineView::DisplayChar::Executed; |
244 | } |
245 | } |
246 | |
247 | for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I) |
248 | OS << TimelineView::DisplayChar::RetireLag; |
249 | if (Entry.CycleExecuted < Entry.CycleRetired) |
250 | OS << TimelineView::DisplayChar::Retired; |
251 | |
252 | // Skip other columns. |
253 | for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I) |
254 | OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' '); |
255 | } |
256 | |
257 | static void (formatted_raw_ostream &OS, unsigned Cycles) { |
258 | OS << "\n\nTimeline view:\n" ; |
259 | if (Cycles >= 10) { |
260 | OS.PadToColumn(NewCol: 10); |
261 | for (unsigned I = 0; I <= Cycles; ++I) { |
262 | if (((I / 10) & 1) == 0) |
263 | OS << ' '; |
264 | else |
265 | OS << I % 10; |
266 | } |
267 | OS << '\n'; |
268 | } |
269 | |
270 | OS << "Index" ; |
271 | OS.PadToColumn(NewCol: 10); |
272 | for (unsigned I = 0; I <= Cycles; ++I) { |
273 | if (((I / 10) & 1) == 0) |
274 | OS << I % 10; |
275 | else |
276 | OS << ' '; |
277 | } |
278 | OS << '\n'; |
279 | } |
280 | |
281 | void TimelineView::printTimeline(raw_ostream &OS) const { |
282 | formatted_raw_ostream FOS(OS); |
283 | printTimelineHeader(OS&: FOS, Cycles: LastCycle); |
284 | FOS.flush(); |
285 | |
286 | unsigned IID = 0; |
287 | ArrayRef<llvm::MCInst> Source = getSource(); |
288 | const unsigned Iterations = Timeline.size() / Source.size(); |
289 | for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) { |
290 | for (const MCInst &Inst : Source) { |
291 | const TimelineViewEntry &Entry = Timeline[IID]; |
292 | // When an instruction is retired after timeline-max-cycles, |
293 | // its CycleRetired is left at 0. However, it's possible for |
294 | // a 0 latency instruction to be retired during cycle 0 and we |
295 | // don't want to early exit in that case. The CycleExecuted |
296 | // attribute is set correctly whether or not it is greater |
297 | // than timeline-max-cycles so we can use that to ensure |
298 | // we don't early exit because of a 0 latency instruction. |
299 | if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) { |
300 | FOS << "Truncated display due to cycle limit\n" ; |
301 | return; |
302 | } |
303 | |
304 | unsigned SourceIndex = IID % Source.size(); |
305 | printTimelineViewEntry(OS&: FOS, Entry, Iteration, SourceIndex); |
306 | FOS << " " << printInstructionString(MCI: Inst) << '\n'; |
307 | FOS.flush(); |
308 | |
309 | ++IID; |
310 | } |
311 | } |
312 | } |
313 | |
314 | json::Value TimelineView::toJSON() const { |
315 | json::Array TimelineInfo; |
316 | |
317 | for (const TimelineViewEntry &TLE : Timeline) { |
318 | // Check if the timeline-max-cycles has been reached. |
319 | if (!TLE.CycleRetired && TLE.CycleExecuted) |
320 | break; |
321 | |
322 | TimelineInfo.push_back( |
323 | E: json::Object({{.K: "CycleDispatched" , .V: TLE.CycleDispatched}, |
324 | {.K: "CycleReady" , .V: TLE.CycleReady}, |
325 | {.K: "CycleIssued" , .V: TLE.CycleIssued}, |
326 | {.K: "CycleExecuted" , .V: TLE.CycleExecuted}, |
327 | {.K: "CycleRetired" , .V: TLE.CycleRetired}})); |
328 | } |
329 | return json::Object({{.K: "TimelineInfo" , .V: std::move(TimelineInfo)}}); |
330 | } |
331 | } // namespace mca |
332 | } // namespace llvm |
333 | |