//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the SchedulerStatistics interface.
///
//===----------------------------------------------------------------------===//

#include "Views/SchedulerStatistics.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"

namespace llvm {
namespace mca {

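// Cache the load/store queue resource identifiers (when the subtarget exposes
// them through MCExtraProcessorInfo), and size the per-resource usage table
// from the scheduling model.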
SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
    : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0),
      NumCycles(0), MostRecentLoadDispatched(~0U),
      MostRecentStoreDispatched(~0U),
      Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {
  if (SM.hasExtraProcessorInfo()) {
    const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
    LQResourceID = EPI.LoadQueueID;
    SQResourceID = EPI.StoreQueueID;
  }
}

// FIXME: This implementation works under the assumption that load/store queue
// entries are reserved at the 'instruction dispatched' stage, and released at
// the 'instruction executed' stage. This currently matches the behavior of
// LSUnit.
//
// The current design minimizes the number of events generated by the
// Dispatch/Execute stages, at the cost of doing extra bookkeeping in method
// `onEvent`. However, it introduces a subtle dependency between this view and
// how the LSUnit works.
//
// In the future, we should add a new "memory queue" event type so that we stop
// making assumptions about how LSUnit works internally (see PR39828).
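// Count issued micro opcodes, and track load/store queue occupancy by watching
// 'Dispatched' and 'Executed' events. The MostRecent*Dispatched indices avoid
// counting the same dispatched instruction more than once.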
void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
  if (Event.Type == HWInstructionEvent::Issued) {
    const Instruction &Inst = *Event.IR.getInstruction();
    NumIssued += Inst.getDesc().NumMicroOps;
  } else if (Event.Type == HWInstructionEvent::Dispatched) {
    const Instruction &Inst = *Event.IR.getInstruction();
    const unsigned Index = Event.IR.getSourceIndex();
    if (LQResourceID && Inst.getMayLoad() &&
        MostRecentLoadDispatched != Index) {
      Usage[LQResourceID].SlotsInUse++;
      MostRecentLoadDispatched = Index;
    }
    if (SQResourceID && Inst.getMayStore() &&
        MostRecentStoreDispatched != Index) {
      Usage[SQResourceID].SlotsInUse++;
      MostRecentStoreDispatched = Index;
    }
  } else if (Event.Type == HWInstructionEvent::Executed) {
    const Instruction &Inst = *Event.IR.getInstruction();
    if (LQResourceID && Inst.getMayLoad()) {
      assert(Usage[LQResourceID].SlotsInUse);
      Usage[LQResourceID].SlotsInUse--;
    }
    if (SQResourceID && Inst.getMayStore()) {
      assert(Usage[SQResourceID].SlotsInUse);
      Usage[SQResourceID].SlotsInUse--;
    }
  }
}

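// Reserve one slot for every buffered resource named by the scheduler. The
// load/store queues are skipped here because their occupancy is tracked from
// dispatch/execute events in `onEvent` (see the FIXME above).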
void SchedulerStatistics::onReservedBuffers(const InstRef & /* unused */,
                                            ArrayRef<unsigned> Buffers) {
  for (const unsigned Buffer : Buffers) {
    if (Buffer == LQResourceID || Buffer == SQResourceID)
      continue;
    Usage[Buffer].SlotsInUse++;
  }
}

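// Symmetric to onReservedBuffers: release one slot for every buffer in the
// list, again skipping the load/store queues.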
void SchedulerStatistics::onReleasedBuffers(const InstRef & /* unused */,
                                            ArrayRef<unsigned> Buffers) {
  for (const unsigned Buffer : Buffers) {
    if (Buffer == LQResourceID || Buffer == SQResourceID)
      continue;
    Usage[Buffer].SlotsInUse--;
  }
}

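// Per-cycle bookkeeping: accumulate the number of used slots for each buffered
// resource, record the maximum occupancy seen so far, and bucket the number of
// micro opcodes issued this cycle into the issue-width histogram.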
void SchedulerStatistics::updateHistograms() {
  for (BufferUsage &BU : Usage) {
    BU.CumulativeNumUsedSlots += BU.SlotsInUse;
    BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
  }

  IssueWidthPerCycle[NumIssued]++;
  NumIssued = 0;
}

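// Print the issue-width histogram: for every observed value N, report the
// number of cycles where exactly N micro opcodes were issued, together with
// the percentage of all simulated cycles. One entry is emphasized in bold when
// the output stream supports colors.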
void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
  OS << "\n\nSchedulers - "
     << "number of cycles where we saw N micro opcodes issued:\n";
  OS << "[# issued], [# cycles]\n";

  bool HasColors = OS.has_colors();
  const auto It = llvm::max_element(IssueWidthPerCycle);
  for (const std::pair<const unsigned, unsigned> &Entry : IssueWidthPerCycle) {
    unsigned NumIssued = Entry.first;
    if (NumIssued == It->first && HasColors)
      OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);

    unsigned IPC = Entry.second;
    OS << " " << NumIssued << ", " << IPC << " ("
       << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n";
    if (HasColors)
      OS.resetColor();
  }
}

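// Print per-resource queue usage: for every buffered processor resource,
// report the average and maximum number of used entries next to the total
// buffer size. When colors are supported, averages at or above 80% of the
// buffer size are shown in yellow, and a maximum that saturates the buffer is
// shown in red.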
void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
  assert(NumCycles && "Unexpected number of cycles!");

  OS << "\nScheduler's queue usage:\n";
  if (all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; })) {
    OS << "No scheduler resources used.\n";
    return;
  }

  OS << "[1] Resource name.\n"
     << "[2] Average number of used buffer entries.\n"
     << "[3] Maximum number of used buffer entries.\n"
     << "[4] Total number of buffer entries.\n\n"
     << " [1]            [2]        [3]    [4]\n";

  formatted_raw_ostream FOS(OS);
  bool HasColors = FOS.has_colors();
  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
    if (ProcResource.BufferSize <= 0)
      continue;

    const BufferUsage &BU = Usage[I];
    double AvgUsage = (double)BU.CumulativeNumUsedSlots / NumCycles;
    double AlmostFullThreshold = (double)(ProcResource.BufferSize * 4) / 5;
    unsigned NormalizedAvg = floor((AvgUsage * 10) + 0.5) / 10;
    unsigned NormalizedThreshold = floor((AlmostFullThreshold * 10) + 0.5) / 10;

    FOS << ProcResource.Name;
    FOS.PadToColumn(17);
    if (HasColors && NormalizedAvg >= NormalizedThreshold)
      FOS.changeColor(raw_ostream::YELLOW, true, false);
    FOS << NormalizedAvg;
    if (HasColors)
      FOS.resetColor();
    FOS.PadToColumn(28);
    if (HasColors &&
        BU.MaxUsedSlots == static_cast<unsigned>(ProcResource.BufferSize))
      FOS.changeColor(raw_ostream::RED, true, false);
    FOS << BU.MaxUsedSlots;
    if (HasColors)
      FOS.resetColor();
    FOS.PadToColumn(39);
    FOS << ProcResource.BufferSize << '\n';
  }

  FOS.flush();
}

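// Emit the full view: the issue-width histogram followed by the queue usage
// table.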
void SchedulerStatistics::printView(raw_ostream &OS) const {
  printSchedulerStats(OS);
  printSchedulerUsage(OS);
}

} // namespace mca
} // namespace llvm