//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the functionality used by the BottleneckAnalysis
/// view to report bottleneck information.
///
//===----------------------------------------------------------------------===//

#include "Views/BottleneckAnalysis.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"

namespace llvm {
namespace mca {

#define DEBUG_TYPE "llvm-mca"

PressureTracker::PressureTracker(const MCSchedModel &Model)
    : SM(Model),
      ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
      ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
      ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
  computeProcResourceMasks(SM, ProcResID2Mask);

  // Ignore the invalid resource at index zero.
  unsigned NextResourceUsersIdx = 0;
  for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
    const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
    ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
    NextResourceUsersIdx += ProcResource.NumUnits;
    uint64_t ResourceMask = ProcResID2Mask[I];
    ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
  }

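  // ResourceUsers is a flat table with one slot per individual resource unit.
  // Slot `ProcResID2ResourceUsersIndex[ProcResID] + UnitID` records the last
  // instruction issued to that unit, together with the number of consumed
  // resource cycles; ~0U marks a slot that has no user yet.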
  ResourceUsers.resize(NextResourceUsersIdx);
  llvm::fill(ResourceUsers, std::make_pair<unsigned, unsigned>(~0U, 0U));
}

void PressureTracker::getResourceUsers(uint64_t ResourceMask,
                                       SmallVectorImpl<User> &Users) const {
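  // Only report users that are still in flight: a user is skipped if it never
  // consumed cycles on this unit, or if its instruction is no longer tracked
  // in the IPI map (i.e. it has already executed).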
  unsigned Index = getResourceStateIndex(ResourceMask);
  unsigned ProcResID = ResIdx2ProcResID[Index];
  const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
  for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
    const User U = getResourceUser(ProcResID, I);
    if (U.second && IPI.contains(U.first))
      Users.emplace_back(U);
  }
}

void PressureTracker::onInstructionDispatched(unsigned IID) {
  IPI.try_emplace(IID);
}

void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }

void PressureTracker::handleInstructionIssuedEvent(
    const HWInstructionIssuedEvent &Event) {
  unsigned IID = Event.IR.getSourceIndex();
  for (const ResourceUse &Use : Event.UsedResources) {
    const ResourceRef &RR = Use.first;
    unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
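    // RR.second is a one-bit mask identifying the unit within the resource;
    // countr_zero converts it into a zero-based unit index.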
    Index += llvm::countr_zero(RR.second);
    ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
  }
}

void PressureTracker::updateResourcePressureDistribution(
    uint64_t CumulativeMask) {
  while (CumulativeMask) {
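    // Isolate the lowest set bit. Each bit of the cumulative mask identifies
    // either a single resource unit or a resource group.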
    uint64_t Current = CumulativeMask & (-CumulativeMask);
    unsigned ResIdx = getResourceStateIndex(Current);
    unsigned ProcResID = ResIdx2ProcResID[ResIdx];
    uint64_t Mask = ProcResID2Mask[ProcResID];

    if (Mask == Current) {
      ResourcePressureDistribution[ProcResID]++;
      CumulativeMask ^= Current;
      continue;
    }

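    // `Current` denotes a resource group: attribute the pressure increase to
    // every resource unit that belongs to the group.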
    Mask ^= Current;
    while (Mask) {
      uint64_t SubUnit = Mask & (-Mask);
      ResIdx = getResourceStateIndex(SubUnit);
      ProcResID = ResIdx2ProcResID[ResIdx];
      ResourcePressureDistribution[ProcResID]++;
      Mask ^= SubUnit;
    }

    CumulativeMask ^= Current;
  }
}

void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
  assert(Event.Reason != HWPressureEvent::INVALID &&
         "Unexpected invalid event!");

  switch (Event.Reason) {
  default:
    break;

  case HWPressureEvent::RESOURCES: {
    const uint64_t ResourceMask = Event.ResourceMask;
    updateResourcePressureDistribution(Event.ResourceMask);

    for (const InstRef &IR : Event.AffectedInstructions) {
      const Instruction &IS = *IR.getInstruction();
      unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
      if (!BusyResources)
        continue;

      unsigned IID = IR.getSourceIndex();
      IPI[IID].ResourcePressureCycles++;
    }
    break;
  }

  case HWPressureEvent::REGISTER_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].RegisterPressureCycles++;
    }
    break;

  case HWPressureEvent::MEMORY_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].MemoryPressureCycles++;
    }
  }
}

#ifndef NDEBUG
void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
                                         const DependencyEdge &DepEdge,
                                         MCInstPrinter &MCIP) const {
  unsigned FromIID = DepEdge.FromIID;
  unsigned ToIID = DepEdge.ToIID;
  assert(FromIID < ToIID && "Graph should be acyclic!");

  const DependencyEdge::Dependency &DE = DepEdge.Dep;
  assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");

  OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
  if (DE.Type == DependencyEdge::DT_REGISTER) {
    OS << " - REGISTER: ";
    MCIP.printRegName(OS, DE.ResourceOrRegID);
  } else if (DE.Type == DependencyEdge::DT_MEMORY) {
    OS << " - MEMORY";
  } else {
    assert(DE.Type == DependencyEdge::DT_RESOURCE &&
           "Unsupported dependency type!");
    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
  }
  OS << " - COST: " << DE.Cost << '\n';
}
#endif // NDEBUG

void DependencyGraph::pruneEdges(unsigned Iterations) {
  for (DGNode &N : Nodes) {
    unsigned NumPruned = 0;
    const unsigned Size = N.OutgoingEdges.size();
    // Use a cut-off threshold to prune edges with a low frequency.
    for (unsigned I = 0, E = Size; I < E; ++I) {
      DependencyEdge &Edge = N.OutgoingEdges[I];
      if (Edge.Frequency == Iterations)
        continue;
      double Factor = (double)Edge.Frequency / Iterations;
      if (0.10 < Factor)
        continue;
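      // Prune this edge: detach it from the destination node, then swap it to
      // the tail of the vector so that a single resize() at the end drops all
      // pruned edges at once.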
      Nodes[Edge.ToIID].NumPredecessors--;
      std::swap(Edge, N.OutgoingEdges[E - 1]);
      --E;
      ++NumPruned;
    }

    if (NumPruned)
      N.OutgoingEdges.resize(Size - NumPruned);
  }
}

void DependencyGraph::initializeRootSet(
    SmallVectorImpl<unsigned> &RootSet) const {
  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
    const DGNode &N = Nodes[I];
    if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
      RootSet.emplace_back(I);
  }
}

void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
                                            unsigned Iterations) {
  SmallVector<unsigned, 8> ToVisit;

  // A critical sequence is computed as the longest path from a node of the
  // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
  // composed of nodes with at least one successor, and no predecessors.
  //
  // Each node of the graph starts with an initial default cost of zero. The
  // cost of a node is a measure of criticality: the higher the cost, the
  // bigger the performance impact.
  // For register and memory dependencies, the cost is a function of the write
  // latency as well as the actual delay (in cycles) caused to users.
  // For processor resource dependencies, the cost is a function of the
  // resource pressure. Resource interferences with low frequency values are
  // ignored.
  //
  // This algorithm is very similar to a (reverse) Dijkstra. Every iteration
  // of the inner loop selects (i.e. visits) a node N from a set of `unvisited
  // nodes`, and then propagates the cost of N to all its neighbors.
  //
  // The `unvisited nodes` set initially contains all the nodes from the
  // RootSet. A node N is added to the `unvisited nodes` set only after all of
  // its predecessors have been visited.
  //
  // For simplicity, every node counts its already-visited predecessors in
  // field `NumVisitedPredecessors`. When that counter matches the number of
  // incoming edges (`NumPredecessors`), the corresponding node is added to
  // the `ToVisit` set.
  //
  // At the end of every iteration of the outer loop, set `ToVisit` becomes
  // our new `unvisited nodes` set.
  //
  // The algorithm terminates when the set of unvisited nodes (i.e. our
  // RootSet) is empty. This algorithm works under the assumption that the
  // graph is acyclic.
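  //
  // Worked example (a hypothetical three-node chain A -> B -> C): visiting
  // root A sets B.Cost = A.Cost + cost(A, B); once all of B's predecessors
  // have been visited, B enters ToVisit, and visiting B sets
  // C.Cost = B.Cost + cost(B, C). At every node, the most expensive incoming
  // edge seen so far is remembered in field `CriticalPredecessor`.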
  do {
    for (unsigned IID : RootSet) {
      const DGNode &N = Nodes[IID];
      for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
        unsigned ToIID = DepEdge.ToIID;
        DGNode &To = Nodes[ToIID];
        uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
        // Check if this is the most expensive incoming edge seen so far. If
        // so, update the total cost of the destination node (ToIID), as well
        // as its field `CriticalPredecessor`.
        if (Cost > To.Cost) {
          To.CriticalPredecessor = DepEdge;
          To.Cost = Cost;
          To.Depth = N.Depth + 1;
        }
        To.NumVisitedPredecessors++;
        if (To.NumVisitedPredecessors == To.NumPredecessors)
          ToVisit.emplace_back(ToIID);
      }
    }

    std::swap(RootSet, ToVisit);
    ToVisit.clear();
  } while (!RootSet.empty());
}

void DependencyGraph::getCriticalSequence(
    SmallVectorImpl<const DependencyEdge *> &Seq) const {
  // At this stage, nodes of the graph have already been visited, and costs
  // have been propagated through the edges (see method
  // `propagateThroughEdges()`).

  // Identify the node N with the highest cost in the graph. By construction,
  // that node is the last instruction of our critical sequence.
  // Field N.Depth gives the total length of the sequence.
  //
  // To obtain the sequence of critical edges, we simply follow the chain of
  // critical predecessors starting from node N (field
  // DGNode::CriticalPredecessor).
  const auto It =
      llvm::max_element(Nodes, [](const DGNode &Lhs, const DGNode &Rhs) {
        return Lhs.Cost < Rhs.Cost;
      });
  unsigned IID = std::distance(Nodes.begin(), It);
  Seq.resize(Nodes[IID].Depth);
  for (const DependencyEdge *&DE : llvm::reverse(Seq)) {
    const DGNode &N = Nodes[IID];
    DE = &N.CriticalPredecessor;
    IID = N.CriticalPredecessor.FromIID;
  }
}

void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
                                          const MCInst &MCI,
                                          bool UseDifferentColor) const {
  FOS.PadToColumn(14);
  if (UseDifferentColor)
    FOS.changeColor(raw_ostream::CYAN, /*Bold=*/true, /*BG=*/false);
  FOS << printInstructionString(MCI);
  if (UseDifferentColor)
    FOS.resetColor();
}

void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
  // Early exit if no bottlenecks were found during the simulation.
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
    return;

  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (Seq.empty())
    return;

  OS << "\nCritical sequence based on the simulation:\n\n";

  const DependencyEdge &FirstEdge = *Seq[0];
  ArrayRef<llvm::MCInst> Source = getSource();
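  // Instruction IDs from the simulation enumerate all iterations of the input
  // sequence; taking them modulo Source.size() recovers the position of the
  // corresponding instruction in the input.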
  unsigned FromIID = FirstEdge.FromIID % Source.size();
  unsigned ToIID = FirstEdge.ToIID % Source.size();
  bool IsLoopCarried = FromIID >= ToIID;

  formatted_raw_ostream FOS(OS);
  FOS.PadToColumn(14);
  FOS << "Instruction";
  FOS.PadToColumn(58);
  FOS << "Dependency Information";

  bool HasColors = FOS.has_colors();

  unsigned CurrentIID = 0;
  if (IsLoopCarried) {
    FOS << "\n +----< " << FromIID << ".";
    printInstruction(FOS, Source[FromIID], HasColors);
    FOS << "\n |\n | < loop carried > \n |";
  } else {
    while (CurrentIID < FromIID) {
      FOS << "\n " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    FOS << "\n +----< " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID], HasColors);
    CurrentIID++;
  }

  for (const DependencyEdge *&DE : Seq) {
    ToIID = DE->ToIID % Source.size();
    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;

    while (CurrentIID < LastIID) {
      FOS << "\n | " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    if (CurrentIID == ToIID) {
      FOS << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[CurrentIID], HasColors);
    } else {
      FOS << "\n |\n | < loop carried > \n |"
          << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[ToIID], HasColors);
    }
    FOS.PadToColumn(58);

    const DependencyEdge::Dependency &Dep = DE->Dep;
    if (HasColors)
      FOS.changeColor(raw_ostream::SAVEDCOLOR, /*Bold=*/true, /*BG=*/false);

    if (Dep.Type == DependencyEdge::DT_REGISTER) {
      FOS << "## REGISTER dependency: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, /*Bold=*/true, /*BG=*/false);
      getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
      FOS << "## MEMORY dependency.";
    } else {
      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
             "Unsupported dependency type!");
      FOS << "## RESOURCE interference: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, /*Bold=*/true, /*BG=*/false);
      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
      if (HasColors) {
        FOS.resetColor();
        FOS.changeColor(raw_ostream::SAVEDCOLOR, /*Bold=*/true, /*BG=*/false);
      }
      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
          << "% ]";
    }
    if (HasColors)
      FOS.resetColor();
    ++CurrentIID;
  }

  while (CurrentIID < Source.size()) {
    FOS << "\n " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID]);
    CurrentIID++;
  }

  FOS << '\n';
  FOS.flush();
}

#ifndef NDEBUG
void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
  OS << "\nREG DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
        dumpDependencyEdge(OS, DE, MCIP);

  OS << "\nMEM DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
        dumpDependencyEdge(OS, DE, MCIP);

  OS << "\nRESOURCE DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
        dumpDependencyEdge(OS, DE, MCIP);
}
#endif // NDEBUG

void DependencyGraph::addDependency(unsigned From, unsigned To,
                                    DependencyEdge::Dependency &&Dep) {
  DGNode &NodeFrom = Nodes[From];
  DGNode &NodeTo = Nodes[To];
  SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;

  auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
    return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
  });

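  // If an equivalent edge (same destination and same register/resource ID)
  // already exists, fold this occurrence into it by accumulating the cost and
  // bumping the frequency counter.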
  if (It != Vec.end()) {
    It->Dep.Cost += Dep.Cost;
    It->Frequency++;
    return;
  }

  DependencyEdge DE = {Dep, From, To, 1};
  Vec.emplace_back(DE);
  NodeTo.NumPredecessors++;
}

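// The dependency graph is sized for three logical copies of the input
// sequence: edges between instructions of the same iteration live in the
// (conceptual) middle copy, while loop-carried edges span from one copy into
// the next (see addRegisterDep/addMemoryDep/addResourceDep below).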
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                       MCInstPrinter &Printer,
                                       ArrayRef<MCInst> S, unsigned NumIter)
    : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
      DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
      PressureIncreasedBecauseOfResources(false),
      PressureIncreasedBecauseOfRegisterDependencies(false),
      PressureIncreasedBecauseOfMemoryDependencies(false),
      SeenStallCycles(false), BPI() {}

void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
                                        unsigned RegID, unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = getSource().size();
  if (IsLoopCarried) {
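    // Materialize the loop-carried edge twice: once from the first logical
    // iteration into the second, and once from the second into the third.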
    DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
    DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
    return;
  }
  DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
}

void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
                                      unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = getSource().size();
  if (IsLoopCarried) {
    DG.addMemoryDep(From, To + SourceSize, Cost);
    DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
    return;
  }
  DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
}

void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
                                        uint64_t Mask, unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = getSource().size();
  if (IsLoopCarried) {
    DG.addResourceDep(From, To + SourceSize, Mask, Cost);
    DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
    return;
  }
  DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
}

void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
  const unsigned IID = Event.IR.getSourceIndex();
  if (Event.Type == HWInstructionEvent::Dispatched) {
    Tracker.onInstructionDispatched(IID);
    return;
  }
  if (Event.Type == HWInstructionEvent::Executed) {
    Tracker.onInstructionExecuted(IID);
    return;
  }

  if (Event.Type != HWInstructionEvent::Issued)
    return;

  ArrayRef<llvm::MCInst> Source = getSource();
  const Instruction &IS = *Event.IR.getInstruction();
  unsigned To = IID % Source.size();

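  // Pressure cycles are counted twice when computing edge costs; this biases
  // the critical sequence toward instructions that spent more time stalled
  // under backend pressure.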
  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
  uint64_t ResourceMask = IS.getCriticalResourceMask();
  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
  while (ResourceMask) {
    uint64_t Current = ResourceMask & (-ResourceMask);
    Tracker.getResourceUsers(Current, Users);
    for (const std::pair<unsigned, unsigned> &U : Users)
      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
    Users.clear();
    ResourceMask ^= Current;
  }

  const CriticalDependency &RegDep = IS.getCriticalRegDep();
  if (RegDep.Cycles) {
    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
    unsigned From = RegDep.IID % Source.size();
    addRegisterDep(From, To, RegDep.RegID, Cycles);
  }

  const CriticalDependency &MemDep = IS.getCriticalMemDep();
  if (MemDep.Cycles) {
    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
    unsigned From = MemDep.IID % Source.size();
    addMemoryDep(From, To, Cycles);
  }

  Tracker.handleInstructionIssuedEvent(
      static_cast<const HWInstructionIssuedEvent &>(Event));

  // Check if this is the last simulated instruction.
  if (IID == ((Iterations * Source.size()) - 1))
    DG.finalizeGraph(Iterations);
}

void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
  assert(Event.Reason != HWPressureEvent::INVALID &&
         "Unexpected invalid event!");

  Tracker.handlePressureEvent(Event);

  switch (Event.Reason) {
  default:
    break;

  case HWPressureEvent::RESOURCES:
    PressureIncreasedBecauseOfResources = true;
    break;
  case HWPressureEvent::REGISTER_DEPS:
    PressureIncreasedBecauseOfRegisterDependencies = true;
    break;
  case HWPressureEvent::MEMORY_DEPS:
    PressureIncreasedBecauseOfMemoryDependencies = true;
    break;
  }
}

void BottleneckAnalysis::onCycleEnd() {
  ++TotalCycles;

  bool PressureIncreasedBecauseOfDataDependencies =
      PressureIncreasedBecauseOfRegisterDependencies ||
      PressureIncreasedBecauseOfMemoryDependencies;
  if (!PressureIncreasedBecauseOfResources &&
      !PressureIncreasedBecauseOfDataDependencies)
    return;

  ++BPI.PressureIncreaseCycles;
  if (PressureIncreasedBecauseOfRegisterDependencies)
    ++BPI.RegisterDependencyCycles;
  if (PressureIncreasedBecauseOfMemoryDependencies)
    ++BPI.MemoryDependencyCycles;
  if (PressureIncreasedBecauseOfDataDependencies)
    ++BPI.DataDependencyCycles;
  if (PressureIncreasedBecauseOfResources)
    ++BPI.ResourcePressureCycles;
  PressureIncreasedBecauseOfResources = false;
  PressureIncreasedBecauseOfRegisterDependencies = false;
  PressureIncreasedBecauseOfMemoryDependencies = false;
}

void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
    OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
    return;
  }

  double PressurePerCycle =
      (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
  double ResourcePressurePerCycle =
      (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
  double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
  double RegDepPressurePerCycle =
      (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
  double MemDepPressurePerCycle =
      (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;

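  // The floor((x * 100) + 0.5) / 100 idiom below rounds each percentage to
  // two decimal places before printing.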
  OS << "\n\nCycles with backend pressure increase [ "
     << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";

  OS << "\nThroughput Bottlenecks: "
     << "\n Resource Pressure [ "
     << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
     << "% ]";

  if (BPI.PressureIncreaseCycles) {
    ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
    const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
    for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
      unsigned ReleaseAtCycles = Distribution[I];
      if (ReleaseAtCycles) {
        double Frequency = (double)ReleaseAtCycles * 100 / TotalCycles;
        const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
        OS << "\n - " << PRDesc.Name << " [ "
           << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
      }
    }
  }

  OS << "\n Data Dependencies: [ "
     << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
  OS << "\n - Register Dependencies [ "
     << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
     << "% ]";
  OS << "\n - Memory Dependencies [ "
     << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
     << "% ]\n";
}

void BottleneckAnalysis::printView(raw_ostream &OS) const {
  std::string Buffer;
  raw_string_ostream TempStream(Buffer);
  printBottleneckHints(TempStream);
  TempStream.flush();
  OS << Buffer;
  printCriticalSequence(OS);
}

json::Value BottleneckAnalysis::toJSON() const {
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
    json::Object JO({{"PressureIncreaseCycles", 0}});
    return JO;
  }

  json::Array CriticalSequence;
  // Export the critical sequence of dependency edges.
  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (!Seq.empty()) {
    for (const DependencyEdge *&DE : Seq) {
      json::Object DEJO({{"FromID", DE->FromIID},
                         {"ToID", DE->ToIID},
                         {"Type", static_cast<unsigned>(DE->Dep.Type)},
                         {"ResourceOrRegID", DE->Dep.ResourceOrRegID}});
      CriticalSequence.push_back(std::move(DEJO));
    }
  }

  json::Array ResourcePressure;
  if (BPI.PressureIncreaseCycles) {
    ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
    const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
    for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
      unsigned ReleaseAtCycles = Distribution[I];
      if (ReleaseAtCycles) {
        const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
        json::Object RPJO({{PRDesc.Name, ReleaseAtCycles}});
        ResourcePressure.push_back(std::move(RPJO));
      }
    }
  }

  json::Object JO({{"PressureIncreaseCycles", BPI.PressureIncreaseCycles},
                   {"ResourcePressureCycles", BPI.ResourcePressureCycles},
                   {"DataDependencyCycles", BPI.DataDependencyCycles},
                   {"RegisterDependencyCycles", BPI.RegisterDependencyCycles},
                   {"MemoryDependencyCycles", BPI.MemoryDependencyCycles},
                   {"TotalCycles", TotalCycles},
                   {"DependencyEdge", std::move(CriticalSequence)},
                   {"ResourcePressure", std::move(ResourcePressure)}});

  return JO;
}

} // namespace mca.
} // namespace llvm