//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the functionality used by the BottleneckAnalysis
/// view to report bottleneck information.
///
//===----------------------------------------------------------------------===//

#include "Views/BottleneckAnalysis.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"

namespace llvm {
namespace mca {

#define DEBUG_TYPE "llvm-mca"

PressureTracker::PressureTracker(const MCSchedModel &Model)
    : SM(Model),
      ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
      ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
      ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
  computeProcResourceMasks(SM, ProcResID2Mask);

  // Ignore the invalid resource at index zero.
  unsigned NextResourceUsersIdx = 0;
  for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
    const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
    ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
    NextResourceUsersIdx += ProcResource.NumUnits;
    uint64_t ResourceMask = ProcResID2Mask[I];
    ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
  }

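  // ResourceUsers is a flat table with one slot per processor resource unit;
  // ProcResID2ResourceUsersIndex (computed above) maps each resource to the
  // start of its slice. The pair (~0U, 0U) acts as a sentinel for "no user
  // recorded yet": getResourceUsers() only reports entries whose second
  // element (the consumed resource cycles) is non-zero.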
  ResourceUsers.resize(NextResourceUsersIdx);
  llvm::fill(ResourceUsers, std::make_pair<unsigned, unsigned>(~0U, 0U));
}

void PressureTracker::getResourceUsers(uint64_t ResourceMask,
                                       SmallVectorImpl<User> &Users) const {
  unsigned Index = getResourceStateIndex(ResourceMask);
  unsigned ProcResID = ResIdx2ProcResID[Index];
  const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
  for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
    const User U = getResourceUser(ProcResID, I);
    if (U.second && IPI.contains(U.first))
      Users.emplace_back(U);
  }
}

void PressureTracker::onInstructionDispatched(unsigned IID) {
  IPI.try_emplace(IID);
}

void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }

void PressureTracker::handleInstructionIssuedEvent(
    const HWInstructionIssuedEvent &Event) {
  unsigned IID = Event.IR.getSourceIndex();
  for (const ResourceUse &Use : Event.UsedResources) {
    const ResourceRef &RR = Use.first;
    unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
    Index += llvm::countr_zero(RR.second);
    ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
  }
}

void PressureTracker::updateResourcePressureDistribution(
    uint64_t CumulativeMask) {
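  // Peel one bit off the cumulative mask per iteration using the
  // lowest-set-bit idiom (Mask & -Mask). If the bit maps to a whole processor
  // resource (Mask == Current), the pressure is counted against that resource
  // directly; otherwise the bit denotes a resource group, and the pressure is
  // attributed to every sub-resource in the group's mask.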
  while (CumulativeMask) {
    uint64_t Current = CumulativeMask & (-CumulativeMask);
    unsigned ResIdx = getResourceStateIndex(Current);
    unsigned ProcResID = ResIdx2ProcResID[ResIdx];
    uint64_t Mask = ProcResID2Mask[ProcResID];

    if (Mask == Current) {
      ResourcePressureDistribution[ProcResID]++;
      CumulativeMask ^= Current;
      continue;
    }

    Mask ^= Current;
    while (Mask) {
      uint64_t SubUnit = Mask & (-Mask);
      ResIdx = getResourceStateIndex(SubUnit);
      ProcResID = ResIdx2ProcResID[ResIdx];
      ResourcePressureDistribution[ProcResID]++;
      Mask ^= SubUnit;
    }

    CumulativeMask ^= Current;
  }
}

void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
  assert(Event.Reason != HWPressureEvent::INVALID &&
         "Unexpected invalid event!");

  switch (Event.Reason) {
  default:
    break;

  case HWPressureEvent::RESOURCES: {
    const uint64_t ResourceMask = Event.ResourceMask;
    updateResourcePressureDistribution(Event.ResourceMask);

    for (const InstRef &IR : Event.AffectedInstructions) {
      const Instruction &IS = *IR.getInstruction();
      unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
      if (!BusyResources)
        continue;

      unsigned IID = IR.getSourceIndex();
      IPI[IID].ResourcePressureCycles++;
    }
    break;
  }

  case HWPressureEvent::REGISTER_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].RegisterPressureCycles++;
    }
    break;

  case HWPressureEvent::MEMORY_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].MemoryPressureCycles++;
    }
  }
}

#ifndef NDEBUG
void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
                                         const DependencyEdge &DepEdge,
                                         MCInstPrinter &MCIP) const {
  unsigned FromIID = DepEdge.FromIID;
  unsigned ToIID = DepEdge.ToIID;
  assert(FromIID < ToIID && "Graph should be acyclic!");

  const DependencyEdge::Dependency &DE = DepEdge.Dep;
  assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");

  OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
  if (DE.Type == DependencyEdge::DT_REGISTER) {
    OS << " - REGISTER: ";
    MCIP.printRegName(OS, DE.ResourceOrRegID);
  } else if (DE.Type == DependencyEdge::DT_MEMORY) {
    OS << " - MEMORY";
  } else {
    assert(DE.Type == DependencyEdge::DT_RESOURCE &&
           "Unsupported dependency type!");
    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
  }
  OS << " - COST: " << DE.Cost << '\n';
}
#endif // NDEBUG

void DependencyGraph::pruneEdges(unsigned Iterations) {
  for (DGNode &N : Nodes) {
    unsigned NumPruned = 0;
    const unsigned Size = N.OutgoingEdges.size();
    // Use a cut-off threshold to prune edges with a low frequency: edges seen
    // on every iteration are always kept, while edges seen on no more than
    // 10% of the iterations are swapped to the end of the vector and dropped.
    for (unsigned I = 0, E = Size; I < E; ++I) {
      DependencyEdge &Edge = N.OutgoingEdges[I];
      if (Edge.Frequency == Iterations)
        continue;
      double Factor = (double)Edge.Frequency / Iterations;
      if (0.10 < Factor)
        continue;
      Nodes[Edge.ToIID].NumPredecessors--;
      std::swap(Edge, N.OutgoingEdges[E - 1]);
      --E;
      ++NumPruned;
    }

    if (NumPruned)
      N.OutgoingEdges.resize(Size - NumPruned);
  }
}

void DependencyGraph::initializeRootSet(
    SmallVectorImpl<unsigned> &RootSet) const {
  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
    const DGNode &N = Nodes[I];
    if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
      RootSet.emplace_back(I);
  }
}

void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
                                            unsigned Iterations) {
  SmallVector<unsigned, 8> ToVisit;

  // A critical sequence is computed as the longest path from a node of the
  // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
  // composed of nodes with at least one successor, and no predecessors.
  //
  // Each node of the graph starts with an initial default cost of zero. The
  // cost of a node is a measure of criticality: the higher the cost, the
  // bigger the performance impact.
  // For register and memory dependencies, the cost is a function of the write
  // latency as well as the actual delay (in cycles) caused to users.
  // For processor resource dependencies, the cost is a function of the
  // resource pressure. Resource interferences with low frequency values are
  // ignored.
  //
  // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
  // the inner loop selects (i.e. visits) a node N from a set of `unvisited
  // nodes`, and then propagates the cost of N to all its neighbors.
  //
  // The `unvisited nodes` set initially contains all the nodes from the
  // RootSet. A node N is added to the `unvisited nodes` set only when all of
  // its predecessors have been visited already.
  //
  // For simplicity, every node tracks the number of visited incoming edges in
  // field `NumVisitedPredecessors`. When that count matches the node's total
  // number of predecessors, the node is appended to the `ToVisit` set.
  //
  // At the end of every iteration of the outer loop, set `ToVisit` becomes
  // our new `unvisited nodes` set.
  //
  // The algorithm terminates when the set of unvisited nodes (i.e. our
  // RootSet) is empty. This algorithm works under the assumption that the
  // graph is acyclic.
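  //
  // A minimal worked example (assuming three hypothetical nodes A, B, C):
  // given edges A->B of cost 2 and B->C of cost 3, with A in the RootSet, the
  // first pass propagates cost 2 into B; once all of B's predecessors have
  // been visited, B joins the worklist and propagates cost 2 + 3 = 5 into C.
  // C ends up with the highest cost, and getCriticalSequence() later recovers
  // the path A->B->C by walking the CriticalPredecessor chain backwards
  // from C.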
  do {
    for (unsigned IID : RootSet) {
      const DGNode &N = Nodes[IID];
      for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
        unsigned ToIID = DepEdge.ToIID;
        DGNode &To = Nodes[ToIID];
        uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
        // Check if this is the most expensive incoming edge seen so far. If
        // so, update the total cost of the destination node (ToIID), as well
        // as its field `CriticalPredecessor`.
        if (Cost > To.Cost) {
          To.CriticalPredecessor = DepEdge;
          To.Cost = Cost;
          To.Depth = N.Depth + 1;
        }
        To.NumVisitedPredecessors++;
        if (To.NumVisitedPredecessors == To.NumPredecessors)
          ToVisit.emplace_back(ToIID);
      }
    }

    std::swap(RootSet, ToVisit);
    ToVisit.clear();
  } while (!RootSet.empty());
}

void DependencyGraph::getCriticalSequence(
    SmallVectorImpl<const DependencyEdge *> &Seq) const {
  // At this stage, nodes of the graph have already been visited, and costs
  // have been propagated through the edges (see method
  // `propagateThroughEdges()`).

  // Identify the node N with the highest cost in the graph. By construction,
  // that node is the last instruction of our critical sequence.
  // Field N.Depth gives the total length of the sequence.
  //
  // To obtain the sequence of critical edges, we simply follow the chain of
  // critical predecessors starting from node N (field
  // DGNode::CriticalPredecessor).
  const auto It =
      llvm::max_element(Nodes, [](const DGNode &Lhs, const DGNode &Rhs) {
        return Lhs.Cost < Rhs.Cost;
      });
  unsigned IID = std::distance(Nodes.begin(), It);
  Seq.resize(Nodes[IID].Depth);
  for (const DependencyEdge *&DE : llvm::reverse(Seq)) {
    const DGNode &N = Nodes[IID];
    DE = &N.CriticalPredecessor;
    IID = N.CriticalPredecessor.FromIID;
  }
}

void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
                                          const MCInst &MCI,
                                          bool UseDifferentColor) const {
  FOS.PadToColumn(14);
  if (UseDifferentColor)
    FOS.changeColor(raw_ostream::CYAN, true, false);
  FOS << printInstructionString(MCI);
  if (UseDifferentColor)
    FOS.resetColor();
}

void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
  // Early exit if no bottlenecks were found during the simulation.
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
    return;

  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (Seq.empty())
    return;

  OS << "\nCritical sequence based on the simulation:\n\n";

  const DependencyEdge &FirstEdge = *Seq[0];
  ArrayRef<llvm::MCInst> Source = getSource();
  unsigned FromIID = FirstEdge.FromIID % Source.size();
  unsigned ToIID = FirstEdge.ToIID % Source.size();
  bool IsLoopCarried = FromIID >= ToIID;

  formatted_raw_ostream FOS(OS);
  FOS.PadToColumn(14);
  FOS << "Instruction";
  FOS.PadToColumn(58);
  FOS << "Dependency Information";

  bool HasColors = FOS.has_colors();

  unsigned CurrentIID = 0;
  if (IsLoopCarried) {
    FOS << "\n +----< " << FromIID << ".";
    printInstruction(FOS, Source[FromIID], HasColors);
    FOS << "\n |\n |    < loop carried > \n |";
  } else {
    while (CurrentIID < FromIID) {
      FOS << "\n        " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    FOS << "\n +----< " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID], HasColors);
    CurrentIID++;
  }

  for (const DependencyEdge *&DE : Seq) {
    ToIID = DE->ToIID % Source.size();
    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;

    while (CurrentIID < LastIID) {
      FOS << "\n |      " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    if (CurrentIID == ToIID) {
      FOS << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[CurrentIID], HasColors);
    } else {
      FOS << "\n |\n |    < loop carried > \n |"
          << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[ToIID], HasColors);
    }
    FOS.PadToColumn(58);

    const DependencyEdge::Dependency &Dep = DE->Dep;
    if (HasColors)
      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);

    if (Dep.Type == DependencyEdge::DT_REGISTER) {
      FOS << "## REGISTER dependency: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
      FOS << "## MEMORY dependency.";
    } else {
      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
             "Unsupported dependency type!");
      FOS << "## RESOURCE interference: ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
      if (HasColors) {
        FOS.resetColor();
        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
      }
      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
          << "% ]";
    }
    if (HasColors)
      FOS.resetColor();
    ++CurrentIID;
  }

  while (CurrentIID < Source.size()) {
    FOS << "\n        " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID]);
    CurrentIID++;
  }

  FOS << '\n';
  FOS.flush();
}

#ifndef NDEBUG
void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
  OS << "\nREG DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
        dumpDependencyEdge(OS, DE, MCIP);

  OS << "\nMEM DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
        dumpDependencyEdge(OS, DE, MCIP);

  OS << "\nRESOURCE DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
        dumpDependencyEdge(OS, DE, MCIP);
}
#endif // NDEBUG

void DependencyGraph::addDependency(unsigned From, unsigned To,
                                    DependencyEdge::Dependency &&Dep) {
  DGNode &NodeFrom = Nodes[From];
  DGNode &NodeTo = Nodes[To];
  SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;

  auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
    return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
  });

  if (It != Vec.end()) {
    It->Dep.Cost += Dep.Cost;
    It->Frequency++;
    return;
  }

  DependencyEdge DE = {Dep, From, To, 1};
  Vec.emplace_back(DE);
  NodeTo.NumPredecessors++;
}

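// Note: the dependency graph is sized to hold three contiguous copies of the
// input sequence (S.size() * 3). Loop-carried dependencies can then be
// represented as forward edges between adjacent copies, which keeps the graph
// acyclic (see the add*Dep() helpers below).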
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                       MCInstPrinter &Printer,
                                       ArrayRef<MCInst> S, unsigned NumIter)
    : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
      DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
      PressureIncreasedBecauseOfResources(false),
      PressureIncreasedBecauseOfRegisterDependencies(false),
      PressureIncreasedBecauseOfMemoryDependencies(false),
      SeenStallCycles(false), BPI() {}

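// The helpers below map instruction indices into the three-copy graph. A
// dependency is treated as loop-carried when the consumer does not come after
// the producer in program order (From >= To). Regular dependencies are added
// once, between the middle copies of the two instructions; loop-carried
// dependencies are added across both copy boundaries (first copy to middle,
// and middle copy to last).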
void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
                                        unsigned RegID, unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = getSource().size();
  if (IsLoopCarried) {
    DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
    DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
    return;
  }
  DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
}

void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
                                      unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = getSource().size();
  if (IsLoopCarried) {
    DG.addMemoryDep(From, To + SourceSize, Cost);
    DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
    return;
  }
  DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
}

void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
                                        uint64_t Mask, unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = getSource().size();
  if (IsLoopCarried) {
    DG.addResourceDep(From, To + SourceSize, Mask, Cost);
    DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
    return;
  }
  DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
}

void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
  const unsigned IID = Event.IR.getSourceIndex();
  if (Event.Type == HWInstructionEvent::Dispatched) {
    Tracker.onInstructionDispatched(IID);
    return;
  }
  if (Event.Type == HWInstructionEvent::Executed) {
    Tracker.onInstructionExecuted(IID);
    return;
  }

  if (Event.Type != HWInstructionEvent::Issued)
    return;

  ArrayRef<llvm::MCInst> Source = getSource();
  const Instruction &IS = *Event.IR.getInstruction();
  unsigned To = IID % Source.size();

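  // Cost heuristic: each dependency is weighted by the latency (or resource
  // cycles) reported for the critical dependency, plus twice the number of
  // cycles during which this instruction was seen under pressure for the
  // matching reason (resource, register, or memory).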
  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
  uint64_t ResourceMask = IS.getCriticalResourceMask();
  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
  while (ResourceMask) {
    uint64_t Current = ResourceMask & (-ResourceMask);
    Tracker.getResourceUsers(Current, Users);
    for (const std::pair<unsigned, unsigned> &U : Users)
      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
    Users.clear();
    ResourceMask ^= Current;
  }

  const CriticalDependency &RegDep = IS.getCriticalRegDep();
  if (RegDep.Cycles) {
    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
    unsigned From = RegDep.IID % Source.size();
    addRegisterDep(From, To, RegDep.RegID, Cycles);
  }

  const CriticalDependency &MemDep = IS.getCriticalMemDep();
  if (MemDep.Cycles) {
    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
    unsigned From = MemDep.IID % Source.size();
    addMemoryDep(From, To, Cycles);
  }

  Tracker.handleInstructionIssuedEvent(
      static_cast<const HWInstructionIssuedEvent &>(Event));

  // Check if this is the last simulated instruction.
  if (IID == ((Iterations * Source.size()) - 1))
    DG.finalizeGraph(Iterations);
}

void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
  assert(Event.Reason != HWPressureEvent::INVALID &&
         "Unexpected invalid event!");

  Tracker.handlePressureEvent(Event);

  switch (Event.Reason) {
  default:
    break;

  case HWPressureEvent::RESOURCES:
    PressureIncreasedBecauseOfResources = true;
    break;
  case HWPressureEvent::REGISTER_DEPS:
    PressureIncreasedBecauseOfRegisterDependencies = true;
    break;
  case HWPressureEvent::MEMORY_DEPS:
    PressureIncreasedBecauseOfMemoryDependencies = true;
    break;
  }
}

void BottleneckAnalysis::onCycleEnd() {
  ++TotalCycles;

  bool PressureIncreasedBecauseOfDataDependencies =
      PressureIncreasedBecauseOfRegisterDependencies ||
      PressureIncreasedBecauseOfMemoryDependencies;
  if (!PressureIncreasedBecauseOfResources &&
      !PressureIncreasedBecauseOfDataDependencies)
    return;

  ++BPI.PressureIncreaseCycles;
  if (PressureIncreasedBecauseOfRegisterDependencies)
    ++BPI.RegisterDependencyCycles;
  if (PressureIncreasedBecauseOfMemoryDependencies)
    ++BPI.MemoryDependencyCycles;
  if (PressureIncreasedBecauseOfDataDependencies)
    ++BPI.DataDependencyCycles;
  if (PressureIncreasedBecauseOfResources)
    ++BPI.ResourcePressureCycles;
  PressureIncreasedBecauseOfResources = false;
  PressureIncreasedBecauseOfRegisterDependencies = false;
  PressureIncreasedBecauseOfMemoryDependencies = false;
}

void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
    OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
    return;
  }

  double PressurePerCycle =
      (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
  double ResourcePressurePerCycle =
      (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
  double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
  double RegDepPressurePerCycle =
      (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
  double MemDepPressurePerCycle =
      (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;

  OS << "\n\nCycles with backend pressure increase [ "
     << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";

  OS << "\nThroughput Bottlenecks: "
     << "\n  Resource Pressure    [ "
     << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
     << "% ]";

  if (BPI.PressureIncreaseCycles) {
    ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
    const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
    for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
      unsigned ReleaseAtCycles = Distribution[I];
      if (ReleaseAtCycles) {
        double Frequency = (double)ReleaseAtCycles * 100 / TotalCycles;
        const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
        OS << "\n  - " << PRDesc.Name << "  [ "
           << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
      }
    }
  }

  OS << "\n  Data Dependencies:      [ "
     << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
  OS << "\n  - Register Dependencies [ "
     << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
     << "% ]";
  OS << "\n  - Memory Dependencies   [ "
     << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
     << "% ]\n";
}

void BottleneckAnalysis::printView(raw_ostream &OS) const {
  std::string Buffer;
  raw_string_ostream TempStream(Buffer);
  printBottleneckHints(TempStream);
  TempStream.flush();
  OS << Buffer;
  printCriticalSequence(OS);
}

json::Value BottleneckAnalysis::toJSON() const {
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
    json::Object JO({{"PressureIncreaseCycles", 0}});
    return JO;
  }

  json::Array CriticalSequence;
  // Get the critical sequence of dependency edges.
  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (!Seq.empty()) {
    for (const DependencyEdge *&DE : Seq) {
      json::Object DEJO({{"FromID", DE->FromIID},
                         {"ToID", DE->ToIID},
                         {"Type", static_cast<unsigned>(DE->Dep.Type)},
                         {"ResourceOrRegID", DE->Dep.ResourceOrRegID}});
      CriticalSequence.push_back(std::move(DEJO));
    }
  }

  json::Array ResourcePressure;
  if (BPI.PressureIncreaseCycles) {
    ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
    const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
    for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
      unsigned ReleaseAtCycles = Distribution[I];
      if (ReleaseAtCycles) {
        const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
        json::Object RPJO({{PRDesc.Name, ReleaseAtCycles}});
        ResourcePressure.push_back(std::move(RPJO));
      }
    }
  }

  json::Object JO({{"PressureIncreaseCycles", BPI.PressureIncreaseCycles},
                   {"ResourcePressureCycles", BPI.ResourcePressureCycles},
                   {"DataDependencyCycles", BPI.DataDependencyCycles},
                   {"RegisterDependencyCycles", BPI.RegisterDependencyCycles},
                   {"MemoryDependencyCycles", BPI.MemoryDependencyCycles},
                   {"TotalCycles", TotalCycles},
                   {"DependencyEdge", std::move(CriticalSequence)},
                   {"ResourcePressure", std::move(ResourcePressure)}});

  return JO;
}

} // namespace mca
} // namespace llvm