1 | //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// This file implements the functionalities used by the BottleneckAnalysis |
11 | /// to report bottleneck info. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "Views/BottleneckAnalysis.h" |
16 | #include "llvm/MC/MCInst.h" |
17 | #include "llvm/MCA/Support.h" |
18 | #include "llvm/Support/Format.h" |
19 | |
20 | namespace llvm { |
21 | namespace mca { |
22 | |
23 | #define DEBUG_TYPE "llvm-mca" |
24 | |
25 | PressureTracker::PressureTracker(const MCSchedModel &Model) |
26 | : SM(Model), |
27 | ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), |
28 | ProcResID2Mask(Model.getNumProcResourceKinds(), 0), |
29 | ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), |
30 | ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { |
31 | computeProcResourceMasks(SM, Masks: ProcResID2Mask); |
32 | |
33 | // Ignore the invalid resource at index zero. |
34 | unsigned NextResourceUsersIdx = 0; |
35 | for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { |
36 | const MCProcResourceDesc &ProcResource = *SM.getProcResource(ProcResourceIdx: I); |
37 | ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; |
38 | NextResourceUsersIdx += ProcResource.NumUnits; |
39 | uint64_t ResourceMask = ProcResID2Mask[I]; |
40 | ResIdx2ProcResID[getResourceStateIndex(Mask: ResourceMask)] = I; |
41 | } |
42 | |
43 | ResourceUsers.resize(N: NextResourceUsersIdx); |
44 | std::fill(first: ResourceUsers.begin(), last: ResourceUsers.end(), |
45 | value: std::make_pair<unsigned, unsigned>(x: ~0U, y: 0U)); |
46 | } |
47 | |
48 | void PressureTracker::getResourceUsers(uint64_t ResourceMask, |
49 | SmallVectorImpl<User> &Users) const { |
50 | unsigned Index = getResourceStateIndex(Mask: ResourceMask); |
51 | unsigned ProcResID = ResIdx2ProcResID[Index]; |
52 | const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResourceIdx: ProcResID); |
53 | for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { |
54 | const User U = getResourceUser(ProcResID, UnitID: I); |
55 | if (U.second && IPI.contains(Val: U.first)) |
56 | Users.emplace_back(Args: U); |
57 | } |
58 | } |
59 | |
60 | void PressureTracker::onInstructionDispatched(unsigned IID) { |
61 | IPI.insert(KV: std::make_pair(x&: IID, y: InstructionPressureInfo())); |
62 | } |
63 | |
64 | void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(Val: IID); } |
65 | |
66 | void PressureTracker::handleInstructionIssuedEvent( |
67 | const HWInstructionIssuedEvent &Event) { |
68 | unsigned IID = Event.IR.getSourceIndex(); |
69 | for (const ResourceUse &Use : Event.UsedResources) { |
70 | const ResourceRef &RR = Use.first; |
71 | unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; |
72 | Index += llvm::countr_zero(Val: RR.second); |
73 | ResourceUsers[Index] = std::make_pair(x&: IID, y: Use.second.getNumerator()); |
74 | } |
75 | } |
76 | |
77 | void PressureTracker::updateResourcePressureDistribution( |
78 | uint64_t CumulativeMask) { |
79 | while (CumulativeMask) { |
80 | uint64_t Current = CumulativeMask & (-CumulativeMask); |
81 | unsigned ResIdx = getResourceStateIndex(Mask: Current); |
82 | unsigned ProcResID = ResIdx2ProcResID[ResIdx]; |
83 | uint64_t Mask = ProcResID2Mask[ProcResID]; |
84 | |
85 | if (Mask == Current) { |
86 | ResourcePressureDistribution[ProcResID]++; |
87 | CumulativeMask ^= Current; |
88 | continue; |
89 | } |
90 | |
91 | Mask ^= Current; |
92 | while (Mask) { |
93 | uint64_t SubUnit = Mask & (-Mask); |
94 | ResIdx = getResourceStateIndex(Mask: SubUnit); |
95 | ProcResID = ResIdx2ProcResID[ResIdx]; |
96 | ResourcePressureDistribution[ProcResID]++; |
97 | Mask ^= SubUnit; |
98 | } |
99 | |
100 | CumulativeMask ^= Current; |
101 | } |
102 | } |
103 | |
104 | void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { |
105 | assert(Event.Reason != HWPressureEvent::INVALID && |
106 | "Unexpected invalid event!" ); |
107 | |
108 | switch (Event.Reason) { |
109 | default: |
110 | break; |
111 | |
112 | case HWPressureEvent::RESOURCES: { |
113 | const uint64_t ResourceMask = Event.ResourceMask; |
114 | updateResourcePressureDistribution(CumulativeMask: Event.ResourceMask); |
115 | |
116 | for (const InstRef &IR : Event.AffectedInstructions) { |
117 | const Instruction &IS = *IR.getInstruction(); |
118 | unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; |
119 | if (!BusyResources) |
120 | continue; |
121 | |
122 | unsigned IID = IR.getSourceIndex(); |
123 | IPI[IID].ResourcePressureCycles++; |
124 | } |
125 | break; |
126 | } |
127 | |
128 | case HWPressureEvent::REGISTER_DEPS: |
129 | for (const InstRef &IR : Event.AffectedInstructions) { |
130 | unsigned IID = IR.getSourceIndex(); |
131 | IPI[IID].RegisterPressureCycles++; |
132 | } |
133 | break; |
134 | |
135 | case HWPressureEvent::MEMORY_DEPS: |
136 | for (const InstRef &IR : Event.AffectedInstructions) { |
137 | unsigned IID = IR.getSourceIndex(); |
138 | IPI[IID].MemoryPressureCycles++; |
139 | } |
140 | } |
141 | } |
142 | |
143 | #ifndef NDEBUG |
144 | void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, |
145 | const DependencyEdge &DepEdge, |
146 | MCInstPrinter &MCIP) const { |
147 | unsigned FromIID = DepEdge.FromIID; |
148 | unsigned ToIID = DepEdge.ToIID; |
149 | assert(FromIID < ToIID && "Graph should be acyclic!" ); |
150 | |
151 | const DependencyEdge::Dependency &DE = DepEdge.Dep; |
152 | assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!" ); |
153 | |
154 | OS << " FROM: " << FromIID << " TO: " << ToIID << " " ; |
155 | if (DE.Type == DependencyEdge::DT_REGISTER) { |
156 | OS << " - REGISTER: " ; |
157 | MCIP.printRegName(OS, DE.ResourceOrRegID); |
158 | } else if (DE.Type == DependencyEdge::DT_MEMORY) { |
159 | OS << " - MEMORY" ; |
160 | } else { |
161 | assert(DE.Type == DependencyEdge::DT_RESOURCE && |
162 | "Unsupported dependency type!" ); |
163 | OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; |
164 | } |
165 | OS << " - COST: " << DE.Cost << '\n'; |
166 | } |
167 | #endif // NDEBUG |
168 | |
169 | void DependencyGraph::pruneEdges(unsigned Iterations) { |
170 | for (DGNode &N : Nodes) { |
171 | unsigned NumPruned = 0; |
172 | const unsigned Size = N.OutgoingEdges.size(); |
173 | // Use a cut-off threshold to prune edges with a low frequency. |
174 | for (unsigned I = 0, E = Size; I < E; ++I) { |
175 | DependencyEdge &Edge = N.OutgoingEdges[I]; |
176 | if (Edge.Frequency == Iterations) |
177 | continue; |
178 | double Factor = (double)Edge.Frequency / Iterations; |
179 | if (0.10 < Factor) |
180 | continue; |
181 | Nodes[Edge.ToIID].NumPredecessors--; |
182 | std::swap(a&: Edge, b&: N.OutgoingEdges[E - 1]); |
183 | --E; |
184 | ++NumPruned; |
185 | } |
186 | |
187 | if (NumPruned) |
188 | N.OutgoingEdges.resize(N: Size - NumPruned); |
189 | } |
190 | } |
191 | |
192 | void DependencyGraph::initializeRootSet( |
193 | SmallVectorImpl<unsigned> &RootSet) const { |
194 | for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { |
195 | const DGNode &N = Nodes[I]; |
196 | if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) |
197 | RootSet.emplace_back(Args&: I); |
198 | } |
199 | } |
200 | |
201 | void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, |
202 | unsigned Iterations) { |
203 | SmallVector<unsigned, 8> ToVisit; |
204 | |
205 | // A critical sequence is computed as the longest path from a node of the |
206 | // RootSet to a leaf node (i.e. a node with no successors). The RootSet is |
207 | // composed of nodes with at least one successor, and no predecessors. |
208 | // |
209 | // Each node of the graph starts with an initial default cost of zero. The |
210 | // cost of a node is a measure of criticality: the higher the cost, the bigger |
211 | // is the performance impact. |
212 | // For register and memory dependencies, the cost is a function of the write |
213 | // latency as well as the actual delay (in cycles) caused to users. |
214 | // For processor resource dependencies, the cost is a function of the resource |
215 | // pressure. Resource interferences with low frequency values are ignored. |
216 | // |
217 | // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of |
218 | // the inner loop selects (i.e. visits) a node N from a set of `unvisited |
219 | // nodes`, and then propagates the cost of N to all its neighbors. |
220 | // |
221 | // The `unvisited nodes` set initially contains all the nodes from the |
222 | // RootSet. A node N is added to the `unvisited nodes` if all its |
223 | // predecessors have been visited already. |
224 | // |
225 | // For simplicity, every node tracks the number of unvisited incoming edges in |
226 | // field `NumVisitedPredecessors`. When the value of that field drops to |
227 | // zero, then the corresponding node is added to a `ToVisit` set. |
228 | // |
229 | // At the end of every iteration of the outer loop, set `ToVisit` becomes our |
230 | // new `unvisited nodes` set. |
231 | // |
232 | // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) |
233 | // is empty. This algorithm works under the assumption that the graph is |
234 | // acyclic. |
235 | do { |
236 | for (unsigned IID : RootSet) { |
237 | const DGNode &N = Nodes[IID]; |
238 | for (const DependencyEdge &DepEdge : N.OutgoingEdges) { |
239 | unsigned ToIID = DepEdge.ToIID; |
240 | DGNode &To = Nodes[ToIID]; |
241 | uint64_t Cost = N.Cost + DepEdge.Dep.Cost; |
242 | // Check if this is the most expensive incoming edge seen so far. In |
243 | // case, update the total cost of the destination node (ToIID), as well |
244 | // its field `CriticalPredecessor`. |
245 | if (Cost > To.Cost) { |
246 | To.CriticalPredecessor = DepEdge; |
247 | To.Cost = Cost; |
248 | To.Depth = N.Depth + 1; |
249 | } |
250 | To.NumVisitedPredecessors++; |
251 | if (To.NumVisitedPredecessors == To.NumPredecessors) |
252 | ToVisit.emplace_back(Args&: ToIID); |
253 | } |
254 | } |
255 | |
256 | std::swap(LHS&: RootSet, RHS&: ToVisit); |
257 | ToVisit.clear(); |
258 | } while (!RootSet.empty()); |
259 | } |
260 | |
261 | void DependencyGraph::getCriticalSequence( |
262 | SmallVectorImpl<const DependencyEdge *> &Seq) const { |
263 | // At this stage, nodes of the graph have been already visited, and costs have |
264 | // been propagated through the edges (see method `propagateThroughEdges()`). |
265 | |
266 | // Identify the node N with the highest cost in the graph. By construction, |
267 | // that node is the last instruction of our critical sequence. |
268 | // Field N.Depth would tell us the total length of the sequence. |
269 | // |
270 | // To obtain the sequence of critical edges, we simply follow the chain of |
271 | // critical predecessors starting from node N (field |
272 | // DGNode::CriticalPredecessor). |
273 | const auto It = |
274 | llvm::max_element(Range: Nodes, C: [](const DGNode &Lhs, const DGNode &Rhs) { |
275 | return Lhs.Cost < Rhs.Cost; |
276 | }); |
277 | unsigned IID = std::distance(first: Nodes.begin(), last: It); |
278 | Seq.resize(N: Nodes[IID].Depth); |
279 | for (const DependencyEdge *&DE : llvm::reverse(C&: Seq)) { |
280 | const DGNode &N = Nodes[IID]; |
281 | DE = &N.CriticalPredecessor; |
282 | IID = N.CriticalPredecessor.FromIID; |
283 | } |
284 | } |
285 | |
286 | void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS, |
287 | const MCInst &MCI, |
288 | bool UseDifferentColor) const { |
289 | FOS.PadToColumn(NewCol: 14); |
290 | if (UseDifferentColor) |
291 | FOS.changeColor(Color: raw_ostream::CYAN, Bold: true, BG: false); |
292 | FOS << printInstructionString(MCI); |
293 | if (UseDifferentColor) |
294 | FOS.resetColor(); |
295 | } |
296 | |
297 | void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { |
298 | // Early exit if no bottlenecks were found during the simulation. |
299 | if (!SeenStallCycles || !BPI.PressureIncreaseCycles) |
300 | return; |
301 | |
302 | SmallVector<const DependencyEdge *, 16> Seq; |
303 | DG.getCriticalSequence(Seq); |
304 | if (Seq.empty()) |
305 | return; |
306 | |
307 | OS << "\nCritical sequence based on the simulation:\n\n" ; |
308 | |
309 | const DependencyEdge &FirstEdge = *Seq[0]; |
310 | ArrayRef<llvm::MCInst> Source = getSource(); |
311 | unsigned FromIID = FirstEdge.FromIID % Source.size(); |
312 | unsigned ToIID = FirstEdge.ToIID % Source.size(); |
313 | bool IsLoopCarried = FromIID >= ToIID; |
314 | |
315 | formatted_raw_ostream FOS(OS); |
316 | FOS.PadToColumn(NewCol: 14); |
317 | FOS << "Instruction" ; |
318 | FOS.PadToColumn(NewCol: 58); |
319 | FOS << "Dependency Information" ; |
320 | |
321 | bool HasColors = FOS.has_colors(); |
322 | |
323 | unsigned CurrentIID = 0; |
324 | if (IsLoopCarried) { |
325 | FOS << "\n +----< " << FromIID << "." ; |
326 | printInstruction(FOS, MCI: Source[FromIID], UseDifferentColor: HasColors); |
327 | FOS << "\n |\n | < loop carried > \n |" ; |
328 | } else { |
329 | while (CurrentIID < FromIID) { |
330 | FOS << "\n " << CurrentIID << "." ; |
331 | printInstruction(FOS, MCI: Source[CurrentIID]); |
332 | CurrentIID++; |
333 | } |
334 | |
335 | FOS << "\n +----< " << CurrentIID << "." ; |
336 | printInstruction(FOS, MCI: Source[CurrentIID], UseDifferentColor: HasColors); |
337 | CurrentIID++; |
338 | } |
339 | |
340 | for (const DependencyEdge *&DE : Seq) { |
341 | ToIID = DE->ToIID % Source.size(); |
342 | unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; |
343 | |
344 | while (CurrentIID < LastIID) { |
345 | FOS << "\n | " << CurrentIID << "." ; |
346 | printInstruction(FOS, MCI: Source[CurrentIID]); |
347 | CurrentIID++; |
348 | } |
349 | |
350 | if (CurrentIID == ToIID) { |
351 | FOS << "\n +----> " << ToIID << "." ; |
352 | printInstruction(FOS, MCI: Source[CurrentIID], UseDifferentColor: HasColors); |
353 | } else { |
354 | FOS << "\n |\n | < loop carried > \n |" |
355 | << "\n +----> " << ToIID << "." ; |
356 | printInstruction(FOS, MCI: Source[ToIID], UseDifferentColor: HasColors); |
357 | } |
358 | FOS.PadToColumn(NewCol: 58); |
359 | |
360 | const DependencyEdge::Dependency &Dep = DE->Dep; |
361 | if (HasColors) |
362 | FOS.changeColor(Color: raw_ostream::SAVEDCOLOR, Bold: true, BG: false); |
363 | |
364 | if (Dep.Type == DependencyEdge::DT_REGISTER) { |
365 | FOS << "## REGISTER dependency: " ; |
366 | if (HasColors) |
367 | FOS.changeColor(Color: raw_ostream::MAGENTA, Bold: true, BG: false); |
368 | getInstPrinter().printRegName(OS&: FOS, Reg: Dep.ResourceOrRegID); |
369 | } else if (Dep.Type == DependencyEdge::DT_MEMORY) { |
370 | FOS << "## MEMORY dependency." ; |
371 | } else { |
372 | assert(Dep.Type == DependencyEdge::DT_RESOURCE && |
373 | "Unsupported dependency type!" ); |
374 | FOS << "## RESOURCE interference: " ; |
375 | if (HasColors) |
376 | FOS.changeColor(Color: raw_ostream::MAGENTA, Bold: true, BG: false); |
377 | FOS << Tracker.resolveResourceName(ResourceMask: Dep.ResourceOrRegID); |
378 | if (HasColors) { |
379 | FOS.resetColor(); |
380 | FOS.changeColor(Color: raw_ostream::SAVEDCOLOR, Bold: true, BG: false); |
381 | } |
382 | FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) |
383 | << "% ]" ; |
384 | } |
385 | if (HasColors) |
386 | FOS.resetColor(); |
387 | ++CurrentIID; |
388 | } |
389 | |
390 | while (CurrentIID < Source.size()) { |
391 | FOS << "\n " << CurrentIID << "." ; |
392 | printInstruction(FOS, MCI: Source[CurrentIID]); |
393 | CurrentIID++; |
394 | } |
395 | |
396 | FOS << '\n'; |
397 | FOS.flush(); |
398 | } |
399 | |
400 | #ifndef NDEBUG |
401 | void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { |
402 | OS << "\nREG DEPS\n" ; |
403 | for (const DGNode &Node : Nodes) |
404 | for (const DependencyEdge &DE : Node.OutgoingEdges) |
405 | if (DE.Dep.Type == DependencyEdge::DT_REGISTER) |
406 | dumpDependencyEdge(OS, DE, MCIP); |
407 | |
408 | OS << "\nMEM DEPS\n" ; |
409 | for (const DGNode &Node : Nodes) |
410 | for (const DependencyEdge &DE : Node.OutgoingEdges) |
411 | if (DE.Dep.Type == DependencyEdge::DT_MEMORY) |
412 | dumpDependencyEdge(OS, DE, MCIP); |
413 | |
414 | OS << "\nRESOURCE DEPS\n" ; |
415 | for (const DGNode &Node : Nodes) |
416 | for (const DependencyEdge &DE : Node.OutgoingEdges) |
417 | if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) |
418 | dumpDependencyEdge(OS, DE, MCIP); |
419 | } |
420 | #endif // NDEBUG |
421 | |
422 | void DependencyGraph::addDependency(unsigned From, unsigned To, |
423 | DependencyEdge::Dependency &&Dep) { |
424 | DGNode &NodeFrom = Nodes[From]; |
425 | DGNode &NodeTo = Nodes[To]; |
426 | SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; |
427 | |
428 | auto It = find_if(Range&: Vec, P: [To, Dep](DependencyEdge &DE) { |
429 | return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; |
430 | }); |
431 | |
432 | if (It != Vec.end()) { |
433 | It->Dep.Cost += Dep.Cost; |
434 | It->Frequency++; |
435 | return; |
436 | } |
437 | |
438 | DependencyEdge DE = {.Dep: Dep, .FromIID: From, .ToIID: To, .Frequency: 1}; |
439 | Vec.emplace_back(Args&: DE); |
440 | NodeTo.NumPredecessors++; |
441 | } |
442 | |
443 | BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, |
444 | MCInstPrinter &Printer, |
445 | ArrayRef<MCInst> S, unsigned NumIter) |
446 | : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()), |
447 | DG(S.size() * 3), Iterations(NumIter), TotalCycles(0), |
448 | PressureIncreasedBecauseOfResources(false), |
449 | PressureIncreasedBecauseOfRegisterDependencies(false), |
450 | PressureIncreasedBecauseOfMemoryDependencies(false), |
451 | SeenStallCycles(false), BPI() {} |
452 | |
453 | void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, |
454 | unsigned RegID, unsigned Cost) { |
455 | bool IsLoopCarried = From >= To; |
456 | unsigned SourceSize = getSource().size(); |
457 | if (IsLoopCarried) { |
458 | DG.addRegisterDep(From, To: To + SourceSize, RegID, Cost); |
459 | DG.addRegisterDep(From: From + SourceSize, To: To + (SourceSize * 2), RegID, Cost); |
460 | return; |
461 | } |
462 | DG.addRegisterDep(From: From + SourceSize, To: To + SourceSize, RegID, Cost); |
463 | } |
464 | |
465 | void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, |
466 | unsigned Cost) { |
467 | bool IsLoopCarried = From >= To; |
468 | unsigned SourceSize = getSource().size(); |
469 | if (IsLoopCarried) { |
470 | DG.addMemoryDep(From, To: To + SourceSize, Cost); |
471 | DG.addMemoryDep(From: From + SourceSize, To: To + (SourceSize * 2), Cost); |
472 | return; |
473 | } |
474 | DG.addMemoryDep(From: From + SourceSize, To: To + SourceSize, Cost); |
475 | } |
476 | |
477 | void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, |
478 | uint64_t Mask, unsigned Cost) { |
479 | bool IsLoopCarried = From >= To; |
480 | unsigned SourceSize = getSource().size(); |
481 | if (IsLoopCarried) { |
482 | DG.addResourceDep(From, To: To + SourceSize, Mask, Cost); |
483 | DG.addResourceDep(From: From + SourceSize, To: To + (SourceSize * 2), Mask, Cost); |
484 | return; |
485 | } |
486 | DG.addResourceDep(From: From + SourceSize, To: To + SourceSize, Mask, Cost); |
487 | } |
488 | |
489 | void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { |
490 | const unsigned IID = Event.IR.getSourceIndex(); |
491 | if (Event.Type == HWInstructionEvent::Dispatched) { |
492 | Tracker.onInstructionDispatched(IID); |
493 | return; |
494 | } |
495 | if (Event.Type == HWInstructionEvent::Executed) { |
496 | Tracker.onInstructionExecuted(IID); |
497 | return; |
498 | } |
499 | |
500 | if (Event.Type != HWInstructionEvent::Issued) |
501 | return; |
502 | |
503 | ArrayRef<llvm::MCInst> Source = getSource(); |
504 | const Instruction &IS = *Event.IR.getInstruction(); |
505 | unsigned To = IID % Source.size(); |
506 | |
507 | unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); |
508 | uint64_t ResourceMask = IS.getCriticalResourceMask(); |
509 | SmallVector<std::pair<unsigned, unsigned>, 4> Users; |
510 | while (ResourceMask) { |
511 | uint64_t Current = ResourceMask & (-ResourceMask); |
512 | Tracker.getResourceUsers(ResourceMask: Current, Users); |
513 | for (const std::pair<unsigned, unsigned> &U : Users) |
514 | addResourceDep(From: U.first % Source.size(), To, Mask: Current, Cost: U.second + Cycles); |
515 | Users.clear(); |
516 | ResourceMask ^= Current; |
517 | } |
518 | |
519 | const CriticalDependency &RegDep = IS.getCriticalRegDep(); |
520 | if (RegDep.Cycles) { |
521 | Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); |
522 | unsigned From = RegDep.IID % Source.size(); |
523 | addRegisterDep(From, To, RegID: RegDep.RegID, Cost: Cycles); |
524 | } |
525 | |
526 | const CriticalDependency &MemDep = IS.getCriticalMemDep(); |
527 | if (MemDep.Cycles) { |
528 | Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); |
529 | unsigned From = MemDep.IID % Source.size(); |
530 | addMemoryDep(From, To, Cost: Cycles); |
531 | } |
532 | |
533 | Tracker.handleInstructionIssuedEvent( |
534 | Event: static_cast<const HWInstructionIssuedEvent &>(Event)); |
535 | |
536 | // Check if this is the last simulated instruction. |
537 | if (IID == ((Iterations * Source.size()) - 1)) |
538 | DG.finalizeGraph(Iterations); |
539 | } |
540 | |
541 | void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { |
542 | assert(Event.Reason != HWPressureEvent::INVALID && |
543 | "Unexpected invalid event!" ); |
544 | |
545 | Tracker.handlePressureEvent(Event); |
546 | |
547 | switch (Event.Reason) { |
548 | default: |
549 | break; |
550 | |
551 | case HWPressureEvent::RESOURCES: |
552 | PressureIncreasedBecauseOfResources = true; |
553 | break; |
554 | case HWPressureEvent::REGISTER_DEPS: |
555 | PressureIncreasedBecauseOfRegisterDependencies = true; |
556 | break; |
557 | case HWPressureEvent::MEMORY_DEPS: |
558 | PressureIncreasedBecauseOfMemoryDependencies = true; |
559 | break; |
560 | } |
561 | } |
562 | |
563 | void BottleneckAnalysis::onCycleEnd() { |
564 | ++TotalCycles; |
565 | |
566 | bool PressureIncreasedBecauseOfDataDependencies = |
567 | PressureIncreasedBecauseOfRegisterDependencies || |
568 | PressureIncreasedBecauseOfMemoryDependencies; |
569 | if (!PressureIncreasedBecauseOfResources && |
570 | !PressureIncreasedBecauseOfDataDependencies) |
571 | return; |
572 | |
573 | ++BPI.PressureIncreaseCycles; |
574 | if (PressureIncreasedBecauseOfRegisterDependencies) |
575 | ++BPI.RegisterDependencyCycles; |
576 | if (PressureIncreasedBecauseOfMemoryDependencies) |
577 | ++BPI.MemoryDependencyCycles; |
578 | if (PressureIncreasedBecauseOfDataDependencies) |
579 | ++BPI.DataDependencyCycles; |
580 | if (PressureIncreasedBecauseOfResources) |
581 | ++BPI.ResourcePressureCycles; |
582 | PressureIncreasedBecauseOfResources = false; |
583 | PressureIncreasedBecauseOfRegisterDependencies = false; |
584 | PressureIncreasedBecauseOfMemoryDependencies = false; |
585 | } |
586 | |
587 | void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { |
588 | if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { |
589 | OS << "\n\nNo resource or data dependency bottlenecks discovered.\n" ; |
590 | return; |
591 | } |
592 | |
593 | double PressurePerCycle = |
594 | (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; |
595 | double ResourcePressurePerCycle = |
596 | (double)BPI.ResourcePressureCycles * 100 / TotalCycles; |
597 | double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; |
598 | double RegDepPressurePerCycle = |
599 | (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; |
600 | double MemDepPressurePerCycle = |
601 | (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; |
602 | |
603 | OS << "\n\nCycles with backend pressure increase [ " |
604 | << format(Fmt: "%.2f" , Vals: floor(x: (PressurePerCycle * 100) + 0.5) / 100) << "% ]" ; |
605 | |
606 | OS << "\nThroughput Bottlenecks: " |
607 | << "\n Resource Pressure [ " |
608 | << format(Fmt: "%.2f" , Vals: floor(x: (ResourcePressurePerCycle * 100) + 0.5) / 100) |
609 | << "% ]" ; |
610 | |
611 | if (BPI.PressureIncreaseCycles) { |
612 | ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); |
613 | const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); |
614 | for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { |
615 | unsigned ReleaseAtCycles = Distribution[I]; |
616 | if (ReleaseAtCycles) { |
617 | double Frequency = (double)ReleaseAtCycles * 100 / TotalCycles; |
618 | const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResourceIdx: I); |
619 | OS << "\n - " << PRDesc.Name << " [ " |
620 | << format(Fmt: "%.2f" , Vals: floor(x: (Frequency * 100) + 0.5) / 100) << "% ]" ; |
621 | } |
622 | } |
623 | } |
624 | |
625 | OS << "\n Data Dependencies: [ " |
626 | << format(Fmt: "%.2f" , Vals: floor(x: (DDPerCycle * 100) + 0.5) / 100) << "% ]" ; |
627 | OS << "\n - Register Dependencies [ " |
628 | << format(Fmt: "%.2f" , Vals: floor(x: (RegDepPressurePerCycle * 100) + 0.5) / 100) |
629 | << "% ]" ; |
630 | OS << "\n - Memory Dependencies [ " |
631 | << format(Fmt: "%.2f" , Vals: floor(x: (MemDepPressurePerCycle * 100) + 0.5) / 100) |
632 | << "% ]\n" ; |
633 | } |
634 | |
635 | void BottleneckAnalysis::printView(raw_ostream &OS) const { |
636 | std::string Buffer; |
637 | raw_string_ostream TempStream(Buffer); |
638 | printBottleneckHints(OS&: TempStream); |
639 | TempStream.flush(); |
640 | OS << Buffer; |
641 | printCriticalSequence(OS); |
642 | } |
643 | |
644 | } // namespace mca. |
645 | } // namespace llvm |
646 | |