1//===---------------------- AMDGPUNextUseAnalysis.cpp ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AMDGPUNextUseAnalysis pass, a machine-level analysis
10// that computes the distance from each instruction to the "nearest" next use of
11// every live virtual register. These distances guide register spilling
12// decisions by identifying which live values are furthest from their next use
13// and are therefore the best candidates to spill.
14//
15// The analysis is based on the Braun & Hack CC'09 paper "Register Spilling and
16// Live-Range Splitting for SSA-Form Programs."
17//
18// Key concepts:
19//
20// NextUseDistance A loop-depth-weighted instruction count representing
21// how far away a register's next use is. Distances
22// through deeper loops are scaled by fromLoopDepth() so
23// that uses inside hot loops appear closer.
24//
25// Inter-block Pre-computed shortest weighted distances between all
26// distances pairs of basic blocks, used to efficiently answer
27// cross-block next-use queries. Each intermediate block
28// is weighted by fromLoopDepth() applied once per loop
29// boundary crossing relative to the destination.
30//
31// Configuration flags (see Config struct in the header):
32//
33// CountPhis Count PHI instructions toward distance and block size.
34// ForwardOnly Restrict inter-block distances to forward-reachable
35// paths.
36// PreciseUseModeling Model PHI uses at their incoming edge block and filter
37// uses with intermediate redefinitions.
38// PromoteToPreheader Route loop-entry and inner-loop uses to the preheader.
39//
40// This file contains:
41//
42// - Command-line options for configuration and debug output
43// - LiveRegUse / JSON helpers
44// - AMDGPUNextUseAnalysisImpl (the main analysis implementation)
45// - Instruction ID assignment and block size computation
46// - CFG path pre-computation (reachability, loop depth, back-edges)
47// - Inter-block distance computation
48// - Per-register next-use distance queries and caching
49// - AMDGPUNextUseAnalysis (public facade, pimpl)
50// - Legacy and new pass manager wrappers
51//
52//===----------------------------------------------------------------------===//
53
54#include "AMDGPUNextUseAnalysis.h"
55#include "AMDGPU.h"
56#include "GCNRegPressure.h"
57#include "GCNSubtarget.h"
58
59#include "llvm/ADT/DenseMap.h"
60#include "llvm/ADT/PostOrderIterator.h"
61#include "llvm/ADT/SmallVector.h"
62#include "llvm/CodeGen/MachineBasicBlock.h"
63#include "llvm/CodeGen/MachineFunction.h"
64#include "llvm/CodeGen/MachineInstr.h"
65#include "llvm/CodeGen/MachineLoopInfo.h"
66#include "llvm/IR/ModuleSlotTracker.h"
67#include "llvm/InitializePasses.h"
68#include "llvm/Support/FileSystem.h"
69#include "llvm/Support/JSON.h"
70#include "llvm/Support/Timer.h"
71#include "llvm/Support/ToolOutputFile.h"
72#include "llvm/Support/raw_ostream.h"
73
74#include <algorithm>
75#include <limits>
76#include <string>
77
78using namespace llvm;
79
80#define DEBUG_TYPE "amdgpu-next-use-analysis"
81
82//==============================================================================
83// Options etc
84//==============================================================================
85namespace {
86
87cl::opt<bool>
88 DistanceCacheEnabled("amdgpu-next-use-analysis-distance-cache",
89 cl::init(Val: true), cl::Hidden,
90 cl::desc("Enable live-reg-use distance cache"));
91
92cl::opt<std::string>
93 DumpNextUseDistanceAsJson("amdgpu-next-use-analysis-dump-distance-as-json",
94 cl::Hidden);
95
96cl::opt<bool> DumpNextUseDistanceDefToUse(
97 "amdgpu-next-use-analysis-dump-distance-def-to-use", cl::init(Val: false),
98 cl::Hidden);
99
100cl::opt<bool>
101 DumpNextUseDistanceVerbose("amdgpu-next-use-analysis-dump-distance-verbose",
102 cl::init(Val: false), cl::Hidden);
103
104// 'graphics' and 'compute' modes arose due to initial competing implementations
105// of next-use analysis that emphasized different types of workloads. This
106// implementation is a compromise that combines aspects of both. Over time, the
107// hope is we will be able to remove some of these differences and settle on a
108// more unified implementation.
109cl::opt<std::string>
110 ConfigPresetOpt("amdgpu-next-use-analysis-config", cl::Hidden,
111 cl::init(Val: "graphics"),
112 cl::desc("Config preset: 'graphics' or 'compute'"));
113
114cl::opt<bool> ConfigCountPhisOpt(
115 "amdgpu-next-use-analysis-count-phis", cl::Hidden,
116 cl::desc("Count PHI instructions toward distance and block size"));
117cl::opt<bool> ConfigForwardOnlyOpt(
118 "amdgpu-next-use-analysis-forward-only", cl::Hidden,
119 cl::desc("Restrict inter-block distances to forward-reachable paths"));
120cl::opt<bool> ConfigPreciseUseModelingOpt(
121 "amdgpu-next-use-analysis-precise-use-modeling", cl::Hidden,
122 cl::desc("Model PHI uses via incoming edge block with loop-aware "
123 "reachability filtering"));
124cl::opt<bool> ConfigPromoteToPreheaderOpt(
125 "amdgpu-next-use-analysis-use-preheader-model", cl::Hidden,
126 cl::desc("Promote loop-entry and inner-loop uses to the loop preheader"));
127} // namespace
128
129//==============================================================================
130// LiveRegUse - Represents a live register use with its distance. Used for
131// tracking and sorting register uses by distance.
132//==============================================================================
133namespace {
134using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
135struct LiveRegUse : public UseDistancePair {
136 // 'nullptr' indicates an unset/invalid state.
137 LiveRegUse() : UseDistancePair(nullptr, 0) {}
138 LiveRegUse(const MachineOperand *Use, NextUseDistance Dist)
139 : UseDistancePair(Use, Dist) {}
140 LiveRegUse(const UseDistancePair &P) : UseDistancePair(P) {}
141
142 bool isUnset() const { return Use == nullptr; }
143
144 Register getReg() const { return Use->getReg(); }
145 unsigned getSubReg() const { return Use->getSubReg(); }
146 LaneBitmask getLaneMask(const SIRegisterInfo *TRI) const {
147 return TRI->getSubRegIndexLaneMask(SubIdx: Use->getSubReg());
148 }
149
150 bool isCloserThan(const LiveRegUse &X) const {
151 if (Dist < X.Dist)
152 return true;
153
154 if (Dist > X.Dist)
155 return false;
156
157 if (Use == X.Use)
158 return false;
159
160 // Ugh. When !CountPhis, PHIs and the first non-PHI instruction have id
161 // 0. In this case, consider PHIs as less than the first non-PHI
162 // instruction.
163 const MachineInstr *ThisMI = Use->getParent();
164 const MachineInstr *XMI = X.Use->getParent();
165 const MachineBasicBlock *ThisMBB = ThisMI->getParent();
166 if (ThisMBB == XMI->getParent()) {
167 if (ThisMI->isPHI() && !XMI->isPHI() &&
168 XMI == &(*ThisMBB->getFirstNonPHI()))
169 return true;
170 }
171
172 // Ensure deterministic results
173 return X.getReg() < getReg();
174 }
175
176 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr,
177 const MachineRegisterInfo *MRI = nullptr) const {
178 if (isUnset()) {
179 OS << "<unset>";
180 return;
181 }
182 Dist.print(OS);
183 OS << " [" << printReg(Reg: getReg(), TRI, SubIdx: getSubReg(), MRI) << "]";
184 }
185
186 LLVM_DUMP_METHOD void dump() const {
187 print(OS&: dbgs());
188 dbgs() << '\n';
189 }
190};
191
192inline bool updateClosest(LiveRegUse &Closest, const LiveRegUse &X) {
193 if (!Closest.Use || X.isCloserThan(X: Closest)) {
194 Closest = X;
195 return true;
196 }
197 return false;
198}
199
200inline bool updateFurthest(LiveRegUse &Furthest, const LiveRegUse &X) {
201 if (!Furthest.Use || Furthest.isCloserThan(X)) {
202 Furthest = X;
203 return true;
204 }
205 return false;
206}
207} // namespace
208
209//==============================================================================
210// JSON helpers
211//==============================================================================
212namespace {
213template <typename Lambda>
214void printStringAttr(json::OStream &J, const char *Name, Lambda L) {
215 J.attributeBegin(Key: Name);
216 raw_ostream &OS = J.rawValueBegin();
217 OS << '"';
218 L(OS);
219 OS << '"';
220 J.rawValueEnd();
221 J.attributeEnd();
222}
223void printStringAttr(json::OStream &J, const char *Name, Printable P) {
224 printStringAttr(J, Name, L: [&](raw_ostream &OS) { OS << P; });
225}
226
227void printStringAttr(json::OStream &J, const char *Name, const MachineInstr &MI,
228 ModuleSlotTracker &MST) {
229 printStringAttr(J, Name, L: [&](raw_ostream &OS) {
230 MI.print(OS, MST,
231 /* IsStandalone */ false,
232 /* SkipOpers */ false,
233 /* SkipDebugLoc */ false,
234 /* AddNewLine ---> */ AddNewLine: false,
235 /* TargetInstrInfo */ TII: nullptr);
236 });
237}
238
239void printMBBNameAttr(json::OStream &J, const char *Name,
240 const MachineBasicBlock &MBB, ModuleSlotTracker &MST) {
241 printStringAttr(J, Name, L: [&](raw_ostream &OS) {
242 MBB.printName(os&: OS, printNameFlags: MachineBasicBlock::PrintNameIr, moduleSlotTracker: &MST);
243 });
244}
245
246template <typename NameLambda, typename ValueT>
247void printAttr(json::OStream &J, NameLambda NL, ValueT V) {
248 std::string Name;
249 raw_string_ostream NameOS(Name);
250 NL(NameOS);
251 J.attribute(Key: NameOS.str(), Contents: V);
252}
253
254template <typename ValueT>
255void printAttr(json::OStream &J, const Printable &P, ValueT V) {
256 printAttr(J, [&](raw_ostream &OS) { OS << P; }, V);
257}
258
259} // namespace
260
261//==============================================================================
262// AMDGPUNextUseAnalysisImpl
263//==============================================================================
264class llvm::AMDGPUNextUseAnalysisImpl {
265public:
266 struct CacheableNextUseDistance {
267 bool IsInstrRelative;
268 NextUseDistance Distance;
269 };
270 static constexpr bool InstrRelative = true;
271 static constexpr bool InstrInvariant = false;
272
273private:
274 const MachineFunction *MF = nullptr;
275 const SIRegisterInfo *TRI = nullptr;
276 const SIInstrInfo *TII = nullptr;
277 const MachineLoopInfo *MLI = nullptr;
278 const MachineRegisterInfo *MRI = nullptr;
279
280 using InstrIdTy = unsigned;
281 using InstrToIdMap = DenseMap<const MachineInstr *, InstrIdTy>;
282 InstrToIdMap InstrToId;
283 AMDGPUNextUseAnalysis::Config Cfg;
284
285 void initializeTables() {
286 for (const MachineBasicBlock &BB : *MF)
287 calcInstrIds(BB: &BB, MutableInstrToId&: InstrToId);
288 initializeCfgPaths();
289 initializeInterBlockDistances();
290 }
291
292 void clearTables() {
293 InstrToId.clear();
294 RegUseMap.clear();
295 Paths.clear();
296
297 resetDistanceCache();
298 }
299
300 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
301 // Instruction Ids
302 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
303private:
304 unsigned sizeOf(const MachineInstr &MI) const {
305 // When !Cfg.CountPhis, PHIs do not contribute to distances/sizes since they
306 // generally don't result in the generation of a machine instruction.
307 // FIXME: Consider using MI.isPseudo() or maybe MI.isMetaInstruction().
308 return Cfg.CountPhis ? 1 : !MI.isPHI();
309 }
310
311 void calcInstrIds(const MachineBasicBlock *BB,
312 InstrToIdMap &MutableInstrToId) const {
313 InstrIdTy Id = 0;
314 for (auto &MI : BB->instrs()) {
315 MutableInstrToId[&MI] = Id;
316 Id += sizeOf(MI);
317 }
318 }
319
320 /// Returns MI's instruction Id. It renumbers (part of) the BB if MI is not
321 /// found in the map.
322 InstrIdTy getInstrId(const MachineInstr *MI) const {
323 auto It = InstrToId.find(Val: MI);
324 if (It != InstrToId.end())
325 return It->second;
326
327 // Renumber the MBB.
328 // TODO: Renumber from MI onwards.
329 auto &MutableInstrToId = const_cast<InstrToIdMap &>(InstrToId);
330 calcInstrIds(BB: MI->getParent(), MutableInstrToId);
331 return InstrToId.find(Val: MI)->second;
332 }
333
334 // Length of the segment from MI (inclusive) to the first instruction of the
335 // basic block.
336 InstrIdTy getHeadLen(const MachineInstr *MI) const {
337 const MachineBasicBlock *MBB = MI->getParent();
338 return getInstrId(MI) + getInstrId(MI: &MBB->instr_front()) + 1;
339 }
340
341 // Length of the segment from MI (exclusive) to the last instruction of the
342 // basic block.
343 InstrIdTy getTailLen(const MachineInstr *MI) const {
344 const MachineBasicBlock *MBB = MI->getParent();
345 return getInstrId(MI: &MBB->instr_back()) - getInstrId(MI);
346 }
347
348 // Length of the segment from 'From' to 'To' (exclusive). Both instructions
349 // must be in the same basic block.
350 InstrIdTy getDistance(const MachineInstr *From,
351 const MachineInstr *To) const {
352 assert(From->getParent() == To->getParent());
353 return getInstrId(MI: To) - getInstrId(MI: From);
354 }
355
356 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
357 // RegUses - cache of uses by register
358 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
359private:
360 DenseMap<Register, SmallVector<const MachineOperand *>> RegUseMap;
361
362 const SmallVector<const MachineOperand *> &
363 getRegisterUses(Register Reg) const {
364 auto I = RegUseMap.find(Val: Reg);
365 if (I != RegUseMap.end())
366 return I->second;
367
368 auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
369 SmallVector<const MachineOperand *> &Uses = NonConstThis->RegUseMap[Reg];
370 for (const MachineOperand &UseMO : MRI->use_nodbg_operands(Reg)) {
371 if (!UseMO.isUndef())
372 Uses.push_back(Elt: &UseMO);
373 }
374 return Uses;
375 }
376
377 bool hasAtLeastOneUse(Register Reg) const {
378 return !getRegisterUses(Reg).empty();
379 }
380
381 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
382 // Paths
383 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
384private:
385 class Path {
386 using StorageTy =
387 std::pair<const MachineBasicBlock *, const MachineBasicBlock *>;
388 StorageTy P;
389
390 public:
391 constexpr Path() : P(nullptr, nullptr) {}
392 constexpr Path(const MachineBasicBlock *Src, const MachineBasicBlock *Dst)
393 : P(Src, Dst) {}
394 Path(const StorageTy &Pair) : P(Pair) {}
395
396 constexpr operator const StorageTy &() const { return P; }
397 using DenseMapInfo = llvm::DenseMapInfo<StorageTy>;
398
399 const MachineBasicBlock *src() const { return P.first; }
400 const MachineBasicBlock *dst() const { return P.second; }
401 };
402
403 enum class EdgeKind { Back = -1, None = 0, Forward = 1 };
404 static constexpr StringRef toString(EdgeKind EK) {
405 if (EK == EdgeKind::Back)
406 return "back";
407 if (EK == EdgeKind::Forward)
408 return "fwd";
409 return "none";
410 }
411
412 struct PathInfo {
413 EdgeKind EK;
414 bool Reachable;
415 int ForwardReachable;
416 unsigned RelativeLoopDepth;
417 std::optional<NextUseDistance> ShortestDistance;
418 std::optional<NextUseDistance> ShortestUnweightedDistance;
419 InstrIdTy Size;
420
421 PathInfo()
422 : EK(EdgeKind::None), Reachable(false), ForwardReachable(-1),
423 RelativeLoopDepth(0), Size(0) {}
424
425 bool isBackedge() const { return EK == EdgeKind::Back; }
426
427 bool isForwardReachableSet() const { return 0 <= ForwardReachable; }
428 bool isForwardReachableUnset() const { return ForwardReachable < 0; }
429 bool isForwardReachable() const { return ForwardReachable == 1; }
430 bool isNotForwardReachable() const { return ForwardReachable == 0; }
431
432 void print(raw_ostream &OS) const {
433 OS << "{ek=" << toString(EK) << " reach=" << Reachable
434 << " fwd-reach=" << ForwardReachable
435 << " loop-depth=" << RelativeLoopDepth << " size=" << Size;
436 if (ShortestDistance) {
437 OS << " shortest-dist=";
438 ShortestDistance->print(OS);
439 }
440 if (ShortestUnweightedDistance) {
441 OS << " shortest-unweighted-dist=";
442 ShortestUnweightedDistance->print(OS);
443 }
444 OS << "}";
445 }
446
447 LLVM_DUMP_METHOD void dump() const {
448 print(OS&: dbgs());
449 dbgs() << '\n';
450 }
451 };
452
453 //----------------------------------------------------------------------------
454 // Path Storage - 'Paths' is lazily populated and some members are lazily
455 // computed. All mutations should go through one of the 'initializePathInfo*'
456 // flavors below.
457 //----------------------------------------------------------------------------
458 DenseMap<Path, PathInfo, Path::DenseMapInfo> Paths;
459
460 const PathInfo *maybePathInfoFor(const MachineBasicBlock *From,
461 const MachineBasicBlock *To) const {
462 auto I = Paths.find(Val: {From, To});
463 return I == Paths.end() ? nullptr : &I->second;
464 }
465
466 PathInfo &getOrInitPathInfo(const MachineBasicBlock *From,
467 const MachineBasicBlock *To) const {
468 auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
469 auto &MutablePaths = NonConstThis->Paths;
470
471 Path P(From, To);
472 auto [I, Inserted] = MutablePaths.try_emplace(Key: P);
473 if (!Inserted)
474 return I->second;
475
476 bool Reachable = calcIsReachable(From: P.src(), To: P.dst());
477
478 // Iterator may have been invalidated by calcIsReachable, so get a fresh
479 // reference to the slot.
480 return NonConstThis->initializePathInfo(Slot&: MutablePaths.at(Val: P), P,
481 EK: EdgeKind::None, Reachable);
482 }
483
484 const PathInfo &pathInfoFor(const MachineBasicBlock *From,
485 const MachineBasicBlock *To) const {
486 return getOrInitPathInfo(From, To);
487 }
488
489 //----------------------------------------------------------------------------
490 // initializePathInfo* - various flavors of PathInfo initialization. They
491 // (should) always funnel to the first flavor below.
492 //----------------------------------------------------------------------------
493 PathInfo &initializePathInfo(PathInfo &Slot, Path P, EdgeKind EK,
494 bool Reachable) {
495 Slot.EK = EK;
496 Slot.Reachable = Reachable;
497 Slot.ForwardReachable = EK == EdgeKind::None ? -1 : EK == EdgeKind::Forward;
498 Slot.RelativeLoopDepth =
499 Slot.Reachable ? calcRelativeLoopDepth(From: P.src(), To: P.dst()) : 0;
500 Slot.Size = P.src() == P.dst() ? calcSize(BB: P.src()) : 0;
501 if (EK != EdgeKind::None)
502 Slot.ShortestUnweightedDistance = 0;
503 return Slot;
504 }
505
506 PathInfo &initializePathInfo(Path P, EdgeKind EK, bool Reachable) const {
507 auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
508 auto &MutablePaths = NonConstThis->Paths;
509 return NonConstThis->initializePathInfo(Slot&: MutablePaths[P], P, EK, Reachable);
510 }
511
512 std::pair<PathInfo *, bool> maybeInitializePathInfo(Path P, EdgeKind EK,
513 bool Reachable) const {
514 auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
515 auto &MutablePaths = NonConstThis->Paths;
516 auto [I, Inserted] = MutablePaths.try_emplace(Key: P);
517 if (Inserted)
518 NonConstThis->initializePathInfo(Slot&: I->second, P, EK, Reachable);
519 return {&I->second, Inserted};
520 }
521
522 bool initializePathInfoForwardReachable(const MachineBasicBlock *From,
523 const MachineBasicBlock *To,
524 bool Value) const {
525 PathInfo &Slot = getOrInitPathInfo(From, To);
526 assert(Slot.isForwardReachableUnset());
527 Slot.ForwardReachable = Value;
528 return Value;
529 }
530
531 NextUseDistance
532 initializePathInfoShortestDistance(const MachineBasicBlock *From,
533 const MachineBasicBlock *To,
534 NextUseDistance Value) const {
535 PathInfo &Slot = getOrInitPathInfo(From, To);
536 assert(!Slot.ShortestDistance.has_value());
537 Slot.ShortestDistance = Value;
538 return Value;
539 }
540
541 NextUseDistance
542 initializePathInfoShortestUnweightedDistance(const MachineBasicBlock *From,
543 const MachineBasicBlock *To,
544 NextUseDistance Value) const {
545 PathInfo &Slot = getOrInitPathInfo(From, To);
546 assert(!Slot.ShortestUnweightedDistance.has_value());
547 Slot.ShortestUnweightedDistance = Value;
548 return Value;
549 }
550
551 //----------------------------------------------------------------------------
552 // initialize*Paths
553 //----------------------------------------------------------------------------
554private:
555 void initializePaths(const SmallVector<Path> &ReachablePaths,
556 const SmallVector<Path> &UnreachablePaths) const {
557 for (const Path &P : ReachablePaths)
558 initializePathInfo(P, EK: EdgeKind::None, Reachable: true);
559 for (const Path &P : UnreachablePaths)
560 initializePathInfo(P, EK: EdgeKind::None, Reachable: false);
561 }
562
563 void
564 initializeForwardOnlyPaths(const SmallVector<Path> &ReachablePaths,
565 const SmallVector<Path> &UnreachablePaths) const {
566 for (bool R : {true, false}) {
567 const auto &ToInit = R ? ReachablePaths : UnreachablePaths;
568 for (const Path &P : ToInit) {
569 PathInfo &Slot = getOrInitPathInfo(From: P.src(), To: P.dst());
570 assert(Slot.isForwardReachableUnset() || Slot.ForwardReachable == R);
571 Slot.ForwardReachable = R;
572 }
573 }
574 }
575
576 // Follow the control flow graph starting at the entry block until all blocks
577 // have been visited. Along the way, initialize the PathInfo for each edge
578 // traversed.
579 void initializeCfgPaths() {
580 Paths.clear();
581
582 enum VisitState { Undiscovered, Visiting, Finished };
583 DenseMap<const MachineBasicBlock *, VisitState> State;
584
585 SmallVector<const MachineBasicBlock *> Work{&MF->front()};
586 State[&MF->front()] = Undiscovered;
587
588 while (!Work.empty()) {
589 const MachineBasicBlock *Src = Work.back();
590 VisitState &SrcState = State[Src];
591
592 // A block may already be 'Finished' if it is reachable from multiple
593 // predecessors causing it to be pushed more than once while still
594 // 'Undiscovered'.
595 if (SrcState == Visiting || SrcState == Finished) {
596 Work.pop_back();
597 SrcState = Finished;
598 continue;
599 }
600
601 SrcState = Visiting;
602 for (const MachineBasicBlock *Dst : Src->successors()) {
603 const VisitState DstState = State.lookup(Val: Dst);
604
605 EdgeKind EK;
606 if (DstState == Undiscovered) {
607 EK = EdgeKind::Forward;
608 Work.push_back(Elt: Dst);
609 } else if (DstState == Visiting) {
610 EK = EdgeKind::Back;
611 } else {
612 EK = EdgeKind::Forward;
613 }
614
615 Path P(Src, Dst);
616 assert(!Paths.contains(P));
617 initializePathInfo(P, EK, /*Reachable*/ true);
618 }
619 }
620
621 LLVM_DEBUG(dumpPaths());
622 }
623
624 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
625 // Loop helpers
626 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
627private:
628 static bool isStandAloneLoop(const MachineLoop *Loop) {
629 return Loop->getSubLoops().empty() && Loop->isOutermost();
630 }
631
632 static MachineLoop *findChildLoop(MachineLoop *const Parent,
633 MachineLoop *Descendant) {
634 for (MachineLoop *L = Descendant; L != Parent; L = L->getParentLoop()) {
635 if (L->getParentLoop() == Parent)
636 return L;
637 }
638 return nullptr;
639 }
640
641 // If loops 'A' and 'B' share a common parent loop, return that loop and the
642 // depth of 'A' relative to it. Otherwise return nullptr and the loop depth of
643 // 'A'.
644 static std::pair<MachineLoop *, unsigned>
645 findCommonParent(MachineLoop *A, const MachineLoop *B) {
646 unsigned Depth = 0;
647 for (; A != nullptr; A = A->getParentLoop(), ++Depth) {
648 if (A->contains(L: B))
649 break;
650 }
651 return {A, Depth};
652 }
653
654 static const MachineBasicBlock *
655 getOutermostPreheader(const MachineLoop *Loop) {
656 return Loop ? Loop->getOutermostLoop()->getLoopPreheader() : nullptr;
657 }
658
659 static MachineBasicBlock *findChildPreheader(MachineLoop *const Parent,
660 MachineLoop *Descendant) {
661 MachineLoop *ChildLoop = findChildLoop(Parent, Descendant);
662 return ChildLoop ? ChildLoop->getLoopPreheader() : nullptr;
663 }
664
665 static const MachineBasicBlock *
666 getIncomingBlockIfPhiUse(const MachineInstr *MI, const MachineOperand *MO) {
667 return MI->isPHI() ? MI->getOperand(i: MO->getOperandNo() + 1).getMBB()
668 : nullptr;
669 }
670
671 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
672 // Calculate features
673 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
674private:
675 InstrIdTy calcSize(const MachineBasicBlock *BB) const {
676 InstrIdTy Size = BB->size();
677 if (!Cfg.CountPhis)
678 Size -= std::distance(first: BB->begin(), last: BB->getFirstNonPHI());
679 return Size;
680 }
681
682 NextUseDistance calcWeightedSize(const MachineBasicBlock *From,
683 const MachineBasicBlock *To) const {
684 return NextUseDistance::fromSize(Size: getSize(BB: From),
685 Depth: getRelativeLoopDepth(From, To));
686 }
687
688 // Return the loop depth of 'From' relative to 'To'.
689 unsigned calcRelativeLoopDepth(const MachineBasicBlock *From,
690 const MachineBasicBlock *To) const {
691 MachineLoop *LoopFrom = MLI->getLoopFor(BB: From);
692 MachineLoop *LoopTo = MLI->getLoopFor(BB: To);
693
694 if (!LoopFrom)
695 return 0;
696
697 if (!LoopTo)
698 return LoopFrom->getLoopDepth();
699
700 if (LoopFrom->contains(L: LoopTo)) // covers LoopFrom == LoopTo
701 return 0;
702
703 if (LoopTo->contains(L: LoopFrom))
704 return LoopFrom->getLoopDepth() - LoopTo->getLoopDepth();
705
706 // Loops are siblings of some sort.
707 return findCommonParent(A: LoopFrom, B: LoopTo).second;
708 }
709
710 // Attempt to find a path from 'From' to 'To' using a depth first search. If
711 // 'ForwardOnly' is true, do not follow backedges. As a performance
712 // improvement, this may initialize reachable intermediate paths or paths we
713 // determine are unreachable.
714 bool calcIsReachable(const MachineBasicBlock *From,
715 const MachineBasicBlock *To,
716 bool ForwardOnly = false) const {
717 if (From == To && !MLI->getLoopFor(BB: From))
718 return false;
719
720 if (!ForwardOnly && interBlockDistanceExists(From, To))
721 return true;
722
723 enum { VisitOp, PopOp };
724 using MBBOpPair = std::pair<const MachineBasicBlock *, int>;
725 SmallVector<MBBOpPair> Work{{From, VisitOp}};
726 DenseSet<const MachineBasicBlock *> Visited{From};
727
728 SmallVector<Path> IntermediatePath;
729 SmallVector<Path> Unreachable;
730
731 // Should be run at every function exit point.
732 auto Finally = [&](bool Reachable) {
733 // This is an optimization. For intermediate paths we found while
734 // calculating reachability for 'From' --> 'To', remember their
735 // reachability.
736 if (!Reachable) {
737 IntermediatePath.clear();
738 for (const MachineBasicBlock *MBB : Visited) {
739 if (MBB != From)
740 Unreachable.emplace_back(Args&: MBB, Args&: To);
741 }
742 }
743
744 if (ForwardOnly)
745 initializeForwardOnlyPaths(ReachablePaths: IntermediatePath, UnreachablePaths: Unreachable);
746 else
747 initializePaths(ReachablePaths: IntermediatePath, UnreachablePaths: Unreachable);
748
749 return Reachable;
750 };
751
752 while (!Work.empty()) {
753 auto [Current, Op] = Work.pop_back_val();
754
755 // Backtracking
756 if (Op == PopOp) {
757 IntermediatePath.pop_back();
758 if (ForwardOnly)
759 Unreachable.emplace_back(Args&: Current, Args&: To);
760 continue;
761 }
762
763 if (Current->succ_empty())
764 continue;
765
766 if (Current != From) {
767 IntermediatePath.emplace_back(Args&: Current, Args&: To);
768 Work.emplace_back(Args&: Current, Args: PopOp);
769 }
770
771 for (const MachineBasicBlock *Succ : Current->successors()) {
772 if (ForwardOnly && isBackedge(From: Current, To: Succ))
773 continue;
774
775 if (Succ == To)
776 return Finally(true);
777
778 if (auto CachedReachable = isMaybeReachable(From: Succ, To, ForwardOnly)) {
779 if (CachedReachable.value())
780 return Finally(true);
781 Visited.insert(V: Succ);
782 continue;
783 }
784
785 if (Visited.insert(V: Succ).second)
786 Work.emplace_back(Args&: Succ, Args: VisitOp);
787 }
788 }
789
790 return Finally(false);
791 }
792
793 //----------------------------------------------------------------------------
794 // Inter-block distance - the weighted and unweighted cost (i.e. "distance")
795 // to travel from one MachineBasicBlock to another.
796 //
797 // Values are pre-computed and stored in 'InterBlockDistances' using a
798 // backwards data-flow algorithm similar to the one described in 4.1 of a
799 // "Register Spilling and Live-Range Splitting for SSA-Form Programs" by
800 // Matthias Braun and Sebastian Hack, CC'09. This replaced a prior
801 // implementation based on Dijkstra's shortest path algorithm.
802 //----------------------------------------------------------------------------
803private:
804 struct InterBlockDistance {
805 NextUseDistance Weighted;
806 NextUseDistance Unweighted;
807 InterBlockDistance() : Weighted(-1), Unweighted(-1) {}
808 InterBlockDistance(NextUseDistance W, NextUseDistance UW)
809 : Weighted(W), Unweighted(UW) {}
810 bool operator==(const InterBlockDistance &Other) const {
811 return Weighted == Other.Weighted && Unweighted == Other.Unweighted;
812 }
813 bool operator!=(const InterBlockDistance &Other) const {
814 return !(*this == Other);
815 }
816
817 void print(raw_ostream &OS) const {
818 OS << "{W=";
819 Weighted.print(OS);
820 OS << " U=";
821 Unweighted.print(OS);
822 OS << "}";
823 }
824
825 LLVM_DUMP_METHOD void dump() const {
826 print(OS&: dbgs());
827 dbgs() << '\n';
828 }
829 };
830 using InterBlockDistanceMap =
831 DenseMap<unsigned, DenseMap<unsigned, InterBlockDistance>>;
832 InterBlockDistanceMap InterBlockDistances;
833
834 void initializeInterBlockDistances() {
835 InterBlockDistanceMap Distances;
836
837 bool Changed;
838 do {
839 Changed = false;
840 for (const MachineBasicBlock *MBB : post_order(G: MF)) {
841 unsigned MBBNum = MBB->getNumber();
842
843 // Save previous state for convergence check
844 InterBlockDistanceMap::mapped_type Prev = std::move(Distances[MBBNum]);
845 InterBlockDistanceMap::mapped_type Curr;
846 Curr.reserve(NumEntries: Prev.size());
847
848 // Direct successors are distance 0 by definition: no instructions are
849 // executed between exiting MBB and entering Succ.
850 for (const MachineBasicBlock *Succ : MBB->successors())
851 Curr[Succ->getNumber()] = InterBlockDistance(0, 0);
852
853 // Propagate further destinations through each successor.
854 for (const MachineBasicBlock *Succ : MBB->successors()) {
855 unsigned SuccNum = Succ->getNumber();
856 const unsigned UnweightedSize{getSize(BB: Succ)};
857
858 for (const auto &[DestBlockNum, DestDist] : Distances[SuccNum]) {
859 // MBB -> MBB is considered unreachable (getInterBlockDistance
860 // asserts From != To).
861 if (DestBlockNum == MBBNum)
862 continue;
863
864 const MachineBasicBlock *DestMBB =
865 MF->getBlockNumbered(N: DestBlockNum);
866
867 const NextUseDistance UnweightedDist{UnweightedSize +
868 DestDist.Unweighted};
869
870 unsigned SuccToDestLoopDepth = calcRelativeLoopDepth(From: Succ, To: DestMBB);
871
872 const NextUseDistance WeightedDist =
873 DestDist.Weighted +
874 NextUseDistance::fromSize(Size: UnweightedSize, Depth: SuccToDestLoopDepth);
875
876 // Insert or update distances (take minimum)
877 auto [I, First] =
878 Curr.try_emplace(Key: DestBlockNum, Args: WeightedDist, Args: UnweightedDist);
879 if (!First) {
880 InterBlockDistance &Slot = I->second;
881 Slot.Weighted = min(A: Slot.Weighted, B: WeightedDist);
882 Slot.Unweighted = min(A: Slot.Unweighted, B: UnweightedDist);
883 }
884 }
885 }
886 Changed |= (Prev != Curr);
887 Distances[MBBNum] = std::move(Curr);
888 }
889 } while (Changed);
890
891 InterBlockDistances = std::move(Distances);
892 LLVM_DEBUG(dumpInterBlockDistances());
893 }
894
895 const InterBlockDistance *
896 getInterBlockDistanceMapValue(const MachineBasicBlock *From,
897 const MachineBasicBlock *To) const {
898 auto I = InterBlockDistances.find(Val: From->getNumber());
899 if (I == InterBlockDistances.end())
900 return nullptr;
901 const InterBlockDistanceMap::mapped_type &FromSlot = I->second;
902 auto J = FromSlot.find(Val: To->getNumber());
903 return J == FromSlot.end() ? nullptr : &J->second;
904 }
905
906 bool interBlockDistanceExists(const MachineBasicBlock *From,
907 const MachineBasicBlock *To) const {
908 return getInterBlockDistanceMapValue(From, To);
909 }
910
911 NextUseDistance getInterBlockDistance(const MachineBasicBlock *From,
912 const MachineBasicBlock *To,
913 bool Unweighted) const {
914
915 assert(From != To && "The basic blocks should be different.");
916 if (!From || !To)
917 return NextUseDistance::unreachable();
918
919 if (Cfg.ForwardOnly && !isForwardReachable(From, To))
920 return NextUseDistance::unreachable();
921
922 const InterBlockDistance *BD = getInterBlockDistanceMapValue(From, To);
923 if (!BD)
924 return NextUseDistance::unreachable();
925
926 return Unweighted ? BD->Unweighted : BD->Weighted;
927 }
928
929 NextUseDistance
930 getWeightedInterBlockDistance(const MachineBasicBlock *From,
931 const MachineBasicBlock *To) const {
932 return getInterBlockDistance(From, To, Unweighted: false);
933 }
934
935 NextUseDistance
936 getUnweightedInterBlockDistance(const MachineBasicBlock *From,
937 const MachineBasicBlock *To) const {
938 return getInterBlockDistance(From, To, Unweighted: true);
939 }
940
941 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
942 // Feature getters. Use cached results if available. If not calculate.
943 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
944private:
945 InstrIdTy getSize(const MachineBasicBlock *BB) const {
946 return pathInfoFor(From: BB, To: BB).Size;
947 }
948
949 bool isReachable(const MachineBasicBlock *From,
950 const MachineBasicBlock *To) const {
951 return pathInfoFor(From, To).Reachable;
952 }
953
954 bool isReachableOrSame(const MachineBasicBlock *From,
955 const MachineBasicBlock *To) const {
956 return From == To || pathInfoFor(From, To).Reachable;
957 }
958
959 bool isForwardReachable(const MachineBasicBlock *From,
960 const MachineBasicBlock *To) const {
961 const PathInfo &PI = pathInfoFor(From, To);
962 if (PI.isForwardReachableSet())
963 return PI.isForwardReachable();
964
965 return initializePathInfoForwardReachable(
966 From, To,
967 Value: PI.Reachable && calcIsReachable(From, To, /*ForwardOnly*/ true));
968 }
969
970 // Return true/false if we know that 'To' is reachable or not from
971 // 'From'. Otherwise return 'std::nullopt'.
972 std::optional<bool> isMaybeReachable(const MachineBasicBlock *From,
973 const MachineBasicBlock *To,
974 bool ForwardOnly) const {
975 const PathInfo *PI = maybePathInfoFor(From, To);
976 if (!PI)
977 return std::nullopt;
978
979 if (ForwardOnly) {
980 if (PI->isForwardReachable())
981 return true;
982
983 if (PI->isNotForwardReachable())
984 return false;
985 return std::nullopt;
986 }
987 return PI->Reachable;
988 }
989
990 bool isBackedge(const MachineBasicBlock *From,
991 const MachineBasicBlock *To) const {
992 return pathInfoFor(From, To).isBackedge();
993 }
994
995 // Can be used as a substitute for DT->dominates(A, B) if A and B are in the
996 // same basic block.
997 bool instrsAreInOrder(const MachineInstr *A, const MachineInstr *B) const {
998 assert(A->getParent() == B->getParent() &&
999 "instructions must be in the same basic block!");
1000 if (A == B || getInstrId(MI: A) < getInstrId(MI: B))
1001 return true;
1002 if (!A->isPHI())
1003 return false;
1004 if (!B->isPHI())
1005 return true;
1006 for (auto &PHI : A->getParent()->phis()) {
1007 if (&PHI == A)
1008 return true;
1009 if (&PHI == B)
1010 return false;
1011 }
1012 return false;
1013 }
1014
1015 unsigned getRelativeLoopDepth(const MachineBasicBlock *From,
1016 const MachineBasicBlock *To) const {
1017 return pathInfoFor(From, To).RelativeLoopDepth;
1018 }
1019
1020 NextUseDistance getShortestPath(const MachineBasicBlock *From,
1021 const MachineBasicBlock *To) const {
1022 std::optional<NextUseDistance> MaybeD =
1023 pathInfoFor(From, To).ShortestDistance;
1024 if (MaybeD.has_value())
1025 return MaybeD.value();
1026
1027 NextUseDistance Dist = getWeightedInterBlockDistance(From, To);
1028 return initializePathInfoShortestDistance(From, To, Value: Dist);
1029 }
1030
1031 NextUseDistance getShortestUnweightedPath(const MachineBasicBlock *From,
1032 const MachineBasicBlock *To) const {
1033 std::optional<NextUseDistance> MaybeD =
1034 pathInfoFor(From, To).ShortestUnweightedDistance;
1035 if (MaybeD.has_value())
1036 return MaybeD.value();
1037
1038 return initializePathInfoShortestUnweightedDistance(
1039 From, To, Value: getUnweightedInterBlockDistance(From, To));
1040 }
1041
1042 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1043 /// MBBDistPair - Represents the distance to a machine basic block.
1044 /// Used for returning both the distance and the target block together.
1045 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1046private:
1047 struct MBBDistPair {
1048 NextUseDistance Distance;
1049 const MachineBasicBlock *MBB;
1050 MBBDistPair() : Distance(NextUseDistance::unreachable()), MBB(nullptr) {}
1051 MBBDistPair(NextUseDistance D, const MachineBasicBlock *B)
1052 : Distance(D), MBB(B) {}
1053
1054 MBBDistPair operator+(NextUseDistance D) { return {Distance + D, MBB}; }
1055 MBBDistPair &operator+=(NextUseDistance D) {
1056 Distance += D;
1057 return *this;
1058 }
1059
1060 void print(raw_ostream &OS) const {
1061 OS << "{";
1062 Distance.print(OS);
1063 if (MBB)
1064 OS << " " << printMBBReference(MBB: *MBB);
1065 else
1066 OS << " <null>";
1067 OS << "}";
1068 }
1069
1070 LLVM_DUMP_METHOD void dump() const {
1071 print(OS&: dbgs());
1072 dbgs() << '\n';
1073 }
1074 };
1075
1076 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1077 // CFG Helpers
1078 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1079private:
1080 // Return the shortest distance to a latch
1081 MBBDistPair calcShortestDistanceToLatch(const MachineBasicBlock *CurMBB,
1082 const MachineLoop *CurLoop) const {
1083 SmallVector<MachineBasicBlock *, 2> Latches;
1084 CurLoop->getLoopLatches(LoopLatches&: Latches);
1085 MBBDistPair LD;
1086
1087 for (MachineBasicBlock *LMBB : Latches) {
1088 if (LMBB == CurMBB)
1089 return {0, CurMBB};
1090
1091 NextUseDistance Dst = getShortestPath(From: CurMBB, To: LMBB);
1092 if (Dst < LD.Distance) {
1093 LD.Distance = Dst;
1094 LD.MBB = LMBB;
1095 }
1096 }
1097 return LD;
1098 }
1099
1100 // Return the shortest unweighted distance to a latch
1101 MBBDistPair
1102 calcShortestUnweightedDistanceToLatch(const MachineBasicBlock *CurMBB,
1103 const MachineLoop *CurLoop) const {
1104 SmallVector<MachineBasicBlock *, 2> Latches;
1105 CurLoop->getLoopLatches(LoopLatches&: Latches);
1106 MBBDistPair LD;
1107
1108 for (MachineBasicBlock *LMBB : Latches) {
1109 if (LMBB == CurMBB)
1110 return {0, CurMBB};
1111
1112 NextUseDistance Dst = getShortestUnweightedPath(From: CurMBB, To: LMBB);
1113 if (Dst < LD.Distance) {
1114 LD.Distance = Dst;
1115 LD.MBB = LMBB;
1116 }
1117 }
1118 return LD;
1119 }
1120
1121 // Return the shortest distance to an exit
1122 MBBDistPair calcShortestDistanceToExit(const MachineBasicBlock *CurMBB,
1123 const MachineLoop *CurLoop) const {
1124 SmallVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ExitEdges;
1125 CurLoop->getExitEdges(ExitEdges);
1126 MBBDistPair LD;
1127
1128 for (auto [Exit, Dest] : ExitEdges) {
1129 if (Exit == CurMBB)
1130 return {0, CurMBB};
1131
1132 NextUseDistance Dst = getShortestPath(From: CurMBB, To: Exit);
1133 if (Dst < LD.Distance) {
1134 LD.Distance = Dst;
1135 LD.MBB = Exit;
1136 }
1137 }
1138 return LD;
1139 }
1140
1141 // Return the shortest distance through a loop (header to latch) that goes
1142 // through CurMBB.
1143 MBBDistPair
1144 calcShortestDistanceThroughInnermostLoop(const MachineBasicBlock *CurMBB,
1145 MachineLoop *CurLoop) const {
1146 assert(MLI->getLoopFor(CurMBB) == CurLoop);
1147
1148 // This is a hot spot. Check it before doing anything else.
1149 if (CurLoop->getNumBlocks() == 1)
1150 return {getSize(BB: CurMBB), CurMBB};
1151
1152 MachineBasicBlock *LoopHeader = CurLoop->getHeader();
1153 MBBDistPair LD{0, nullptr};
1154
1155 LD += getSize(BB: LoopHeader);
1156
1157 if (CurMBB != LoopHeader)
1158 LD += getShortestPath(From: LoopHeader, To: CurMBB);
1159
1160 if (CurLoop->isLoopExiting(BB: CurMBB))
1161 LD.MBB = CurMBB;
1162 else
1163 LD = calcShortestDistanceToExit(CurMBB, CurLoop) + LD.Distance;
1164
1165 if (CurMBB != LoopHeader && CurMBB != LD.MBB)
1166 LD += getSize(BB: CurMBB);
1167
1168 if (LD.MBB != LoopHeader)
1169 LD += getSize(BB: LD.MBB);
1170
1171 return LD;
1172 }
1173
1174 // Return the shortest distance through a loop (header to latch) that goes
1175 // through CurMBB.
1176 MBBDistPair calcShortestDistanceThroughLoop(const MachineBasicBlock *CurMBB,
1177 MachineLoop *OuterLoop) const {
1178 MachineLoop *CurLoop = MLI->getLoopFor(BB: CurMBB);
1179 MBBDistPair CurLD =
1180 calcShortestDistanceThroughInnermostLoop(CurMBB, CurLoop);
1181
1182 MachineBasicBlock *CurHdr = CurLoop->getHeader();
1183 for (;;) {
1184 if (OuterLoop == CurLoop)
1185 return CurLD;
1186
1187 MachineLoop *ParentLoop = CurLoop->getParentLoop();
1188 MachineBasicBlock *ParentHdr = ParentLoop->getHeader();
1189
1190 MBBDistPair LD{0, nullptr};
1191 LD += getSize(BB: ParentHdr);
1192 LD += getShortestPath(From: ParentHdr, To: CurHdr);
1193 LD += CurLD.Distance.applyLoopWeight();
1194 LD = calcShortestDistanceToExit(CurMBB: CurLD.MBB, CurLoop: ParentLoop) + LD.Distance;
1195 LD += getSize(BB: LD.MBB);
1196 CurLD = LD;
1197 CurLoop = ParentLoop;
1198 CurHdr = ParentHdr;
1199 }
1200 llvm_unreachable("CurMBB not contained in OuterLoop");
1201 }
1202
1203 // Similar to calcShortestDistanceThroughLoop with LoopWeight applied to the
1204 // returned distance.
1205 MBBDistPair
1206 calcWeightedDistanceThroughLoopViaMBB(const MachineBasicBlock *CurMBB,
1207 MachineLoop *CurLoop) const {
1208 MBBDistPair LD = calcShortestDistanceThroughLoop(CurMBB, OuterLoop: CurLoop);
1209 LD.Distance = LD.Distance.applyLoopWeight();
1210 return LD;
1211 }
1212
1213 // Return the weighted, shortest distance through a loop (header to latch).
1214 // If ParentLoop is provided, use it to adjust the loop depth.
1215 MBBDistPair calcWeightedDistanceThroughLoop(
1216 const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
1217 const MachineLoop *ParentLoop = nullptr) const {
1218 if (CurLoop->getNumBlocks() != 1)
1219 return calcWeightedDistanceThroughLoopViaMBB(CurMBB, CurLoop);
1220
1221 unsigned LoopDepth = MLI->getLoopDepth(BB: CurMBB);
1222 if (ParentLoop)
1223 LoopDepth -= ParentLoop->getLoopDepth();
1224
1225 return {NextUseDistance::fromSize(Size: getSize(BB: CurMBB), Depth: LoopDepth),
1226 CurLoop->getLoopLatch()};
1227 }
1228
1229 // Calculate total distance from exit point to use instruction
1230 NextUseDistance appendDistanceToUse(const MBBDistPair &Exit,
1231 const MachineInstr *UseMI,
1232 const MachineBasicBlock *UseMBB) const {
1233 return Exit.Distance + getShortestPath(From: Exit.MBB, To: UseMBB) +
1234 getHeadLen(MI: UseMI);
1235 }
1236
1237 // Return the weighted, shortest distance through the CurLoop which is a
1238 // sub-loop of UseLoop.
1239 MBBDistPair calcDistanceThroughSubLoopUse(const MachineBasicBlock *CurMBB,
1240 MachineLoop *CurLoop,
1241 MachineLoop *UseLoop) const {
1242 // All the sub-loops of the UseLoop will be executed before the use.
1243 // Hence, we should take this into consideration in distance calculation.
1244 MachineLoop *UseLoopSubLoop = findChildLoop(Parent: UseLoop, Descendant: CurLoop);
1245 assert(UseLoopSubLoop && "CurLoop should be nested in UseLoop");
1246 return calcWeightedDistanceThroughLoop(CurMBB, CurLoop: UseLoopSubLoop, ParentLoop: UseLoop);
1247 }
1248
1249 // Similar to calcDistanceThroughSubLoopUse, adding the distance to 'UseMI'.
1250 NextUseDistance calcDistanceThroughSubLoopToUseMI(
1251 const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
1252 const MachineInstr *UseMI, const MachineBasicBlock *UseMBB,
1253 MachineLoop *UseLoop) const {
1254 return appendDistanceToUse(
1255 Exit: calcDistanceThroughSubLoopUse(CurMBB, CurLoop, UseLoop), UseMI, UseMBB);
1256 }
1257
1258 // Return the weighted distance through a loop to an outside use loop.
1259 // Differentiates between uses inside or outside of the current loop nest.
1260 MBBDistPair calcDistanceThroughLoopToOutsideLoopUse(
1261 const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
1262 const MachineBasicBlock *UseMBB, MachineLoop *UseLoop) const {
1263 assert(!CurLoop->contains(UseLoop));
1264
1265 if (isStandAloneLoop(Loop: CurLoop))
1266 return calcWeightedDistanceThroughLoopViaMBB(CurMBB, CurLoop);
1267
1268 MachineLoop *OutermostLoop = CurLoop->getOutermostLoop();
1269 if (!OutermostLoop->contains(L: UseLoop)) {
1270 // We should take into consideration the whole loop nest in the
1271 // calculation of the distance because we will reach the use after
1272 // executing the whole loop nest.
1273
1274 // ... But make sure that we pick a route that goes through CurMBB
1275 return calcWeightedDistanceThroughLoopViaMBB(CurMBB, CurLoop: OutermostLoop);
1276 }
1277
1278 // At this point we know that CurLoop and UseLoop are independent and they
1279 // are in the same loop nest.
1280
1281 if (MLI->getLoopDepth(BB: CurMBB) <= MLI->getLoopDepth(BB: UseMBB))
1282 return calcWeightedDistanceThroughLoop(CurMBB, CurLoop);
1283
1284 assert(CurLoop != OutermostLoop && "The loop cannot be the outermost.");
1285 const unsigned UseLoopDepth = MLI->getLoopDepth(BB: UseMBB);
1286 for (;;) {
1287 if (CurLoop->getLoopDepth() == UseLoopDepth)
1288 break;
1289 CurLoop = CurLoop->getParentLoop();
1290 if (CurLoop == OutermostLoop)
1291 break;
1292 }
1293 return calcWeightedDistanceThroughLoop(CurMBB, CurLoop);
1294 }
1295
1296 // Similar to calcDistanceThroughLoopToOutsideLoopUse but adds the distance to
1297 // an instruction in the loop.
1298 NextUseDistance calcDistanceThroughLoopToOutsideLoopUseMI(
1299 const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
1300 const MachineInstr *UseMI, const MachineBasicBlock *UseMBB,
1301 MachineLoop *UseLoop) const {
1302 return appendDistanceToUse(Exit: calcDistanceThroughLoopToOutsideLoopUse(
1303 CurMBB, CurLoop, UseMBB, UseLoop),
1304 UseMI, UseMBB);
1305 }
1306
1307 // Return true if 'MO' is covered by 'LaneMask'
1308 bool machineOperandCoveredBy(const MachineOperand &MO,
1309 LaneBitmask LaneMask) const {
1310 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx: MO.getSubReg());
1311 return (Mask & LaneMask) == Mask;
1312 }
1313
1314 // Returns true iff uses of LiveReg/LiveLaneMask in PHI UseMI are coming from
1315 // a backedge when starting at CurMI.
1316 bool isIncomingValFromBackedge(Register LiveReg, LaneBitmask LiveLaneMask,
1317 const MachineInstr *CurMI,
1318 const MachineInstr *UseMI) const {
1319 if (!UseMI->isPHI())
1320 return false;
1321
1322 MachineLoop *CurLoop = MLI->getLoopFor(BB: CurMI->getParent());
1323 MachineLoop *UseLoop = MLI->getLoopFor(BB: UseMI->getParent());
1324
1325 // Not a backedge if ...
1326 // A: not in a loop at all
1327 // B: or CurMI is in a loop outside of UseLoop
1328 // C: or UseMI is not in the UseLoop header
1329 if (/*A:*/ !UseLoop ||
1330 /*B:*/ (CurLoop && !UseLoop->contains(L: CurLoop)) ||
1331 /*C:*/ UseMI->getParent() != UseLoop->getHeader())
1332 return false;
1333
1334 SmallVector<MachineBasicBlock *, 2> Latches;
1335 UseLoop->getLoopLatches(LoopLatches&: Latches);
1336
1337 const unsigned NumOps = UseMI->getNumOperands();
1338 for (unsigned I = 1; I < NumOps; I += 2) {
1339 const MachineOperand &RegMO = UseMI->getOperand(i: I - 1);
1340 const MachineOperand &MBBMO = UseMI->getOperand(i: I);
1341 assert(RegMO.isReg() && "Expected register operand of PHI");
1342 assert(MBBMO.isMBB() && "Expected MBB operand of PHI");
1343 if (RegMO.getReg() == LiveReg &&
1344 machineOperandCoveredBy(MO: RegMO, LaneMask: LiveLaneMask)) {
1345 MachineBasicBlock *IncomingBB = MBBMO.getMBB();
1346 if (llvm::is_contained(Range&: Latches, Element: IncomingBB))
1347 return true;
1348 }
1349 }
1350 return false;
1351 }
1352
1353 // Return the distance from 'CurMI' through a parent loop backedge PHI Use
1354 // ('UseMI').
1355 CacheableNextUseDistance calcDistanceViaEnclosingBackedge(
1356 const MachineInstr *CurMI, const MachineBasicBlock *CurMBB,
1357 MachineLoop *CurLoop, const MachineInstr *UseMI,
1358 const MachineBasicBlock *UseMBB, MachineLoop *UseLoop) const {
1359 assert(UseLoop && "There is no backedge.");
1360 assert(CurLoop && (UseLoop != CurLoop) && UseLoop->contains(CurLoop) &&
1361 "Unexpected loop configuration");
1362
1363 InstrIdTy UseHeadLen = getHeadLen(MI: UseMI);
1364 MBBDistPair InnerLoopLD =
1365 calcDistanceThroughSubLoopUse(CurMBB, CurLoop, UseLoop);
1366 MBBDistPair LD = calcShortestDistanceToLatch(CurMBB: InnerLoopLD.MBB, CurLoop: UseLoop);
1367 return {.IsInstrRelative: InstrInvariant,
1368 .Distance: InnerLoopLD.Distance + LD.Distance + getSize(BB: LD.MBB) + UseHeadLen};
1369 }
1370
1371 // Optimized version of calcBackedgeDistance when we already know that CurMI
1372 // and UseMI are in the same basic block
1373 NextUseDistance calcBackedgeDistance(const MachineInstr *CurMI,
1374 const MachineBasicBlock *CurMBB,
1375 MachineLoop *CurLoop,
1376 const MachineInstr *UseMI) const {
1377 // use is in the next loop iteration
1378 InstrIdTy CurTailLen = getTailLen(MI: CurMI);
1379 InstrIdTy UseHeadLen = getHeadLen(MI: UseMI);
1380 MBBDistPair LD = calcShortestUnweightedDistanceToLatch(CurMBB, CurLoop);
1381 const MachineBasicBlock *HdrMBB = CurLoop->getHeader();
1382 NextUseDistance Hdr = CurMBB == HdrMBB ? 0 : getSize(BB: HdrMBB);
1383 NextUseDistance Dst =
1384 CurMBB == HdrMBB ? 0 : getShortestUnweightedPath(From: HdrMBB, To: CurMBB);
1385
1386 return CurTailLen + LD.Distance + getSize(BB: LD.MBB) + Hdr + Dst + UseHeadLen;
1387 }
1388
1389 //----------------------------------------------------------------------------
1390 // Calculate inter-instruction distances
1391 //----------------------------------------------------------------------------
1392private:
1393 // Calculate the shortest weighted path from MachineInstruction 'FromMI' to
1394 // 'ToMI'. It is weighted distance in that paths that exit loops are made to
1395 // look much further away.
1396 NextUseDistance calcShortestDistance(const MachineInstr *FromMI,
1397 const MachineInstr *ToMI) const {
1398 const MachineBasicBlock *FromMBB = FromMI->getParent();
1399 const MachineBasicBlock *ToMBB = ToMI->getParent();
1400
1401 if (FromMBB == ToMBB) {
1402 NextUseDistance RV = getDistance(From: FromMI, To: ToMI);
1403 assert(RV >= 0 && "unexpected negative distance from getDistance");
1404 return RV;
1405 }
1406
1407 InstrIdTy FromTailLen = getTailLen(MI: FromMI);
1408 InstrIdTy ToHeadLen = getHeadLen(MI: ToMI);
1409 NextUseDistance Dst = getShortestPath(From: FromMBB, To: ToMBB);
1410 assert(Dst.isReachable() &&
1411 "calcShortestDistance called for instructions in non-reachable"
1412 " basic blocks!");
1413 NextUseDistance RV = FromTailLen + Dst + ToHeadLen;
1414 assert(RV >= 0 && "unexpected negative distance");
1415 return RV;
1416 }
1417
1418 // Calculate the shortest unweighted path from MachineInstruction 'FromMI' to
1419 // 'ToMI'. In contrast with 'calcShortestDistance', distances are based solely
1420 // on basic block instruction counts and traversing a loop exit does not
1421 // affect the value.
1422 NextUseDistance
1423 calcShortestUnweightedDistance(const MachineInstr *FromMI,
1424 const MachineInstr *ToMI) const {
1425 const MachineBasicBlock *FromMBB = FromMI->getParent();
1426 const MachineBasicBlock *ToMBB = ToMI->getParent();
1427
1428 if (FromMBB == ToMBB)
1429 return getDistance(From: FromMI, To: ToMI);
1430
1431 InstrIdTy FromTailLen = getTailLen(MI: FromMI);
1432 InstrIdTy ToHeadLen = getHeadLen(MI: ToMI);
1433 NextUseDistance Dst = getShortestUnweightedPath(From: FromMBB, To: ToMBB);
1434 assert(Dst.isReachable() &&
1435 "calcShortestUnweightedDistance called for instructions in"
1436 " non-reachable basic blocks!");
1437 return FromTailLen + Dst + ToHeadLen;
1438 }
1439
1440 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1441 // calcDistanceToUse* - various flavors of calculating the distance from an
1442 // instruction 'CurMI' to the use of a live [sub]register.
1443 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1444private:
1445 // Return the distance from 'CurMI' to a live [sub]register use ('UseMI').
1446 //
1447 // Cfg flags controlling behavior:
1448 // PreciseUseModeling — rewrite PHI uses to their incoming edge block;
1449 // also selects unweighted cross-block distance
1450 // PromoteToPreheader — route loop-entry / inner-loop uses to the preheader
1451 CacheableNextUseDistance
1452 calcDistanceToUse(Register LiveReg, LaneBitmask LiveLaneMask,
1453 const MachineInstr &CurMI,
1454 const MachineOperand *UseMO) const {
1455 const MachineInstr *UseMI = UseMO->getParent();
1456 const MachineBasicBlock *CurMBB = CurMI.getParent();
1457 const MachineBasicBlock *UseMBB = UseMI->getParent();
1458 MachineLoop *CurLoop = MLI->getLoopFor(BB: CurMBB);
1459 MachineLoop *UseLoop = MLI->getLoopFor(BB: UseMBB);
1460
1461 if (Cfg.PreciseUseModeling) {
1462 // Map PHI use to the end of its incoming edge block.
1463 if (auto *PhiUseEdge = getIncomingBlockIfPhiUse(MI: UseMI, MO: UseMO)) {
1464 UseMI = &PhiUseEdge->back();
1465 UseMBB = PhiUseEdge;
1466 UseLoop = MLI->getLoopFor(BB: PhiUseEdge);
1467 }
1468 }
1469
1470 enum class LoopConfig {
1471 NoCur,
1472 Same,
1473 CurContainsUse,
1474 UseContainsCur,
1475 Siblings,
1476 Unrelated
1477 };
1478 auto [LpCfg, PreHdr, CommonParent] = [&]()
1479 -> std::tuple<LoopConfig, const MachineBasicBlock *, MachineLoop *> {
1480 if (!CurLoop) {
1481 return {LoopConfig::NoCur, getOutermostPreheader(Loop: UseLoop), nullptr};
1482 }
1483 if (CurLoop->contains(L: UseLoop)) {
1484 return {CurMBB == UseMBB ? LoopConfig::Same
1485 : LoopConfig::CurContainsUse,
1486 findChildPreheader(Parent: CurLoop, Descendant: UseLoop), nullptr};
1487 }
1488
1489 if (MachineLoop *P = findCommonParent(A: UseLoop, B: CurLoop).first) {
1490 if (P != UseLoop)
1491 return {LoopConfig::Siblings, findChildPreheader(Parent: P, Descendant: UseLoop), P};
1492 return {LoopConfig::UseContainsCur, nullptr, nullptr};
1493 }
1494 return {LoopConfig::Unrelated, getOutermostPreheader(Loop: UseLoop), nullptr};
1495 }();
1496
1497 //--------------------------------------------------------------------------
1498 // Don't PromoteToPreheader
1499 //--------------------------------------------------------------------------
1500 if (!Cfg.PromoteToPreheader) {
1501 switch (LpCfg) {
1502 case LoopConfig::NoCur:
1503 case LoopConfig::Same:
1504 case LoopConfig::CurContainsUse:
1505 return {.IsInstrRelative: InstrRelative, .Distance: calcShortestDistance(FromMI: &CurMI, ToMI: UseMI)};
1506
1507 case LoopConfig::UseContainsCur: {
1508 if (isIncomingValFromBackedge(LiveReg, LiveLaneMask, CurMI: &CurMI, UseMI)) {
1509 return calcDistanceViaEnclosingBackedge(CurMI: &CurMI, CurMBB, CurLoop,
1510 UseMI, UseMBB, UseLoop);
1511 }
1512
1513 return {.IsInstrRelative: InstrInvariant, .Distance: calcDistanceThroughSubLoopToUseMI(
1514 CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
1515 }
1516 case LoopConfig::Siblings:
1517 case LoopConfig::Unrelated:
1518 return {.IsInstrRelative: InstrInvariant, .Distance: calcDistanceThroughLoopToOutsideLoopUseMI(
1519 CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
1520 }
1521 llvm_unreachable("unexpected loop configuration!");
1522 }
1523
1524 //--------------------------------------------------------------------------
1525 // PromoteToPreheader
1526 //--------------------------------------------------------------------------
1527 if (PreHdr) {
1528 UseMI = &PreHdr->back();
1529 UseMBB = PreHdr;
1530 UseLoop = CommonParent;
1531 }
1532
1533 switch (LpCfg) {
1534 case LoopConfig::NoCur:
1535 return {.IsInstrRelative: InstrRelative, .Distance: calcShortestUnweightedDistance(FromMI: &CurMI, ToMI: UseMI) -
1536 (sizeOf(MI: *UseMI) ? 0 : 1)};
1537
1538 case LoopConfig::Same:
1539 case LoopConfig::CurContainsUse:
1540 if (CurMBB == UseMBB && !instrsAreInOrder(A: &CurMI, B: UseMI))
1541 return {.IsInstrRelative: InstrRelative,
1542 .Distance: calcBackedgeDistance(CurMI: &CurMI, CurMBB, CurLoop, UseMI)};
1543
1544 return {.IsInstrRelative: InstrRelative, .Distance: calcShortestUnweightedDistance(FromMI: &CurMI, ToMI: UseMI)};
1545
1546 case LoopConfig::UseContainsCur:
1547 case LoopConfig::Siblings:
1548 return {.IsInstrRelative: InstrInvariant, .Distance: calcDistanceThroughSubLoopToUseMI(
1549 CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
1550
1551 case LoopConfig::Unrelated:
1552 return {.IsInstrRelative: InstrInvariant, .Distance: calcDistanceThroughLoopToOutsideLoopUseMI(
1553 CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
1554 }
1555 llvm_unreachable("unexpected loop configuration!");
1556 }
1557
1558 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1559 // getUses helpers (compute mode)
1560 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1561private:
1562 // Returns true if Use is reachable from MI. Handles backedges and intervening
1563 // defs.
1564 bool isUseReachablePrecise(const MachineInstr &MI,
1565 const MachineBasicBlock *MBB,
1566 const MachineOperand *UseMO,
1567 const MachineInstr *UseMI,
1568 const MachineBasicBlock *UseMBB) const {
1569
1570 // Filter out uses that are clearly unreachable
1571 if (MBB != UseMBB && !isReachable(From: MBB, To: UseMBB))
1572 return false;
1573
1574 // PHI uses are considered part of the incoming BB. Check for reachability
1575 // at the edge.
1576 if (auto *PhiUseEdge = getIncomingBlockIfPhiUse(MI: UseMI, MO: UseMO)) {
1577 if (!isReachableOrSame(From: MBB, To: PhiUseEdge))
1578 return false;
1579 }
1580
1581 // Filter out uses with an intermediate def.
1582 const MachineInstr *DefMI = MRI->getUniqueVRegDef(Reg: UseMO->getReg());
1583 const MachineBasicBlock *DefMBB = DefMI->getParent();
1584 if (MBB == UseMBB) {
1585 if (UseMI->isPHI() && MBB == DefMBB)
1586 return true;
1587
1588 if (instrsAreInOrder(A: &MI, B: UseMI))
1589 return true;
1590
1591 // A Def in the loop means that the value at MI will not survive through
1592 // to this use.
1593 MachineLoop *UseLoop = MLI->getLoopFor(BB: UseMBB);
1594 return UseLoop && !UseLoop->contains(BB: DefMBB);
1595 }
1596
1597 if (MBB == DefMBB)
1598 return instrsAreInOrder(A: DefMI, B: &MI);
1599
1600 MachineLoop *Loop = MLI->getLoopFor(BB: MBB);
1601 if (!Loop)
1602 return true;
1603
1604 MachineLoop *TopLoop = Loop->getOutermostLoop();
1605 return !TopLoop->contains(BB: DefMBB) || !isReachable(From: MBB, To: DefMBB) ||
1606 !isForwardReachable(From: UseMBB, To: MBB);
1607 }
1608
1609 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1610 // Debug/Developer Helpers
1611 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1612private:
1613 /// Goes over all MBB pairs in \p MF, calculates the shortest path between
1614 /// them.
1615 void populatePathTable() {
1616 for (const MachineBasicBlock &MBB1 : *MF) {
1617 for (const MachineBasicBlock &MBB2 : *MF) {
1618 if (&MBB1 == &MBB2)
1619 continue;
1620 getShortestPath(From: &MBB1, To: &MBB2);
1621 }
1622 }
1623 }
1624
1625 void printPaths(raw_ostream &OS) const {
1626 OS << "\n---------------- Paths --------------- {\n";
1627 for (const auto &[P, PI] : Paths) {
1628 OS << " " << printMBBReference(MBB: *P.src()) << " -> "
1629 << printMBBReference(MBB: *P.dst()) << ": ";
1630 PI.print(OS);
1631 OS << '\n';
1632 }
1633 OS << "}\n";
1634 }
1635
1636 LLVM_DUMP_METHOD void dumpPaths() const { printPaths(OS&: dbgs()); }
1637
1638 // Legacy alias kept for existing call sites.
1639 void dumpShortestPaths() const {
1640 for (const auto &P : Paths) {
1641 const MachineBasicBlock *From = P.first.src();
1642 const MachineBasicBlock *To = P.first.dst();
1643 std::optional<NextUseDistance> Dist = P.second.ShortestDistance;
1644 dbgs() << "From: " << printMBBReference(MBB: *From)
1645 << "-> To:" << printMBBReference(MBB: *To) << " = "
1646 << Dist.value_or(u: -1).fmt() << "\n";
1647 }
1648 }
1649
1650 void printInterBlockDistances(raw_ostream &OS) const {
1651 using MBBPair = std::pair<unsigned, unsigned>;
1652 using Elem = std::pair<NextUseDistance, MBBPair>;
1653 std::vector<Elem> SortedDistances;
1654
1655 for (const auto &[FromNum, Dsts] : InterBlockDistances) {
1656 for (const auto &[ToNum, Dist] : Dsts) {
1657 SortedDistances.emplace_back(args: Dist.Weighted, args: MBBPair(FromNum, ToNum));
1658 }
1659 }
1660 llvm::sort(C&: SortedDistances, Comp: [](const auto &A, const auto &B) {
1661 if (A.first != B.first)
1662 return A.first < B.first;
1663
1664 if (A.second.first != B.second.first)
1665 return A.second.first < B.second.first;
1666
1667 return A.second.second < B.second.second;
1668 });
1669
1670 OS << "\n--------- InterBlockDistances -------- {\n";
1671 for (const Elem &E : SortedDistances) {
1672
1673 OS << " bb." << E.second.first << " -> bb." << E.second.second << ": ";
1674 E.first.print(OS);
1675 OS << '\n';
1676 }
1677 OS << "}\n";
1678 }
1679
1680 LLVM_DUMP_METHOD void dumpInterBlockDistances() const {
1681 printInterBlockDistances(OS&: dbgs());
1682 }
1683
1684 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1685 // LiveRegUse Caching - A cache of the distances for the last
1686 // MachineInstruction. When getting the distances for a MachineInstruction, if
1687 // it is the same basic block as the cached instruction, we can generally use
1688 // an offset from the cached values to compute the distances. There are some
1689 // exceptions - see 'cacheLiveRegUse'.
1690 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1691private:
1692 struct LiveRegToUseMapElem {
1693 LiveRegUse Use;
1694 bool MIDependent;
1695 LiveRegToUseMapElem() : Use(), MIDependent(false) {}
1696 LiveRegToUseMapElem(LiveRegUse U, bool MIDep)
1697 : Use(U), MIDependent(MIDep) {}
1698
1699 void print(raw_ostream &OS) const {
1700 Use.print(OS);
1701 OS << (MIDependent ? " [mi-dep]" : " [mi-indep]");
1702 }
1703
1704 LLVM_DUMP_METHOD void dump() const {
1705 print(OS&: dbgs());
1706 dbgs() << '\n';
1707 }
1708 };
1709
1710 // Using std::map because LaneBitmask does not work out-of-the-box as a
1711 // DenseMap key and I did not see a performance benefit over std::map.
1712 using LaneBitmaskToUseMap = std::map<LaneBitmask, LiveRegToUseMapElem>;
1713 using LiveRegToUseMap = DenseMap<Register, LaneBitmaskToUseMap>;
1714
1715 const MachineInstr *CachedDistancesMI = nullptr;
1716 LiveRegToUseMap CachedDistances;
1717 LiveRegToUseMap PendingCachedDistances;
1718 unsigned DistanceCacheHits = 0;
1719 unsigned DistanceCacheMisses = 0;
1720
1721 void resetDistanceCache() {
1722 CachedDistancesMI = nullptr;
1723 CachedDistances.clear();
1724 DistanceCacheHits = 0;
1725 DistanceCacheMisses = 0;
1726 }
1727
1728 void maybeClearCachedLiveRegUses(const MachineInstr &MI) {
1729 if (CachedDistancesMI &&
1730 (CachedDistancesMI->getParent() != MI.getParent() ||
1731 !instrsAreInOrder(A: CachedDistancesMI, B: &MI))) {
1732 CachedDistancesMI = nullptr;
1733 CachedDistances.clear();
1734 }
1735 }
1736
1737 bool okToUseCacheElem(const LiveRegToUseMapElem &CacheElem,
1738 const MachineInstr &MI, const InstrIdTy LastDelta) {
1739 if (!CacheElem.MIDependent)
1740 return true;
1741
1742 const LiveRegUse &U = CacheElem.Use;
1743
1744 // Never okay to produce a negative distance
1745 if (U.Dist < LastDelta)
1746 return false;
1747
1748 const MachineInstr *UseMI = U.Use->getParent();
1749
1750 // Always okay if use is in another basic block or UseMI is MI
1751 if (UseMI->getParent() != MI.getParent() || UseMI == &MI)
1752 return true;
1753
1754 // If CachedDistancesMI <= Use < MI we could have a problem since we don't
1755 // know if Use is still reachable.
1756 return !instrsAreInOrder(A: CachedDistancesMI, B: UseMI) ||
1757 !instrsAreInOrder(A: UseMI, B: &MI);
1758 }
1759
1760 std::pair<const LaneBitmaskToUseMap *, const LiveRegToUseMapElem *>
1761 findCachedLiveRegUse(Register Reg, LaneBitmask LaneMask,
1762 const MachineInstr &MI, const InstrIdTy LastDelta) {
1763 if (!DistanceCacheEnabled)
1764 return {nullptr, nullptr};
1765
1766 ++DistanceCacheMisses; // Assume miss
1767 auto I = CachedDistances.find(Val: Reg);
1768 if (I == CachedDistances.end())
1769 return {nullptr, nullptr};
1770 const LaneBitmaskToUseMap &RegSlot = I->second;
1771 if (RegSlot.empty())
1772 return {nullptr, nullptr};
1773
1774 auto J = RegSlot.find(x: LaneMask);
1775 if (J == RegSlot.end())
1776 return {nullptr, nullptr};
1777
1778 const LiveRegToUseMapElem &MaskSlot = J->second;
1779 if (!okToUseCacheElem(CacheElem: MaskSlot, MI, LastDelta))
1780 return {nullptr, nullptr};
1781
1782 --DistanceCacheMisses;
1783 ++DistanceCacheHits;
1784 return {&RegSlot, &MaskSlot};
1785 }
1786
1787 void cacheLiveRegUse(const MachineInstr &MI, Register Reg, LaneBitmask Mask,
1788 LiveRegUse U, bool MIDependent) {
1789 if (!DistanceCacheEnabled)
1790 return;
1791
1792 auto I = PendingCachedDistances.try_emplace(Key: Reg).first;
1793 LaneBitmaskToUseMap &RegSlot = I->second;
1794 RegSlot.try_emplace(k: Mask, args&: U, args&: MIDependent);
1795 }
1796
1797 void updateCachedLiveRegUses(const MachineInstr &MI) {
1798 if (!DistanceCacheEnabled)
1799 return;
1800
1801 CachedDistancesMI = &MI;
1802 CachedDistances = std::move(PendingCachedDistances);
1803 PendingCachedDistances.clear();
1804 LLVM_DEBUG(dumpDistanceCache());
1805 }
1806
1807 void printDistanceCache(raw_ostream &OS) const {
1808 OS << "\n----------- Distance Cache ----------- {\n";
1809 OS << " CachedAt: ";
1810 if (CachedDistancesMI)
1811 OS << *CachedDistancesMI;
1812 else
1813 OS << "<none>\n";
1814
1815 constexpr size_t RegNameWidth = 20;
1816 for (const auto &[Reg, ByMask] : CachedDistances) {
1817 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
1818 LaneBitmask AllLanes = MRI->getMaxLaneMaskForVReg(Reg);
1819
1820 for (const auto &[Mask, Elem] : ByMask) {
1821 std::string RegName;
1822 raw_string_ostream KOS(RegName);
1823 if (Mask == AllLanes) {
1824 KOS << printReg(Reg);
1825 } else {
1826 SmallVector<unsigned> Indexes;
1827 TRI->getCoveringSubRegIndexes(RC, LaneMask: Mask, Indexes);
1828 if (Indexes.size() == 1)
1829 KOS << printReg(Reg, TRI, SubIdx: Indexes.front(), MRI);
1830 else
1831 KOS << printReg(Reg) << " mask=" << Mask.getAsInteger();
1832 }
1833 OS << " " << left_justify(Str: RegName, Width: RegNameWidth) << " : ";
1834 Elem.print(OS);
1835 OS << '\n';
1836 }
1837 }
1838 OS << " (hits=" << DistanceCacheHits << " misses=" << DistanceCacheMisses
1839 << ")\n";
1840 OS << "}\n";
1841 }
1842
1843 LLVM_DUMP_METHOD void dumpDistanceCache() const {
1844 printDistanceCache(OS&: dbgs());
1845 }
1846
1847 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1848 // Processing Live Reg Uses
1849 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1850private:
1851 // Decompose each use in 'Uses' by sub-reg and store the nearest one in
1852 // 'UseByMask'. Ignores subregs matching 'LiveRegLaneMask' - these are handled
1853 // as registers, not sub-regs.
1854 DenseMap<const TargetRegisterClass *, SmallVector<unsigned>>
1855 SubRegIndexesForRegClass;
1856 void collectSubRegUsesByMask(
1857 const SmallVectorImpl<const MachineOperand *> &Uses,
1858 const SmallVectorImpl<CacheableNextUseDistance> &Distances,
1859 LaneBitmask LiveRegLaneMask, LaneBitmaskToUseMap &UseByMask) {
1860
1861 assert(Uses.size());
1862 assert(Uses.size() == Distances.size());
1863
1864 const TargetRegisterClass *RC = MRI->getRegClass(Reg: Uses.front()->getReg());
1865 auto [SRI, Inserted] = SubRegIndexesForRegClass.try_emplace(Key: RC);
1866 if (Inserted)
1867 TRI->getCoveringSubRegIndexes(RC, LaneMask: LaneBitmask::getAll(), Indexes&: SRI->second);
1868 const SmallVector<unsigned> &RCSubRegIndexes = SRI->second;
1869
1870 unsigned OneIndex; // Backing store for 'Indexes' below when 1 index
1871 for (size_t I = 0; I < Uses.size(); ++I) {
1872 const MachineOperand *MO = Uses[I];
1873 auto [SubRegMIDep, Dist] = Distances[I];
1874 const LiveRegUse LRU{MO, Dist};
1875
1876 ArrayRef<unsigned> Indexes;
1877 if (MO->getSubReg()) {
1878 OneIndex = MO->getSubReg();
1879 Indexes = ArrayRef(OneIndex);
1880 } else {
1881 Indexes = RCSubRegIndexes;
1882 }
1883
1884 for (unsigned Idx : Indexes) {
1885 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx: Idx);
1886 if (Mask.all() || Mask == LiveRegLaneMask)
1887 continue;
1888
1889 auto &[SlotU, SlotMIDep] = UseByMask[Mask];
1890 if (updateClosest(Closest&: SlotU, X: LRU))
1891 SlotMIDep = SubRegMIDep;
1892 }
1893 }
1894 }
1895
1896 // Similar to 'collectSubRegUsesByMask' above, but uses cached distances.
1897 void collectSubRegUsesByMaskFromCache(const LaneBitmaskToUseMap &CachedMap,
1898 LaneBitmask LiveRegLaneMask,
1899 const MachineInstr *MI,
1900 InstrIdTy LastDelta,
1901 LaneBitmaskToUseMap &UseByMask) {
1902
1903 for (const auto &KV : CachedMap) {
1904 LaneBitmask SubregLaneMask = KV.first;
1905 if (SubregLaneMask.all() || SubregLaneMask == LiveRegLaneMask)
1906 continue;
1907
1908 const LiveRegToUseMapElem &SubregE = KV.second;
1909 if (!okToUseCacheElem(CacheElem: SubregE, MI: *MI, LastDelta))
1910 continue;
1911
1912 const bool MIDep = SubregE.MIDependent;
1913 LiveRegUse U = SubregE.Use;
1914 if (MIDep)
1915 U.Dist -= LastDelta;
1916
1917 auto &[SlotU, SlotMIDep] = UseByMask[SubregLaneMask];
1918 if (updateClosest(Closest&: SlotU, X: U))
1919 SlotMIDep = MIDep;
1920 }
1921 }
1922
1923 // Loops through 'UseByMask' finding the furthest sub-register and updating
1924 // 'FurthestSubreg' accordingly.
1925 void updateFurthestSubReg(
1926 const MachineInstr &MI, const LiveRegUse &U,
1927 const LaneBitmaskToUseMap &UseByMask,
1928 DenseMap<const MachineOperand *, UseDistancePair> *RelevantUses,
1929 LiveRegUse &FurthestSubreg) {
1930
1931 if (UseByMask.empty()) {
1932 updateFurthest(Furthest&: FurthestSubreg, X: U);
1933 return;
1934 }
1935
1936 for (const auto &KV : UseByMask) {
1937 const LiveRegUse &SubregU = KV.second.Use;
1938 const bool SubregMIDep = KV.second.MIDependent;
1939
1940 if (RelevantUses)
1941 RelevantUses->try_emplace(Key: SubregU.Use, Args: SubregU);
1942 cacheLiveRegUse(MI, Reg: SubregU.Use->getReg(), Mask: KV.first, U: SubregU,
1943 MIDependent: SubregMIDep);
1944 updateFurthest(Furthest&: FurthestSubreg, X: SubregU);
1945 }
1946 }
1947
1948 // Used to populate 'MIDefs' to be passed to 'getNextUseDistances'.
1949 SmallSet<Register, 4> collectDefinedRegisters(const MachineInstr &MI) const {
1950 SmallSet<Register, 4> MIDefs;
1951
1952 for (const MachineOperand &MO : MI.all_defs()) {
1953 if (MO.isReg() && MO.getReg().isValid() && hasAtLeastOneUse(Reg: MO.getReg()))
1954 MIDefs.insert(V: MO.getReg());
1955 }
1956 return MIDefs;
1957 }
1958
1959 // Computes distances from 'MI' to each registers in 'LiveRegs'. Returns the
1960 // furthest register and (optionally) sub-register in 'Furthest' and
1961 // 'FurthestSubreg' respectively.
1962public:
1963 void getNextUseDistances(const GCNRPTracker::LiveRegSet &LiveRegs,
1964 const MachineInstr &MI, LiveRegUse &Furthest,
1965 LiveRegUse *FurthestSubreg = nullptr,
1966 DenseMap<const MachineOperand *, UseDistancePair>
1967 *RelevantUses = nullptr) {
1968 const SmallSet<Register, 4> MIDefs(collectDefinedRegisters(MI));
1969
1970 SmallVector<const MachineOperand *> Uses;
1971 SmallVector<CacheableNextUseDistance> Distances;
1972 LaneBitmaskToUseMap UseByMask;
1973
1974 maybeClearCachedLiveRegUses(MI);
1975 const InstrIdTy LastDelta =
1976 CachedDistancesMI ? getDistance(From: CachedDistancesMI, To: &MI) : 0;
1977
1978 for (auto &KV : LiveRegs) {
1979 const Register Reg = KV.first;
1980 const LaneBitmask LaneMask = KV.second;
1981
1982 if (MIDefs.contains(V: Reg))
1983 continue;
1984
1985 Uses.clear();
1986 UseByMask.clear();
1987
1988 LiveRegUse U;
1989 bool MIDependent = false;
1990 auto [CacheMap, CacheElem] =
1991 findCachedLiveRegUse(Reg, LaneMask, MI, LastDelta);
1992 if (CacheMap && CacheElem) {
1993 MIDependent = CacheElem->MIDependent;
1994 U = CacheElem->Use;
1995 if (MIDependent)
1996 U.Dist -= LastDelta;
1997 } else {
1998 getReachableUses(LiveReg: Reg, LaneMask, MI, Uses);
1999 if (Uses.empty())
2000 continue;
2001
2002 const MachineOperand *NextUse = nullptr;
2003 NextUseDistance Dist = getShortestDistance(
2004 LiveReg: Reg, LaneMask, FromMI: MI, Uses, ShortestUseOut: &NextUse, MIDependent: &MIDependent, Distances: &Distances);
2005 U = LiveRegUse{NextUse, Dist};
2006 }
2007
2008 if (RelevantUses)
2009 RelevantUses->try_emplace(Key: U.Use, Args&: U);
2010 cacheLiveRegUse(MI, Reg, Mask: LaneMask, U, MIDependent);
2011
2012 updateFurthest(Furthest, X: U);
2013
2014 if (!FurthestSubreg)
2015 continue;
2016
2017 if (CacheMap) {
2018 collectSubRegUsesByMaskFromCache(CachedMap: *CacheMap, LiveRegLaneMask: LaneMask, MI: &MI, LastDelta,
2019 UseByMask);
2020 } else {
2021 collectSubRegUsesByMask(Uses, Distances, LiveRegLaneMask: LaneMask, UseByMask);
2022 }
2023 updateFurthestSubReg(MI, U, UseByMask, RelevantUses, FurthestSubreg&: *FurthestSubreg);
2024 }
2025 updateCachedLiveRegUses(MI);
2026 }
2027
2028 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2029 // Helper methods for printAsJson
2030 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2031private:
2032 static format_object<unsigned> Fmt(unsigned Id) { return format(Fmt: "%u", Vals: Id); }
2033
2034public:
2035 void printVerboseInstrFields(json::OStream &J, const MachineInstr &MI) const {
2036 J.attribute(Key: "id", Contents: getInstrId(MI: &MI));
2037 J.attribute(Key: "head-len", Contents: getHeadLen(MI: &MI));
2038 J.attribute(Key: "tail-len", Contents: getTailLen(MI: &MI));
2039 }
2040
2041 void printPaths(json::OStream &J, ModuleSlotTracker &MST) const {
2042 J.attributeBegin(Key: "paths");
2043 J.arrayBegin();
2044 for (const auto &KV : Paths) {
2045 const Path &P = KV.first;
2046 const PathInfo &PI = KV.second;
2047
2048 J.objectBegin();
2049
2050 printMBBNameAttr(J, Name: "src", MBB: *P.src(), MST);
2051 printMBBNameAttr(J, Name: "dst", MBB: *P.dst(), MST);
2052
2053 if (PI.ShortestDistance.has_value()) {
2054 J.attribute(Key: "shortest-distance",
2055 Contents: PI.ShortestDistance.value().toJsonValue());
2056 } else {
2057 J.attribute(Key: "shortest-distance", Contents: nullptr);
2058 }
2059
2060 if (PI.ShortestUnweightedDistance.has_value()) {
2061 J.attribute(Key: "shortest-unweighted-distance",
2062 Contents: PI.ShortestUnweightedDistance.value().toJsonValue());
2063 } else {
2064 J.attribute(Key: "shortest-unweighted-distance", Contents: nullptr);
2065 }
2066
2067 J.attribute(Key: "edge-kind", Contents: static_cast<int>(PI.EK));
2068 J.attribute(Key: "reachable", Contents: PI.Reachable);
2069 J.attribute(Key: "forward-reachable", Contents: PI.ForwardReachable);
2070
2071 J.objectEnd();
2072 }
2073 J.arrayEnd();
2074 J.attributeEnd();
2075 }
2076
2077public:
2078 AMDGPUNextUseAnalysisImpl(const MachineFunction *, const MachineLoopInfo *);
2079 ~AMDGPUNextUseAnalysisImpl() { clearTables(); }
2080
2081 AMDGPUNextUseAnalysis::Config getConfig() const { return Cfg; }
2082 void setConfig(AMDGPUNextUseAnalysis::Config NewCfg) {
2083 Cfg = NewCfg;
2084 clearTables();
2085 initializeTables();
2086 }
2087
2088 unsigned getDistanceCacheHits() const { return DistanceCacheHits; }
2089 unsigned getDistanceCacheMisses() const { return DistanceCacheMisses; }
2090
2091 void getReachableUses(Register LiveReg, LaneBitmask LaneMask,
2092 const MachineInstr &MI,
2093 SmallVector<const MachineOperand *> &Uses) const;
2094
2095 /// \Returns the shortest next-use distance for \p LiveReg.
2096 NextUseDistance
2097 getShortestDistance(Register LiveReg, LaneBitmask LaneMask,
2098 const MachineInstr &FromMI,
2099 const SmallVector<const MachineOperand *> &Uses,
2100 const MachineOperand **ShortestUseOut, bool *MIDependent,
2101 SmallVector<CacheableNextUseDistance> *Distances) const;
2102
2103 NextUseDistance
2104 getShortestDistance(Register LiveReg, const MachineInstr &FromMI,
2105 const SmallVector<const MachineOperand *> &Uses) const {
2106 return getShortestDistance(LiveReg, LaneMask: LaneBitmask::getAll(), FromMI, Uses,
2107 ShortestUseOut: nullptr, MIDependent: nullptr, Distances: nullptr);
2108 }
2109};
2110
2111AMDGPUNextUseAnalysisImpl::AMDGPUNextUseAnalysisImpl(
2112 const MachineFunction *MF, const MachineLoopInfo *ML) {
2113
2114 this->MF = MF;
2115 this->MLI = ML;
2116
2117 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2118 TII = ST.getInstrInfo();
2119 TRI = &TII->getRegisterInfo();
2120 MRI = &MF->getRegInfo();
2121
2122 // FIXME: Hopefully we will soon converge on a single way of calculating
2123 // next-use distance and remove these presets.
2124 if (ConfigPresetOpt == "compute")
2125 Cfg = AMDGPUNextUseAnalysis::Config::Compute();
2126 else
2127 Cfg = AMDGPUNextUseAnalysis::Config::Graphics();
2128
2129 if (ConfigCountPhisOpt.getNumOccurrences())
2130 Cfg.CountPhis = ConfigCountPhisOpt;
2131 if (ConfigForwardOnlyOpt.getNumOccurrences())
2132 Cfg.ForwardOnly = ConfigForwardOnlyOpt;
2133 if (ConfigPreciseUseModelingOpt.getNumOccurrences())
2134 Cfg.PreciseUseModeling = ConfigPreciseUseModelingOpt;
2135 if (ConfigPromoteToPreheaderOpt.getNumOccurrences())
2136 Cfg.PromoteToPreheader = ConfigPromoteToPreheaderOpt;
2137
2138 initializeTables();
2139}
2140
2141NextUseDistance AMDGPUNextUseAnalysisImpl::getShortestDistance(
2142 Register LiveReg, LaneBitmask LaneMask, const MachineInstr &CurMI,
2143 const SmallVector<const MachineOperand *> &Uses,
2144 const MachineOperand **ShortestUseOut, bool *CurMIDependentOut,
2145 SmallVector<CacheableNextUseDistance> *Distances) const {
2146
2147 assert(!LiveReg.isPhysical() && !TRI->isAGPR(*MRI, LiveReg) &&
2148 "Next-use distance is calculated for SGPRs and VGPRs");
2149 const MachineOperand *NextUse = nullptr;
2150 auto NextUseDist = NextUseDistance::unreachable();
2151 bool CurMIDependent = false;
2152
2153 if (Distances) {
2154 Distances->clear();
2155 Distances->reserve(N: Uses.size());
2156 }
2157 for (auto *UseMO : Uses) {
2158 auto [Dep, D] = calcDistanceToUse(LiveReg, LiveLaneMask: LaneMask, CurMI, UseMO);
2159
2160 if (D < NextUseDist) {
2161 NextUseDist = D;
2162 NextUse = UseMO;
2163 CurMIDependent = Dep;
2164 }
2165
2166 if (Distances)
2167 Distances->push_back(Elt: {.IsInstrRelative: Dep, .Distance: D});
2168 }
2169 if (ShortestUseOut)
2170 *ShortestUseOut = NextUse;
2171 if (CurMIDependentOut)
2172 *CurMIDependentOut = CurMIDependent;
2173
2174 assert(NextUseDist.isReachable() &&
2175 "getShortestDistance called with no reachable uses");
2176 return NextUseDist;
2177}
2178
2179void AMDGPUNextUseAnalysisImpl::getReachableUses(
2180 Register Reg, LaneBitmask LaneMask, const MachineInstr &MI,
2181 SmallVector<const MachineOperand *> &Uses) const {
2182 const bool CheckMask = LaneMask != LaneBitmask::getAll() &&
2183 LaneMask != MRI->getMaxLaneMaskForVReg(Reg);
2184 const MachineBasicBlock *MBB = MI.getParent();
2185
2186 for (const MachineOperand *UseMO : getRegisterUses(Reg)) {
2187 const MachineInstr *UseMI = UseMO->getParent();
2188 const MachineBasicBlock *UseMBB = UseMI->getParent();
2189
2190 if (CheckMask && !machineOperandCoveredBy(MO: *UseMO, LaneMask))
2191 continue;
2192
2193 bool Reachable;
2194 if (Cfg.PreciseUseModeling)
2195 Reachable = isUseReachablePrecise(MI, MBB, UseMO, UseMI, UseMBB);
2196 else if (MBB == UseMBB)
2197 Reachable = instrsAreInOrder(A: &MI, B: UseMI);
2198 else
2199 Reachable = isForwardReachable(From: MBB, To: UseMBB);
2200
2201 if (Reachable)
2202 Uses.push_back(Elt: UseMO);
2203 }
2204}
2205
2206//==============================================================================
2207// AMDGPUNextUseAnalysis
2208//==============================================================================
2209AMDGPUNextUseAnalysis::AMDGPUNextUseAnalysis(const MachineFunction *MF,
2210 const MachineLoopInfo *MLI) {
2211 Impl = std::make_unique<AMDGPUNextUseAnalysisImpl>(args&: MF, args&: MLI);
2212}
2213AMDGPUNextUseAnalysis::AMDGPUNextUseAnalysis(AMDGPUNextUseAnalysis &&Other)
2214 : Impl(std::move(Other.Impl)) {}
2215AMDGPUNextUseAnalysis::~AMDGPUNextUseAnalysis() {}
2216
2217AMDGPUNextUseAnalysis &
2218AMDGPUNextUseAnalysis::operator=(AMDGPUNextUseAnalysis &&Other) {
2219 if (this != &Other)
2220 Impl = std::move(Other.Impl);
2221 return *this;
2222}
2223
2224AMDGPUNextUseAnalysis::Config AMDGPUNextUseAnalysis::getConfig() const {
2225 return Impl->getConfig();
2226}
2227
2228void AMDGPUNextUseAnalysis::setConfig(Config Cfg) { Impl->setConfig(Cfg); }
2229
2230/// \Returns the next-use distance for \p LiveReg.
2231NextUseDistance AMDGPUNextUseAnalysis::getShortestDistance(
2232 Register LiveReg, const MachineInstr &FromMI,
2233 const SmallVector<const MachineOperand *> &Uses,
2234 const MachineOperand **ShortestUseOut,
2235 SmallVector<NextUseDistance> *DistancesOut) const {
2236
2237 SmallVector<AMDGPUNextUseAnalysisImpl::CacheableNextUseDistance> Distances;
2238 auto Dist = Impl->getShortestDistance(LiveReg, LaneMask: LaneBitmask::getAll(), CurMI: FromMI,
2239 Uses, ShortestUseOut, CurMIDependentOut: nullptr,
2240 Distances: DistancesOut ? &Distances : nullptr);
2241 if (DistancesOut) {
2242 for (auto [MIDep, D] : Distances)
2243 DistancesOut->push_back(Elt: D);
2244 }
2245 return Dist;
2246}
2247
2248void AMDGPUNextUseAnalysis::getNextUseDistances(
2249 const DenseMap<unsigned, LaneBitmask> &LiveRegs, const MachineInstr &MI,
2250 UseDistancePair &FurthestOut, UseDistancePair *FurthestSubregOut,
2251 DenseMap<const MachineOperand *, UseDistancePair> *RelevantUses) const {
2252
2253 LiveRegUse Furthest;
2254 LiveRegUse FurthestSubreg;
2255 Impl->getNextUseDistances(LiveRegs, MI, Furthest,
2256 FurthestSubreg: FurthestSubregOut ? &FurthestSubreg : nullptr,
2257 RelevantUses);
2258 FurthestOut = Furthest;
2259 if (FurthestSubregOut)
2260 *FurthestSubregOut = FurthestSubreg;
2261}
2262void AMDGPUNextUseAnalysis::getReachableUses(
2263 Register LiveReg, LaneBitmask LaneMask, const MachineInstr &MI,
2264 SmallVector<const MachineOperand *> &Uses) const {
2265 return Impl->getReachableUses(Reg: LiveReg, LaneMask, MI, Uses);
2266}
2267
2268//==============================================================================
2269// AMDGPUNextUseAnalysisLegacyPass
2270//==============================================================================
2271
2272//------------------------------------------------------------------------------
2273// Legacy Analysis Pass
2274//------------------------------------------------------------------------------
2275AMDGPUNextUseAnalysisLegacyPass::AMDGPUNextUseAnalysisLegacyPass()
2276 : MachineFunctionPass(ID) {}
2277StringRef AMDGPUNextUseAnalysisLegacyPass::getPassName() const {
2278 return "Next Use Analysis";
2279}
2280
2281bool AMDGPUNextUseAnalysisLegacyPass::runOnMachineFunction(
2282 MachineFunction &MF) {
2283 const MachineLoopInfo *MLI =
2284 &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2285 NUA.reset(p: new AMDGPUNextUseAnalysis(&MF, MLI));
2286 return false;
2287}
2288
2289void AMDGPUNextUseAnalysisLegacyPass::getAnalysisUsage(
2290 AnalysisUsage &AU) const {
2291 AU.addRequired<MachineLoopInfoWrapperPass>();
2292 AU.setPreservesAll();
2293 MachineFunctionPass::getAnalysisUsage(AU);
2294}
2295
2296char AMDGPUNextUseAnalysisLegacyPass::ID = 0;
2297char &llvm::AMDGPUNextUseAnalysisLegacyID = AMDGPUNextUseAnalysisLegacyPass::ID;
2298
2299INITIALIZE_PASS_BEGIN(AMDGPUNextUseAnalysisLegacyPass, DEBUG_TYPE,
2300 "Next Use Analysis", false, true)
2301INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
2302INITIALIZE_PASS_END(AMDGPUNextUseAnalysisLegacyPass, DEBUG_TYPE,
2303 "Next Use Analysis", false, true)
2304
2305FunctionPass *llvm::createAMDGPUNextUseAnalysisLegacyPass() {
2306 return new AMDGPUNextUseAnalysisLegacyPass();
2307}
2308
2309//------------------------------------------------------------------------------
2310// New Pass Manager Analysis Pass
2311//------------------------------------------------------------------------------
2312AnalysisKey AMDGPUNextUseAnalysisPass::Key;
2313
2314AMDGPUNextUseAnalysisPass::Result
2315AMDGPUNextUseAnalysisPass::run(MachineFunction &MF,
2316 MachineFunctionAnalysisManager &MFAM) {
2317 const MachineLoopInfo &MLI = MFAM.getResult<MachineLoopAnalysis>(IR&: MF);
2318 return AMDGPUNextUseAnalysis(&MF, &MLI);
2319}
2320
2321//==============================================================================
2322// AMDGPUNextUseAnalysisPrinterLegacyPass
2323//==============================================================================
2324namespace {
2325void printInstrMember(json::OStream &J, ModuleSlotTracker &MST,
2326 const MachineInstr &MI,
2327 const AMDGPUNextUseAnalysisImpl &NUA) {
2328 printStringAttr(J, Name: "instr", MI, MST);
2329 if (DumpNextUseDistanceVerbose)
2330 NUA.printVerboseInstrFields(J, MI);
2331}
2332
2333void printDistances(
2334 json::OStream &J, const MachineRegisterInfo &MRI, const SIRegisterInfo &TRI,
2335 ModuleSlotTracker &MST,
2336 const DenseMap<const MachineOperand *, UseDistancePair> &Uses) {
2337 if (!DumpNextUseDistanceVerbose)
2338 return;
2339
2340 // Sorting isn't necessary for the purposes of JSON, but it reduces
2341 // FileCheck differences.
2342 SmallVector<const MachineOperand *> Keys;
2343 for (const MachineOperand *K : Uses.keys())
2344 Keys.push_back(Elt: K);
2345 llvm::sort(C&: Keys, Comp: [](const auto &A, const auto &B) {
2346 return A->getReg() < B->getReg() ||
2347 (A->getReg() == B->getReg() && A->getSubReg() < B->getSubReg());
2348 });
2349
2350 J.attributeBegin(Key: "distances");
2351 J.objectBegin();
2352
2353 for (const MachineOperand *K : Keys) {
2354 const LiveRegUse U = Uses.at(Val: K);
2355 printAttr(J, P: printReg(Reg: U.getReg(), TRI: &TRI, SubIdx: U.getSubReg(), MRI: &MRI),
2356 V: U.Dist.toJsonValue());
2357 }
2358
2359 J.objectEnd();
2360 J.attributeEnd();
2361}
2362
2363void printFurthestUse(json::OStream &J, const MachineRegisterInfo &MRI,
2364 const SIRegisterInfo &TRI, ModuleSlotTracker &MST,
2365 const LiveRegUse F, bool Subreg = false) {
2366 J.attributeBegin(Key: Subreg ? "furthest-subreg" : "furthest");
2367 J.objectBegin();
2368
2369 if (F.Use) {
2370 printStringAttr(
2371 J, Name: "register",
2372 P: printReg(Reg: F.getReg(), TRI: &TRI, SubIdx: Subreg ? F.getSubReg() : 0, MRI: &MRI));
2373
2374 if (DumpNextUseDistanceVerbose) {
2375 printStringAttr(J, Name: "use", L: [&](raw_ostream &OS) { OS << (*F.Use); });
2376 printStringAttr(J, Name: "use-mi", MI: *F.Use->getParent(), MST);
2377 }
2378 J.attribute(Key: "distance", Contents: F.Dist.toJsonValue());
2379 }
2380
2381 J.objectEnd();
2382 J.attributeEnd();
2383}
2384
2385void printDistanceFromDefToUse(json::OStream &J, const MachineFunction &MF,
2386 const AMDGPUNextUseAnalysis &NUA,
2387 const SIRegisterInfo &TRI,
2388 const MachineRegisterInfo &MRI) {
2389 auto getRegNextUseDistance = [&](Register DefReg) {
2390 const MachineInstr &DefMI = *MRI.def_instr_begin(RegNo: DefReg);
2391
2392 SmallVector<const MachineOperand *> Uses;
2393 NUA.getReachableUses(LiveReg: DefReg, LaneMask: LaneBitmask::getAll(), MI: DefMI, Uses);
2394 if (Uses.empty())
2395 return NextUseDistance::unreachable();
2396 return NUA.getShortestDistance(LiveReg: DefReg, FromMI: DefMI, Uses);
2397 };
2398
2399 J.attributeBegin(Key: "distance-from-def-to-closest-use");
2400 J.objectBegin();
2401
2402 for (const MachineBasicBlock &MBB : MF) {
2403 for (const MachineInstr &MI : MBB) {
2404 for (const MachineOperand &MO : MI.all_defs()) {
2405 Register Reg = MO.getReg();
2406 if (Reg.isPhysical())
2407 continue;
2408 NextUseDistance D = getRegNextUseDistance(Reg);
2409 printAttr(J, P: printReg(Reg, TRI: &TRI, SubIdx: 0, MRI: &MRI), V: D.toJsonValue());
2410 }
2411 }
2412 }
2413
2414 J.objectEnd();
2415 J.attributeEnd();
2416}
2417
2418void printNextUseDistancesAsJson(json::OStream &J, const MachineFunction &MF,
2419 const AMDGPUNextUseAnalysis &NUA,
2420 const AMDGPUNextUseAnalysisImpl &NUAImpl,
2421 const LiveIntervals &LIS) {
2422 using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
2423 const Function &F = MF.getFunction();
2424 const Module *M = F.getParent();
2425
2426 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2427 const SIInstrInfo *TII = ST.getInstrInfo();
2428 const SIRegisterInfo &TRI = TII->getRegisterInfo();
2429 const MachineRegisterInfo &MRI = MF.getRegInfo();
2430
2431 // We don't actually care about register pressure here - just using
2432 // GCNDownwardRPTracker as a convenient way of getting the set of live
2433 // registers at a given instruction.
2434 GCNDownwardRPTracker RPTracker(LIS);
2435 ModuleSlotTracker MST(M);
2436 MST.incorporateFunction(F);
2437
2438 DenseMap<const MachineOperand *, UseDistancePair> RelevantUses;
2439
2440 J.attributeBegin(Key: "furthest-distances");
2441 J.objectBegin();
2442
2443 for (const MachineBasicBlock &MBB : MF) {
2444 std::string BBName;
2445 raw_string_ostream BBOS(BBName);
2446 MBB.printName(os&: BBOS, printNameFlags: MachineBasicBlock::PrintNameIr, moduleSlotTracker: &MST);
2447
2448 J.attributeBegin(Key: BBOS.str());
2449 J.arrayBegin();
2450
2451 const MachineInstr *PrevMI = nullptr;
2452 for (const MachineInstr &MI : MBB) {
2453 // Update register pressure tracker
2454 if (!PrevMI || PrevMI->getOpcode() == AMDGPU::PHI)
2455 RPTracker.reset(MI, End: MBB.end());
2456 RPTracker.advance();
2457
2458 UseDistancePair Furthest;
2459 UseDistancePair FurthestSubreg;
2460 RelevantUses.clear();
2461 NUA.getNextUseDistances(LiveRegs: RPTracker.getLiveRegs(), MI, FurthestOut&: Furthest,
2462 FurthestSubregOut: &FurthestSubreg, RelevantUses: &RelevantUses);
2463
2464 J.objectBegin();
2465 printInstrMember(J, MST, MI, NUA: NUAImpl);
2466 printDistances(J, MRI, TRI, MST, Uses: RelevantUses);
2467 printFurthestUse(J, MRI, TRI, MST, F: Furthest);
2468 printFurthestUse(J, MRI, TRI, MST, F: FurthestSubreg, /*Subreg*/ true);
2469 J.objectEnd();
2470
2471 PrevMI = &MI;
2472 }
2473
2474 J.arrayEnd();
2475 J.attributeEnd();
2476 }
2477
2478 J.objectEnd();
2479 J.attributeEnd();
2480
2481 if (DumpNextUseDistanceVerbose || DumpNextUseDistanceDefToUse)
2482 printDistanceFromDefToUse(J, MF, NUA, TRI, MRI);
2483
2484 if (DumpNextUseDistanceVerbose)
2485 NUAImpl.printPaths(J, MST);
2486
2487 if (DistanceCacheEnabled) {
2488 J.attributeBegin(Key: "metrics");
2489 J.objectBegin();
2490 {
2491 J.attributeBegin(Key: "distance-cache");
2492 J.objectBegin();
2493 {
2494 J.attribute(Key: "hits", Contents: NUAImpl.getDistanceCacheHits());
2495 J.attribute(Key: "misses", Contents: NUAImpl.getDistanceCacheMisses());
2496 }
2497 J.objectEnd();
2498 J.attributeEnd(); // distance-cache
2499 }
2500 J.objectEnd();
2501 J.attributeEnd(); // metrics
2502 }
2503}
2504
2505void printAsJson(raw_ostream &FallbackOS, TimerGroup &JsonTimerGroup,
2506 Timer &JsonTimer, const MachineFunction &MF,
2507 const AMDGPUNextUseAnalysis &NUA,
2508 const AMDGPUNextUseAnalysisImpl &NUAImpl,
2509 const LiveIntervals &LIS) {
2510 std::string FN = DumpNextUseDistanceAsJson;
2511
2512 auto dump = [&](raw_ostream &OS) {
2513 json::OStream J(OS, 2);
2514 J.objectBegin();
2515
2516 J.attributeBegin(Key: "next-use-analysis");
2517 J.objectBegin();
2518 printNextUseDistancesAsJson(J, MF, NUA, NUAImpl, LIS);
2519 J.objectEnd();
2520 J.attributeEnd();
2521
2522 JsonTimer.stopTimer();
2523 JsonTimerGroup.printJSONValues(OS, delim: ",\n");
2524
2525 J.objectEnd();
2526 };
2527
2528 if (!DumpNextUseDistanceAsJson.getNumOccurrences()) {
2529 dump(FallbackOS);
2530 } else if (FN.empty() || FN == "-") {
2531 dump(outs());
2532 } else {
2533 std::error_code EC;
2534 ToolOutputFile OutF(FN, EC, sys::fs::OF_None);
2535 dump(OutF.os());
2536 OutF.keep();
2537 }
2538}
2539} // namespace
2540
2541//------------------------------------------------------------------------------
2542// Legacy Printer Pass
2543//------------------------------------------------------------------------------
2544AMDGPUNextUseAnalysisPrinterLegacyPass::AMDGPUNextUseAnalysisPrinterLegacyPass()
2545 : MachineFunctionPass(ID) {}
2546
2547StringRef AMDGPUNextUseAnalysisPrinterLegacyPass::getPassName() const {
2548 return "AMDGPU Next Use Analysis Printer";
2549}
2550
2551bool AMDGPUNextUseAnalysisPrinterLegacyPass::runOnMachineFunction(
2552 MachineFunction &MF) {
2553 TimerGroup JsonTimerGroup("amdgpu-next-use-analysis-json",
2554 "AMDGPU Next Use Analysis JSON Printer", false);
2555 Timer JsonTimer("json", "Total time spent generating json", JsonTimerGroup);
2556 JsonTimer.startTimer();
2557
2558 const LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
2559 const AMDGPUNextUseAnalysis &NUA =
2560 getAnalysis<AMDGPUNextUseAnalysisLegacyPass>().getNextUseAnalysis();
2561
2562 printAsJson(FallbackOS&: errs(), JsonTimerGroup, JsonTimer, MF, NUA, NUAImpl: *NUA.Impl, LIS);
2563
2564 return false;
2565}
2566
2567void AMDGPUNextUseAnalysisPrinterLegacyPass::getAnalysisUsage(
2568 AnalysisUsage &AU) const {
2569 AU.addRequired<MachineLoopInfoWrapperPass>();
2570 AU.addRequired<LiveIntervalsWrapperPass>();
2571 AU.addRequired<AMDGPUNextUseAnalysisLegacyPass>();
2572 AU.setPreservesAll();
2573 MachineFunctionPass::getAnalysisUsage(AU);
2574}
2575
2576char AMDGPUNextUseAnalysisPrinterLegacyPass::ID = 0;
2577char &AMDGPUNextUseAnalysisPrinterLegacyID =
2578 AMDGPUNextUseAnalysisPrinterLegacyPass::ID;
2579
2580INITIALIZE_PASS_BEGIN(AMDGPUNextUseAnalysisPrinterLegacyPass,
2581 "amdgpu-next-use-printer",
2582 "AMDGPU Next Use Analysis Printer", false, false)
2583
2584INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
2585INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
2586
2587INITIALIZE_PASS_END(AMDGPUNextUseAnalysisPrinterLegacyPass,
2588 "amdgpu-next-use-printer",
2589 "AMDGPU Next Use Analysis Printer", false, false)
2590
2591FunctionPass *llvm::createAMDGPUNextUseAnalysisPrinterLegacyPass() {
2592 return new AMDGPUNextUseAnalysisPrinterLegacyPass();
2593}
2594
2595//------------------------------------------------------------------------------
2596// New Pass Manager Printer Pass
2597//------------------------------------------------------------------------------
2598PreservedAnalyses
2599AMDGPUNextUseAnalysisPrinterPass::run(MachineFunction &MF,
2600 MachineFunctionAnalysisManager &MFAM) {
2601
2602 TimerGroup JsonTimerGroup("amdgpu-next-use-analysis-json",
2603 "AMDGPU Next Use Analysis JSON Printer", false);
2604 Timer JsonTimer("json", "Total time spent generating json", JsonTimerGroup);
2605 JsonTimer.startTimer();
2606
2607 const LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(IR&: MF);
2608 const AMDGPUNextUseAnalysis &NUA =
2609 MFAM.getResult<AMDGPUNextUseAnalysisPass>(IR&: MF);
2610
2611 printAsJson(FallbackOS&: OS, JsonTimerGroup, JsonTimer, MF, NUA, NUAImpl: *NUA.Impl, LIS);
2612
2613 return PreservedAnalyses::all();
2614}
2615