1//===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// \file
9// \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if
10// there is a long branch. Branch size at this point is difficult to track since
11// we have no idea what spills will be inserted later on. We just assume 8 bytes
12// per instruction to compute approximations without computing the actual
13// instruction size to see if we're in the neighborhood of the maximum branch
// distance threshold. Tuning of what is considered "long" is handled through
// the amdgpu-long-branch-factor cl argument, which sets LongBranchFactor.
16//===----------------------------------------------------------------------===//
17#include "GCNPreRALongBranchReg.h"
18#include "AMDGPU.h"
19#include "GCNSubtarget.h"
20#include "SIMachineFunctionInfo.h"
21#include "llvm/CodeGen/MachineFunctionPass.h"
22#include "llvm/InitializePasses.h"
23
24using namespace llvm;
25
26#define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"
27
28namespace {
29
30static cl::opt<double> LongBranchFactor(
31 "amdgpu-long-branch-factor", cl::init(Val: 1.0), cl::Hidden,
32 cl::desc("Factor to apply to what qualifies as a long branch "
33 "to reserve a pair of scalar registers. If this value "
34 "is 0 the long branch registers are never reserved. As this "
35 "value grows the greater chance the branch distance will fall "
36 "within the threshold and the registers will be marked to be "
37 "reserved. We lean towards always reserving a register for "
38 "long jumps"));
39
40class GCNPreRALongBranchReg {
41
42 struct BasicBlockInfo {
43 // Offset - Distance from the beginning of the function to the beginning
44 // of this basic block.
45 uint64_t Offset = 0;
46 // Size - Size of the basic block in bytes
47 uint64_t Size = 0;
48 };
49 void generateBlockInfo(MachineFunction &MF,
50 SmallVectorImpl<BasicBlockInfo> &BlockInfo);
51
52public:
53 GCNPreRALongBranchReg() = default;
54 bool run(MachineFunction &MF);
55};
56
57class GCNPreRALongBranchRegLegacy : public MachineFunctionPass {
58public:
59 static char ID;
60 GCNPreRALongBranchRegLegacy() : MachineFunctionPass(ID) {}
61
62 bool runOnMachineFunction(MachineFunction &MF) override {
63 return GCNPreRALongBranchReg().run(MF);
64 }
65
66 StringRef getPassName() const override {
67 return "AMDGPU Pre-RA Long Branch Reg";
68 }
69
70 void getAnalysisUsage(AnalysisUsage &AU) const override {
71 AU.setPreservesAll();
72 MachineFunctionPass::getAnalysisUsage(AU);
73 }
74};
75} // End anonymous namespace.
76
77char GCNPreRALongBranchRegLegacy::ID = 0;
78
79INITIALIZE_PASS(GCNPreRALongBranchRegLegacy, DEBUG_TYPE,
80 "AMDGPU Pre-RA Long Branch Reg", false, false)
81
82char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchRegLegacy::ID;
83void GCNPreRALongBranchReg::generateBlockInfo(
84 MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
85
86 BlockInfo.resize(N: MF.getNumBlockIDs());
87
88 // Approximate the size of all basic blocks by just
89 // assuming 8 bytes per instruction
90 for (const MachineBasicBlock &MBB : MF) {
91 uint64_t NumInstr = 0;
92 // Loop through the basic block and add up all non-debug
93 // non-meta instructions
94 for (const MachineInstr &MI : MBB) {
95 // isMetaInstruction is a superset of isDebugIstr
96 if (MI.isMetaInstruction())
97 continue;
98 NumInstr += 1;
99 }
100 // Approximate size as just 8 bytes per instruction
101 BlockInfo[MBB.getNumber()].Size = 8 * NumInstr;
102 }
103 uint64_t PrevNum = (&MF)->begin()->getNumber();
104 for (auto &MBB :
105 make_range(x: std::next(x: MachineFunction::iterator((&MF)->begin())),
106 y: (&MF)->end())) {
107 uint64_t Num = MBB.getNumber();
108 // Compute the offset immediately following this block.
109 BlockInfo[Num].Offset = BlockInfo[PrevNum].Offset + BlockInfo[PrevNum].Size;
110 PrevNum = Num;
111 }
112}
113
114bool GCNPreRALongBranchReg::run(MachineFunction &MF) {
115 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
116 const SIInstrInfo *TII = STM.getInstrInfo();
117 const SIRegisterInfo *TRI = STM.getRegisterInfo();
118 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
119 MachineRegisterInfo &MRI = MF.getRegInfo();
120
121 // For now, reserve highest available SGPR pair. After RA,
122 // shift down to a lower unused pair of SGPRs
123 // If all registers are used, then findUnusedRegister will return
124 // AMDGPU::NoRegister.
125 constexpr bool ReserveHighestRegister = true;
126 Register LongBranchReservedReg = TRI->findUnusedRegister(
127 MRI, RC: &AMDGPU::SGPR_64RegClass, MF, ReserveHighestVGPR: ReserveHighestRegister);
128 if (!LongBranchReservedReg)
129 return false;
130
131 // Approximate code size and offsets of each basic block
132 SmallVector<BasicBlockInfo, 16> BlockInfo;
133 generateBlockInfo(MF, BlockInfo);
134
135 for (const MachineBasicBlock &MBB : MF) {
136 MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr();
137 if (Last == MBB.end() || !Last->isUnconditionalBranch())
138 continue;
139 MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI: *Last);
140 uint64_t BlockDistance = static_cast<uint64_t>(
141 LongBranchFactor * BlockInfo[DestBB->getNumber()].Offset);
142 // If the distance falls outside the threshold assume it is a long branch
143 // and we need to reserve the registers
144 if (!TII->isBranchOffsetInRange(BranchOpc: Last->getOpcode(), BrOffset: BlockDistance)) {
145 MFI->setLongBranchReservedReg(LongBranchReservedReg);
146 return true;
147 }
148 }
149 return false;
150}
151
152PreservedAnalyses
153GCNPreRALongBranchRegPass::run(MachineFunction &MF,
154 MachineFunctionAnalysisManager &MFAM) {
155 GCNPreRALongBranchReg().run(MF);
156 return PreservedAnalyses::all();
157}
158