| 1 | //===- HexagonHVXSaveRemark.cpp - Remark on HVX saves around calls --------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Diagnostic pass that emits optimization remarks when HVX vector registers |
| 10 | // are live across function calls. All HVX registers are caller-saved |
| 11 | // (Section 5.3 of the Hexagon ABI), so every HVX value that is live across a |
| 12 | // call requires a save/restore pair on the stack. Each HVX vector is 64 or |
| 13 | // 128 bytes (depending on the mode), making this overhead expensive. The |
| 14 | // remarks help programmers identify call sites where inlining, hoisting, or |
| 15 | // sinking the call could reduce the save/restore cost. |
| 16 | // |
| 17 | // The pass runs before register allocation while values are still in virtual |
| 18 | // registers. A backward liveness scan over each basic block counts the HVX |
| 19 | // virtual registers (and their corresponding byte cost) live at each call |
| 20 | // instruction. |
| 21 | // |
| 22 | //===----------------------------------------------------------------------===// |
| 23 | |
| 24 | #include "HexagonSubtarget.h" |
| 25 | #include "llvm/ADT/SmallSet.h" |
| 26 | #include "llvm/ADT/SmallVector.h" |
| 27 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| 28 | #include "llvm/CodeGen/MachineFunction.h" |
| 29 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 30 | #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" |
| 31 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 32 | #include "llvm/InitializePasses.h" |
| 33 | #include "llvm/Pass.h" |
| 34 | #include "llvm/Support/CommandLine.h" |
| 35 | #include "llvm/Support/Debug.h" |
| 36 | |
| 37 | using namespace llvm; |
| 38 | |
| 39 | #define DEBUG_TYPE "hexagon-hvx-save" |
| 40 | |
| 41 | static cl::opt<unsigned> HVXSaveThreshold( |
| 42 | "hexagon-hvx-save-threshold" , cl::Hidden, cl::init(Val: 128 * 8), |
| 43 | cl::desc("Minimum number of bytes of HVX caller-saved register data live " |
| 44 | "across a call to trigger a remark (default: 8 x 128-byte " |
| 45 | "vectors)" )); |
| 46 | |
| 47 | namespace { |
| 48 | |
| 49 | struct : public MachineFunctionPass { |
| 50 | static char ; |
| 51 | |
| 52 | () : MachineFunctionPass(ID) {} |
| 53 | |
| 54 | // Returns the number of HVX vectors represented by VReg: 2 for HvxWR |
| 55 | // (vector pair), 1 for HvxVR (single vector), 0 for non-HVX registers. |
| 56 | static unsigned (Register VReg, const MachineRegisterInfo &MRI) { |
| 57 | const TargetRegisterClass *RC = MRI.getRegClass(Reg: VReg); |
| 58 | if (RC == &Hexagon::HvxWRRegClass) |
| 59 | return 2; |
| 60 | if (RC == &Hexagon::HvxVRRegClass) |
| 61 | return 1; |
| 62 | return 0; |
| 63 | } |
| 64 | |
| 65 | bool (MachineFunction &MF) override { |
| 66 | auto &MORE = getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); |
| 67 | if (!MORE.allowExtraAnalysis(DEBUG_TYPE)) |
| 68 | return false; |
| 69 | |
| 70 | const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); |
| 71 | if (!HST.useHVXOps()) |
| 72 | return false; |
| 73 | |
| 74 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 75 | unsigned HVXLen = HST.getVectorLength(); |
| 76 | |
| 77 | // Compute LiveOut[B] for each block: the set of HVX virtual registers |
| 78 | // that are live on exit from B. We use a standard backward dataflow |
| 79 | // fixed-point: |
| 80 | // |
| 81 | // LiveIn[B] = UEVar[B] union (LiveOut[B] - Def[B]) |
| 82 | // LiveOut[B] = union over successors S of LiveIn[S] |
| 83 | // |
| 84 | // where UEVar[B] is the set of HVX vregs that are used in B before any |
| 85 | // definition of that vreg in B (upward-exposed uses), and Def[B] is the |
| 86 | // set of HVX vregs defined in B. |
| 87 | // |
| 88 | // Because MachineBasicBlock::liveins() only contains physical registers, |
| 89 | // we cannot seed cross-block virtual register liveness from successor |
| 90 | // liveins -- we must compute it ourselves. |
| 91 | |
| 92 | unsigned NumBlocks = MF.getNumBlockIDs(); |
| 93 | using VRegSet = SmallSet<Register, 8>; |
| 94 | |
| 95 | // Per-block UEVar and Def sets (HVX vregs only). |
| 96 | SmallVector<VRegSet, 16> UEVar(NumBlocks), BlockDef(NumBlocks); |
| 97 | |
| 98 | for (const MachineBasicBlock &MBB : MF) { |
| 99 | unsigned BN = MBB.getNumber(); |
| 100 | VRegSet Defs; |
| 101 | for (const MachineInstr &MI : MBB) { |
| 102 | for (const MachineOperand &MO : MI.operands()) { |
| 103 | if (!MO.isReg()) |
| 104 | continue; |
| 105 | Register R = MO.getReg(); |
| 106 | if (!R.isVirtual() || !hvxVecCount(VReg: R, MRI)) |
| 107 | continue; |
| 108 | if (MO.isDef()) { |
| 109 | Defs.insert(V: R); |
| 110 | } else if (MO.isUse() && !Defs.count(V: R)) { |
| 111 | UEVar[BN].insert(V: R); // upward-exposed use |
| 112 | } |
| 113 | } |
| 114 | } |
| 115 | BlockDef[BN] = Defs; |
| 116 | } |
| 117 | |
| 118 | // LiveOut[B] and LiveIn[B] maps. |
| 119 | SmallVector<VRegSet, 16> LiveOut(NumBlocks), LiveIn(NumBlocks); |
| 120 | |
| 121 | // Seed LiveIn from UEVar and iterate until stable. |
| 122 | for (unsigned I = 0; I < NumBlocks; ++I) |
| 123 | LiveIn[I] = UEVar[I]; |
| 124 | |
| 125 | bool Changed = true; |
| 126 | while (Changed) { |
| 127 | Changed = false; |
| 128 | for (const MachineBasicBlock &MBB : MF) { |
| 129 | unsigned BN = MBB.getNumber(); |
| 130 | |
| 131 | // LiveOut[B] = union of LiveIn[S] for each successor S. |
| 132 | VRegSet NewLiveOut; |
| 133 | for (const MachineBasicBlock *Succ : MBB.successors()) |
| 134 | for (Register R : LiveIn[Succ->getNumber()]) |
| 135 | NewLiveOut.insert(V: R); |
| 136 | |
| 137 | if (NewLiveOut != LiveOut[BN]) { |
| 138 | LiveOut[BN] = NewLiveOut; |
| 139 | Changed = true; |
| 140 | } |
| 141 | |
| 142 | // LiveIn[B] = UEVar[B] union (LiveOut[B] - Def[B]). |
| 143 | VRegSet NewLiveIn = UEVar[BN]; |
| 144 | for (Register R : LiveOut[BN]) |
| 145 | if (!BlockDef[BN].count(V: R)) |
| 146 | NewLiveIn.insert(V: R); |
| 147 | |
| 148 | if (NewLiveIn != LiveIn[BN]) { |
| 149 | LiveIn[BN] = NewLiveIn; |
| 150 | Changed = true; |
| 151 | } |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | // Now do the backward scan over each block, seeded from LiveOut[B]. |
| 156 | for (const MachineBasicBlock &MBB : MF) { |
| 157 | // Backward liveness scan over virtual registers. We track which |
| 158 | // virtual registers are live at each point, then at call instructions |
| 159 | // count those with HVX register classes. |
| 160 | // |
| 161 | // When walking backwards: |
| 162 | // - a def removes a vreg from the live set |
| 163 | // - a use adds a vreg to the live set |
| 164 | // At each call, the live set holds vregs live after the call (i.e., the |
| 165 | // values that must survive across it and therefore need save/restore). |
| 166 | VRegSet LiveVRegs = LiveOut[MBB.getNumber()]; |
| 167 | |
| 168 | for (const MachineInstr &MI : llvm::reverse(C: MBB)) { |
| 169 | if (MI.isCall()) { |
| 170 | // Count HVX virtual registers live after (and thus across) this |
| 171 | // call. HvxVR holds one vector (HVXLen bytes); HvxWR holds two |
| 172 | // (2 * HVXLen bytes). |
| 173 | unsigned NumVecs = 0; |
| 174 | for (Register VReg : LiveVRegs) |
| 175 | NumVecs += hvxVecCount(VReg, MRI); |
| 176 | unsigned TotalBytes = NumVecs * HVXLen; |
| 177 | |
| 178 | LLVM_DEBUG(dbgs() << "HVXSaveRemark: call in " << MF.getName() |
| 179 | << " has " << NumVecs << " HVX vector(s) live (" |
| 180 | << TotalBytes << " bytes)\n" ); |
| 181 | |
| 182 | if (TotalBytes >= HVXSaveThreshold) { |
| 183 | MORE.emit(RemarkBuilder: [&]() { |
| 184 | MachineOptimizationRemarkAnalysis R( |
| 185 | DEBUG_TYPE, "HVXSaveAroundCall" , MI.getDebugLoc(), &MBB); |
| 186 | R << ore::NV("NumVecs" , NumVecs) |
| 187 | << " HVX caller-saved register(s) (" |
| 188 | << ore::NV("TotalBytes" , TotalBytes) |
| 189 | << " bytes) live across call" ; |
| 190 | return R; |
| 191 | }); |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | // Update liveness: defs kill vregs, uses add them. |
| 196 | for (const MachineOperand &MO : MI.operands()) { |
| 197 | if (!MO.isReg() || !MO.getReg().isVirtual()) |
| 198 | continue; |
| 199 | if (MO.isDef()) |
| 200 | LiveVRegs.erase(V: MO.getReg()); |
| 201 | else if (MO.isUse()) |
| 202 | LiveVRegs.insert(V: MO.getReg()); |
| 203 | } |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | return false; |
| 208 | } |
| 209 | |
| 210 | StringRef () const override { return "Hexagon HVX Save Remarks" ; } |
| 211 | |
| 212 | void (AnalysisUsage &AU) const override { |
| 213 | AU.addRequired<MachineOptimizationRemarkEmitterPass>(); |
| 214 | AU.setPreservesAll(); |
| 215 | MachineFunctionPass::getAnalysisUsage(AU); |
| 216 | } |
| 217 | }; |
| 218 | |
| 219 | char HexagonHVXSaveRemark:: = 0; |
| 220 | |
| 221 | } // end anonymous namespace |
| 222 | |
| 223 | INITIALIZE_PASS(HexagonHVXSaveRemark, DEBUG_TYPE, "Hexagon HVX Save Remarks" , |
| 224 | false, false) |
| 225 | |
| 226 | FunctionPass *llvm::() { |
| 227 | return new HexagonHVXSaveRemark(); |
| 228 | } |
| 229 | |