| 1 | //===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// This file implements the Localizer class. |
| 10 | //===----------------------------------------------------------------------===// |
| 11 | |
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include <utility>
| 22 | |
| 23 | #define DEBUG_TYPE "localizer" |
| 24 | |
| 25 | using namespace llvm; |
| 26 | |
| 27 | char Localizer::ID = 0; |
| 28 | INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE, |
| 29 | "Move/duplicate certain instructions close to their use" , |
| 30 | false, false) |
| 31 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
| 32 | INITIALIZE_PASS_END(Localizer, DEBUG_TYPE, |
| 33 | "Move/duplicate certain instructions close to their use" , |
| 34 | false, false) |
| 35 | |
| 36 | Localizer::Localizer(std::function<bool(const MachineFunction &)> F) |
| 37 | : MachineFunctionPass(ID), DoNotRunPass(F) {} |
| 38 | |
| 39 | Localizer::Localizer() |
| 40 | : Localizer([](const MachineFunction &) { return false; }) {} |
| 41 | |
| 42 | void Localizer::init(MachineFunction &MF) { |
| 43 | MRI = &MF.getRegInfo(); |
| 44 | TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F: MF.getFunction()); |
| 45 | } |
| 46 | |
| 47 | void Localizer::getAnalysisUsage(AnalysisUsage &AU) const { |
| 48 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
| 49 | getSelectionDAGFallbackAnalysisUsage(AU); |
| 50 | MachineFunctionPass::getAnalysisUsage(AU); |
| 51 | } |
| 52 | |
| 53 | bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, |
| 54 | MachineBasicBlock *&InsertMBB) { |
| 55 | MachineInstr &MIUse = *MOUse.getParent(); |
| 56 | InsertMBB = MIUse.getParent(); |
| 57 | if (MIUse.isPHI()) |
| 58 | InsertMBB = MIUse.getOperand(i: MOUse.getOperandNo() + 1).getMBB(); |
| 59 | return InsertMBB == Def.getParent(); |
| 60 | } |
| 61 | |
| 62 | unsigned Localizer::getNumPhiUses(MachineOperand &Op) const { |
| 63 | auto *MI = dyn_cast<GPhi>(Val: &*Op.getParent()); |
| 64 | if (!MI) |
| 65 | return 0; |
| 66 | |
| 67 | Register SrcReg = Op.getReg(); |
| 68 | unsigned NumUses = 0; |
| 69 | for (unsigned I = 0, NumVals = MI->getNumIncomingValues(); I < NumVals; ++I) { |
| 70 | if (MI->getIncomingValue(I) == SrcReg) |
| 71 | ++NumUses; |
| 72 | } |
| 73 | return NumUses; |
| 74 | } |
| 75 | |
| 76 | bool Localizer::localizeInterBlock(MachineFunction &MF, |
| 77 | LocalizedSetVecT &LocalizedInstrs) { |
| 78 | bool Changed = false; |
| 79 | DenseMap<std::pair<MachineBasicBlock *, Register>, Register> MBBWithLocalDef; |
| 80 | |
| 81 | // Since the IRTranslator only emits constants into the entry block, and the |
| 82 | // rest of the GISel pipeline generally emits constants close to their users, |
| 83 | // we only localize instructions in the entry block here. This might change if |
| 84 | // we start doing CSE across blocks. |
| 85 | auto &MBB = MF.front(); |
| 86 | auto &TL = *MF.getSubtarget().getTargetLowering(); |
| 87 | for (MachineInstr &MI : llvm::reverse(C&: MBB)) { |
| 88 | if (!TL.shouldLocalize(MI, TTI)) |
| 89 | continue; |
| 90 | LLVM_DEBUG(dbgs() << "Should localize: " << MI); |
| 91 | assert(MI.getDesc().getNumDefs() == 1 && |
| 92 | "More than one definition not supported yet" ); |
| 93 | Register Reg = MI.getOperand(i: 0).getReg(); |
| 94 | // Check if all the users of MI are local. |
| 95 | // We are going to invalidation the list of use operands, so we |
| 96 | // can't use range iterator. |
| 97 | for (MachineOperand &MOUse : |
| 98 | llvm::make_early_inc_range(Range: MRI->use_operands(Reg))) { |
| 99 | // Check if the use is already local. |
| 100 | MachineBasicBlock *InsertMBB; |
| 101 | LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); |
| 102 | dbgs() << "Checking use: " << MIUse |
| 103 | << " #Opd: " << MOUse.getOperandNo() << '\n'); |
| 104 | if (isLocalUse(MOUse, Def: MI, InsertMBB)) { |
| 105 | // Even if we're in the same block, if the block is very large we could |
| 106 | // still have many long live ranges. Try to do intra-block localization |
| 107 | // too. |
| 108 | LocalizedInstrs.insert(X: &MI); |
| 109 | continue; |
| 110 | } |
| 111 | |
| 112 | // PHIs look like a single user but can use the same register in multiple |
| 113 | // edges, causing remat into each predecessor. Allow this to a certain |
| 114 | // extent. |
| 115 | unsigned NumPhiUses = getNumPhiUses(Op&: MOUse); |
| 116 | const unsigned PhiThreshold = 2; // FIXME: Tune this more. |
| 117 | if (NumPhiUses > PhiThreshold) |
| 118 | continue; |
| 119 | |
| 120 | LLVM_DEBUG(dbgs() << "Fixing non-local use\n" ); |
| 121 | Changed = true; |
| 122 | auto MBBAndReg = std::make_pair(x&: InsertMBB, y&: Reg); |
| 123 | auto NewVRegIt = MBBWithLocalDef.find(Val: MBBAndReg); |
| 124 | if (NewVRegIt == MBBWithLocalDef.end()) { |
| 125 | // Create the localized instruction. |
| 126 | MachineInstr *LocalizedMI = MF.CloneMachineInstr(Orig: &MI); |
| 127 | LocalizedInstrs.insert(X: LocalizedMI); |
| 128 | MachineInstr &UseMI = *MOUse.getParent(); |
| 129 | if (MRI->hasOneUse(RegNo: Reg) && !UseMI.isPHI()) |
| 130 | InsertMBB->insert(I: UseMI, MI: LocalizedMI); |
| 131 | else |
| 132 | InsertMBB->insert(I: InsertMBB->SkipPHIsAndLabels(I: InsertMBB->begin()), |
| 133 | MI: LocalizedMI); |
| 134 | |
| 135 | // Set a new register for the definition. |
| 136 | Register NewReg = MRI->cloneVirtualRegister(VReg: Reg); |
| 137 | LocalizedMI->getOperand(i: 0).setReg(NewReg); |
| 138 | NewVRegIt = |
| 139 | MBBWithLocalDef.try_emplace(Key: MBBAndReg, Args&: NewReg).first; |
| 140 | LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI); |
| 141 | } |
| 142 | LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) |
| 143 | << '\n'); |
| 144 | // Update the user reg. |
| 145 | MOUse.setReg(NewVRegIt->second); |
| 146 | } |
| 147 | } |
| 148 | return Changed; |
| 149 | } |
| 150 | |
| 151 | bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { |
| 152 | bool Changed = false; |
| 153 | |
| 154 | // For each already-localized instruction which has multiple users, then we |
| 155 | // scan the block top down from the current position until we hit one of them. |
| 156 | |
| 157 | // FIXME: Consider doing inst duplication if live ranges are very long due to |
| 158 | // many users, but this case may be better served by regalloc improvements. |
| 159 | |
| 160 | for (MachineInstr *MI : LocalizedInstrs) { |
| 161 | Register Reg = MI->getOperand(i: 0).getReg(); |
| 162 | MachineBasicBlock &MBB = *MI->getParent(); |
| 163 | // All of the user MIs of this reg. |
| 164 | SmallPtrSet<MachineInstr *, 32> Users; |
| 165 | for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { |
| 166 | if (!UseMI.isPHI()) |
| 167 | Users.insert(Ptr: &UseMI); |
| 168 | } |
| 169 | MachineBasicBlock::iterator II(MI); |
| 170 | // If all the users were PHIs then they're not going to be in our block, we |
| 171 | // may still benefit from sinking, especially since the value might be live |
| 172 | // across a call. |
| 173 | if (Users.empty()) { |
| 174 | // Make sure we don't sink in between two terminator sequences by scanning |
| 175 | // forward, not backward. |
| 176 | II = MBB.getFirstTerminatorForward(); |
| 177 | LLVM_DEBUG(dbgs() << "Only phi users: moving inst to end: " << *MI); |
| 178 | } else { |
| 179 | ++II; |
| 180 | while (II != MBB.end() && !Users.count(Ptr: &*II)) |
| 181 | ++II; |
| 182 | assert(II != MBB.end() && "Didn't find the user in the MBB" ); |
| 183 | LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II); |
| 184 | } |
| 185 | |
| 186 | MI->removeFromParent(); |
| 187 | MBB.insert(I: II, MI); |
| 188 | Changed = true; |
| 189 | |
| 190 | // If the instruction (constant) being localized has single user, we can |
| 191 | // propagate debug location from user. |
| 192 | if (Users.size() == 1) { |
| 193 | const auto &DefDL = MI->getDebugLoc(); |
| 194 | const auto &UserDL = (*Users.begin())->getDebugLoc(); |
| 195 | |
| 196 | if ((!DefDL || DefDL.getLine() == 0) && UserDL && UserDL.getLine() != 0) { |
| 197 | MI->setDebugLoc(UserDL); |
| 198 | } |
| 199 | } |
| 200 | } |
| 201 | return Changed; |
| 202 | } |
| 203 | |
| 204 | bool Localizer::runOnMachineFunction(MachineFunction &MF) { |
| 205 | // If the ISel pipeline failed, do not bother running that pass. |
| 206 | if (MF.getProperties().hasFailedISel()) |
| 207 | return false; |
| 208 | |
| 209 | // Don't run the pass if the target asked so. |
| 210 | if (DoNotRunPass(MF)) |
| 211 | return false; |
| 212 | |
| 213 | LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); |
| 214 | |
| 215 | init(MF); |
| 216 | |
| 217 | // Keep track of the instructions we localized. We'll do a second pass of |
| 218 | // intra-block localization to further reduce live ranges. |
| 219 | LocalizedSetVecT LocalizedInstrs; |
| 220 | |
| 221 | bool Changed = localizeInterBlock(MF, LocalizedInstrs); |
| 222 | Changed |= localizeIntraBlock(LocalizedInstrs); |
| 223 | return Changed; |
| 224 | } |
| 225 | |