| 1 | //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // \file |
| 10 | // Uses profile information to split out cold blocks. |
| 11 | // |
| 12 | // This pass splits out cold machine basic blocks from the parent function. This |
| 13 | // implementation leverages the basic block section framework. Blocks marked |
| 14 | // cold by this pass are grouped together in a separate section prefixed with |
| 15 | // ".text.unlikely.*". The linker can then group these together as a cold |
| 16 | // section. The split part of the function is a contiguous region identified by |
| 17 | // the symbol "foo.cold". Grouping all cold blocks across functions together |
| 18 | // decreases fragmentation and improves icache and itlb utilization. Note that |
| 19 | // the overall changes to the binary size are negligible; only a small number of |
| 20 | // additional jump instructions may be introduced. |
| 21 | // |
| 22 | // For the original RFC of this pass please see |
| 23 | // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ |
| 24 | //===----------------------------------------------------------------------===// |
| 25 | |
| 26 | #include "llvm/ADT/SmallVector.h" |
| 27 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
| 28 | #include "llvm/Analysis/EHUtils.h" |
| 29 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
| 30 | #include "llvm/CodeGen/BasicBlockSectionUtils.h" |
| 31 | #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" |
| 32 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 33 | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
| 34 | #include "llvm/CodeGen/MachineFunction.h" |
| 35 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 36 | #include "llvm/CodeGen/MachineModuleInfo.h" |
| 37 | #include "llvm/CodeGen/Passes.h" |
| 38 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 39 | #include "llvm/IR/Function.h" |
| 40 | #include "llvm/InitializePasses.h" |
| 41 | #include "llvm/Support/CommandLine.h" |
| 42 | #include <optional> |
| 43 | |
| 44 | using namespace llvm; |
| 45 | |
| 46 | // FIXME: This cutoff value is CPU dependent and should be moved to |
| 47 | // TargetTransformInfo once we consider enabling this on other platforms. |
| 48 | // The value is expressed as a ProfileSummaryInfo integer percentile cutoff. |
| 49 | // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split. |
| 50 | // The default was empirically determined to be optimal when considering cutoff |
| 51 | // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on |
| 52 | // Intel CPUs. |
| 53 | static cl::opt<unsigned> |
| 54 | PercentileCutoff("mfs-psi-cutoff" , |
| 55 | cl::desc("Percentile profile summary cutoff used to " |
| 56 | "determine cold blocks. Unused if set to zero." ), |
| 57 | cl::init(Val: 999950), cl::Hidden); |
| 58 | |
| 59 | static cl::opt<unsigned> ColdCountThreshold( |
| 60 | "mfs-count-threshold" , |
| 61 | cl::desc( |
| 62 | "Minimum number of times a block must be executed to be retained." ), |
| 63 | cl::init(Val: 1), cl::Hidden); |
| 64 | |
| 65 | static cl::opt<bool> SplitAllEHCode( |
| 66 | "mfs-split-ehcode" , |
| 67 | cl::desc("Splits all EH code and it's descendants by default." ), |
| 68 | cl::init(Val: false), cl::Hidden); |
| 69 | |
| 70 | namespace { |
| 71 | |
| 72 | class MachineFunctionSplitter : public MachineFunctionPass { |
| 73 | public: |
| 74 | static char ID; |
| 75 | MachineFunctionSplitter() : MachineFunctionPass(ID) { |
| 76 | initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); |
| 77 | } |
| 78 | |
| 79 | StringRef getPassName() const override { |
| 80 | return "Machine Function Splitter Transformation" ; |
| 81 | } |
| 82 | |
| 83 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
| 84 | |
| 85 | bool runOnMachineFunction(MachineFunction &F) override; |
| 86 | }; |
| 87 | } // end anonymous namespace |
| 88 | |
| 89 | /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable |
| 90 | /// only by EH pad as cold. This will help mark EH pads statically cold |
| 91 | /// instead of relying on profile data. |
| 92 | static void setDescendantEHBlocksCold(MachineFunction &MF) { |
| 93 | DenseSet<MachineBasicBlock *> EHBlocks; |
| 94 | computeEHOnlyBlocks(F&: MF, EHBlocks); |
| 95 | for (auto Block : EHBlocks) { |
| 96 | Block->setSectionID(MBBSectionID::ColdSectionID); |
| 97 | } |
| 98 | } |
| 99 | |
| 100 | static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) { |
| 101 | auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { |
| 102 | return X.getSectionID().Type < Y.getSectionID().Type; |
| 103 | }; |
| 104 | llvm::sortBasicBlocksAndUpdateBranches(MF, MBBCmp: Comparator); |
| 105 | llvm::avoidZeroOffsetLandingPad(MF); |
| 106 | } |
| 107 | |
| 108 | static bool isColdBlock(const MachineBasicBlock &MBB, |
| 109 | const MachineBlockFrequencyInfo *MBFI, |
| 110 | ProfileSummaryInfo *PSI) { |
| 111 | std::optional<uint64_t> Count = MBFI->getBlockProfileCount(MBB: &MBB); |
| 112 | // For instrumentation profiles and sample profiles, we use different ways |
| 113 | // to judge whether a block is cold and should be split. |
| 114 | if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) { |
| 115 | // If using instrument profile, which is deemed "accurate", no count means |
| 116 | // cold. |
| 117 | if (!Count) |
| 118 | return true; |
| 119 | if (PercentileCutoff > 0) |
| 120 | return PSI->isColdCountNthPercentile(PercentileCutoff, C: *Count); |
| 121 | // Fallthrough to end of function. |
| 122 | } else if (PSI->hasSampleProfile()) { |
| 123 | // For sample profile, no count means "do not judege coldness". |
| 124 | if (!Count) |
| 125 | return false; |
| 126 | } |
| 127 | |
| 128 | return (*Count < ColdCountThreshold); |
| 129 | } |
| 130 | |
| 131 | bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { |
| 132 | // Do not split functions when -basic-block-sections=all is specified. |
| 133 | if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All) |
| 134 | return false; |
| 135 | // We target functions with profile data. Static information in the form |
| 136 | // of exception handling code may be split to cold if user passes the |
| 137 | // mfs-split-ehcode flag. |
| 138 | bool UseProfileData = MF.getFunction().hasProfileData(); |
| 139 | if (!UseProfileData && !SplitAllEHCode) |
| 140 | return false; |
| 141 | |
| 142 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
| 143 | if (!TII.isFunctionSafeToSplit(MF)) |
| 144 | return false; |
| 145 | |
| 146 | // Do not split functions with BasicBlockSections profiles as they will |
| 147 | // be split by the BasicBlockSections pass. |
| 148 | auto BBSectionsProfile = |
| 149 | getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>(); |
| 150 | if (BBSectionsProfile != nullptr && |
| 151 | BBSectionsProfile->getBBSPR().isFunctionHot(FuncName: MF.getName())) |
| 152 | return false; |
| 153 | |
| 154 | // Renumbering blocks here preserves the order of the blocks as |
| 155 | // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort |
| 156 | // blocks. Preserving the order of blocks is essential to retaining decisions |
| 157 | // made by prior passes such as MachineBlockPlacement. |
| 158 | MF.RenumberBlocks(); |
| 159 | MF.setBBSectionsType(BasicBlockSection::Preset); |
| 160 | |
| 161 | MachineBlockFrequencyInfo *MBFI = nullptr; |
| 162 | ProfileSummaryInfo *PSI = nullptr; |
| 163 | if (UseProfileData) { |
| 164 | MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); |
| 165 | PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
| 166 | // If we don't have a good profile (sample profile is not deemed |
| 167 | // as a "good profile") and the function is not hot, then early |
| 168 | // return. (Because we can only trust hot functions when profile |
| 169 | // quality is not good.) |
| 170 | if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(F: &MF, BFI&: *MBFI)) { |
| 171 | // Split all EH code and it's descendant statically by default. |
| 172 | if (SplitAllEHCode) |
| 173 | setDescendantEHBlocksCold(MF); |
| 174 | finishAdjustingBasicBlocksAndLandingPads(MF); |
| 175 | return true; |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | SmallVector<MachineBasicBlock *, 2> LandingPads; |
| 180 | for (auto &MBB : MF) { |
| 181 | if (MBB.isEntryBlock()) |
| 182 | continue; |
| 183 | |
| 184 | if (MBB.isEHPad()) |
| 185 | LandingPads.push_back(Elt: &MBB); |
| 186 | else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && |
| 187 | TII.isMBBSafeToSplitToCold(MBB) && !SplitAllEHCode) |
| 188 | MBB.setSectionID(MBBSectionID::ColdSectionID); |
| 189 | } |
| 190 | |
| 191 | // Split all EH code and it's descendant statically by default. |
| 192 | if (SplitAllEHCode) |
| 193 | setDescendantEHBlocksCold(MF); |
| 194 | // We only split out eh pads if all of them are cold. |
| 195 | else { |
| 196 | // Here we have UseProfileData == true. |
| 197 | bool HasHotLandingPads = false; |
| 198 | for (const MachineBasicBlock *LP : LandingPads) { |
| 199 | if (!isColdBlock(MBB: *LP, MBFI, PSI) || !TII.isMBBSafeToSplitToCold(MBB: *LP)) |
| 200 | HasHotLandingPads = true; |
| 201 | } |
| 202 | if (!HasHotLandingPads) { |
| 203 | for (MachineBasicBlock *LP : LandingPads) |
| 204 | LP->setSectionID(MBBSectionID::ColdSectionID); |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | finishAdjustingBasicBlocksAndLandingPads(MF); |
| 209 | return true; |
| 210 | } |
| 211 | |
| 212 | void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { |
| 213 | AU.addRequired<MachineModuleInfoWrapperPass>(); |
| 214 | AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); |
| 215 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
| 216 | AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>(); |
| 217 | } |
| 218 | |
| 219 | char MachineFunctionSplitter::ID = 0; |
| 220 | INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter" , |
| 221 | "Split machine functions using profile information" , false, |
| 222 | false) |
| 223 | |
| 224 | MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { |
| 225 | return new MachineFunctionSplitter(); |
| 226 | } |
| 227 | |