| 1 | //===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This pass applies cache prefetch instructions based on a profile. The pass |
| 10 | // assumes DiscriminateMemOps ran immediately before, to ensure debug info |
| 11 | // matches the one used at profile generation time. The profile is encoded in |
| 12 | // afdo format (text or binary). It contains prefetch hints recommendations. |
| 13 | // Each recommendation is made in terms of debug info locations, a type (i.e. |
| 14 | // nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a |
| 15 | // memory operand (see X86DiscriminateMemOps). The prefetch will be made for |
| 16 | // a location at that memory operand + the delta specified in the |
| 17 | // recommendation. |
| 18 | // |
| 19 | //===----------------------------------------------------------------------===// |
| 20 | |
| 21 | #include "X86.h" |
| 22 | #include "X86Subtarget.h" |
| 23 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 24 | #include "llvm/CodeGen/MachineModuleInfo.h" |
| 25 | #include "llvm/IR/DebugInfoMetadata.h" |
| 26 | #include "llvm/IR/Module.h" |
| 27 | #include "llvm/ProfileData/SampleProf.h" |
| 28 | #include "llvm/ProfileData/SampleProfReader.h" |
| 29 | #include "llvm/Support/VirtualFileSystem.h" |
| 30 | #include "llvm/Transforms/IPO/SampleProfile.h" |
| 31 | using namespace llvm; |
| 32 | using namespace sampleprof; |
| 33 | |
| 34 | static cl::opt<std::string> |
| 35 | PrefetchHintsFile("prefetch-hints-file" , |
| 36 | cl::desc("Path to the prefetch hints profile. See also " |
| 37 | "-x86-discriminate-memops" ), |
| 38 | cl::Hidden); |
| 39 | namespace { |
| 40 | |
| 41 | class X86InsertPrefetch : public MachineFunctionPass { |
| 42 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
| 43 | bool doInitialization(Module &) override; |
| 44 | |
| 45 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 46 | struct PrefetchInfo { |
| 47 | unsigned InstructionID; |
| 48 | int64_t Delta; |
| 49 | }; |
| 50 | typedef SmallVectorImpl<PrefetchInfo> Prefetches; |
| 51 | bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI, |
| 52 | Prefetches &prefetches) const; |
| 53 | |
| 54 | public: |
| 55 | static char ID; |
| 56 | X86InsertPrefetch(const std::string &PrefetchHintsFilename); |
| 57 | StringRef getPassName() const override { |
| 58 | return "X86 Insert Cache Prefetches" ; |
| 59 | } |
| 60 | |
| 61 | private: |
| 62 | std::string Filename; |
| 63 | std::unique_ptr<SampleProfileReader> Reader; |
| 64 | }; |
| 65 | |
| 66 | using PrefetchHints = SampleRecord::CallTargetMap; |
| 67 | |
| 68 | // Return any prefetching hints for the specified MachineInstruction. The hints |
| 69 | // are returned as pairs (name, delta). |
| 70 | ErrorOr<const PrefetchHints &> |
| 71 | getPrefetchHints(const FunctionSamples *TopSamples, const MachineInstr &MI) { |
| 72 | if (const auto &Loc = MI.getDebugLoc()) |
| 73 | if (const auto *Samples = TopSamples->findFunctionSamples(DIL: Loc)) |
| 74 | return Samples->findCallTargetMapAt(LineOffset: FunctionSamples::getOffset(DIL: Loc), |
| 75 | Discriminator: Loc->getBaseDiscriminator()); |
| 76 | return std::error_code(); |
| 77 | } |
| 78 | |
| 79 | // The prefetch instruction can't take memory operands involving vector |
| 80 | // registers. |
| 81 | bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) { |
| 82 | Register BaseReg = MI.getOperand(i: Op + X86::AddrBaseReg).getReg(); |
| 83 | Register IndexReg = MI.getOperand(i: Op + X86::AddrIndexReg).getReg(); |
| 84 | return (BaseReg == 0 || |
| 85 | X86MCRegisterClasses[X86::GR64RegClassID].contains(Reg: BaseReg) || |
| 86 | X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg: BaseReg)) && |
| 87 | (IndexReg == 0 || |
| 88 | X86MCRegisterClasses[X86::GR64RegClassID].contains(Reg: IndexReg) || |
| 89 | X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg: IndexReg)); |
| 90 | } |
| 91 | |
| 92 | } // end anonymous namespace |
| 93 | |
| 94 | //===----------------------------------------------------------------------===// |
| 95 | // Implementation |
| 96 | //===----------------------------------------------------------------------===// |
| 97 | |
| 98 | char X86InsertPrefetch::ID = 0; |
| 99 | |
| 100 | X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename) |
| 101 | : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {} |
| 102 | |
| 103 | /// Return true if the provided MachineInstruction has cache prefetch hints. In |
| 104 | /// that case, the prefetch hints are stored, in order, in the Prefetches |
| 105 | /// vector. |
| 106 | bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, |
| 107 | const MachineInstr &MI, |
| 108 | Prefetches &Prefetches) const { |
| 109 | assert(Prefetches.empty() && |
| 110 | "Expected caller passed empty PrefetchInfo vector." ); |
| 111 | |
| 112 | // There is no point to match prefetch hints if the profile is using MD5. |
| 113 | if (FunctionSamples::UseMD5) |
| 114 | return false; |
| 115 | |
| 116 | static constexpr std::pair<StringLiteral, unsigned> HintTypes[] = { |
| 117 | {"_nta_" , X86::PREFETCHNTA}, |
| 118 | {"_t0_" , X86::PREFETCHT0}, |
| 119 | {"_t1_" , X86::PREFETCHT1}, |
| 120 | {"_t2_" , X86::PREFETCHT2}, |
| 121 | }; |
| 122 | static const char *SerializedPrefetchPrefix = "__prefetch" ; |
| 123 | |
| 124 | auto T = getPrefetchHints(TopSamples, MI); |
| 125 | if (!T) |
| 126 | return false; |
| 127 | int16_t max_index = -1; |
| 128 | // Convert serialized prefetch hints into PrefetchInfo objects, and populate |
| 129 | // the Prefetches vector. |
| 130 | for (const auto &S_V : *T) { |
| 131 | StringRef Name = S_V.first.stringRef(); |
| 132 | if (Name.consume_front(Prefix: SerializedPrefetchPrefix)) { |
| 133 | int64_t D = static_cast<int64_t>(S_V.second); |
| 134 | unsigned IID = 0; |
| 135 | for (const auto &HintType : HintTypes) { |
| 136 | if (Name.consume_front(Prefix: HintType.first)) { |
| 137 | IID = HintType.second; |
| 138 | break; |
| 139 | } |
| 140 | } |
| 141 | if (IID == 0) |
| 142 | return false; |
| 143 | uint8_t index = 0; |
| 144 | Name.consumeInteger(Radix: 10, Result&: index); |
| 145 | |
| 146 | if (index >= Prefetches.size()) |
| 147 | Prefetches.resize(N: index + 1); |
| 148 | Prefetches[index] = {.InstructionID: IID, .Delta: D}; |
| 149 | max_index = std::max(a: max_index, b: static_cast<int16_t>(index)); |
| 150 | } |
| 151 | } |
| 152 | assert(max_index + 1 >= 0 && |
| 153 | "Possible overflow: max_index + 1 should be positive." ); |
| 154 | assert(static_cast<size_t>(max_index + 1) == Prefetches.size() && |
| 155 | "The number of prefetch hints received should match the number of " |
| 156 | "PrefetchInfo objects returned" ); |
| 157 | return !Prefetches.empty(); |
| 158 | } |
| 159 | |
| 160 | bool X86InsertPrefetch::doInitialization(Module &M) { |
| 161 | if (Filename.empty()) |
| 162 | return false; |
| 163 | |
| 164 | LLVMContext &Ctx = M.getContext(); |
| 165 | // TODO: Propagate virtual file system into LLVM targets. |
| 166 | auto FS = vfs::getRealFileSystem(); |
| 167 | ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr = |
| 168 | SampleProfileReader::create(Filename, C&: Ctx, FS&: *FS); |
| 169 | if (std::error_code EC = ReaderOrErr.getError()) { |
| 170 | std::string Msg = "Could not open profile: " + EC.message(); |
| 171 | Ctx.diagnose(DI: DiagnosticInfoSampleProfile(Filename, Msg, |
| 172 | DiagnosticSeverity::DS_Warning)); |
| 173 | return false; |
| 174 | } |
| 175 | Reader = std::move(ReaderOrErr.get()); |
| 176 | Reader->read(); |
| 177 | return true; |
| 178 | } |
| 179 | |
| 180 | void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const { |
| 181 | AU.setPreservesAll(); |
| 182 | MachineFunctionPass::getAnalysisUsage(AU); |
| 183 | } |
| 184 | |
| 185 | bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) { |
| 186 | if (!Reader) |
| 187 | return false; |
| 188 | const FunctionSamples *Samples = Reader->getSamplesFor(F: MF.getFunction()); |
| 189 | if (!Samples) |
| 190 | return false; |
| 191 | |
| 192 | bool Changed = false; |
| 193 | |
| 194 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
| 195 | SmallVector<PrefetchInfo, 4> Prefetches; |
| 196 | for (auto &MBB : MF) { |
| 197 | for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) { |
| 198 | auto Current = MI; |
| 199 | ++MI; |
| 200 | |
| 201 | int Offset = X86II::getMemoryOperandNo(TSFlags: Current->getDesc().TSFlags); |
| 202 | if (Offset < 0) |
| 203 | continue; |
| 204 | unsigned Bias = X86II::getOperandBias(Desc: Current->getDesc()); |
| 205 | int MemOpOffset = Offset + Bias; |
| 206 | // FIXME(mtrofin): ORE message when the recommendation cannot be taken. |
| 207 | if (!IsMemOpCompatibleWithPrefetch(MI: *Current, Op: MemOpOffset)) |
| 208 | continue; |
| 209 | Prefetches.clear(); |
| 210 | if (!findPrefetchInfo(TopSamples: Samples, MI: *Current, Prefetches)) |
| 211 | continue; |
| 212 | assert(!Prefetches.empty() && |
| 213 | "The Prefetches vector should contain at least a value if " |
| 214 | "findPrefetchInfo returned true." ); |
| 215 | for (auto &PrefInfo : Prefetches) { |
| 216 | unsigned PFetchInstrID = PrefInfo.InstructionID; |
| 217 | int64_t Delta = PrefInfo.Delta; |
| 218 | const MCInstrDesc &Desc = TII->get(Opcode: PFetchInstrID); |
| 219 | MachineInstr *PFetch = |
| 220 | MF.CreateMachineInstr(MCID: Desc, DL: Current->getDebugLoc(), NoImplicit: true); |
| 221 | MachineInstrBuilder MIB(MF, PFetch); |
| 222 | |
| 223 | static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 && |
| 224 | X86::AddrIndexReg == 2 && X86::AddrDisp == 3 && |
| 225 | X86::AddrSegmentReg == 4, |
| 226 | "Unexpected change in X86 operand offset order." ); |
| 227 | |
| 228 | // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc. |
| 229 | // FIXME(mtrofin): consider adding a: |
| 230 | // MachineInstrBuilder::set(unsigned offset, op). |
| 231 | MIB.addReg(RegNo: Current->getOperand(i: MemOpOffset + X86::AddrBaseReg).getReg()) |
| 232 | .addImm( |
| 233 | Val: Current->getOperand(i: MemOpOffset + X86::AddrScaleAmt).getImm()) |
| 234 | .addReg( |
| 235 | RegNo: Current->getOperand(i: MemOpOffset + X86::AddrIndexReg).getReg()) |
| 236 | .addImm(Val: Current->getOperand(i: MemOpOffset + X86::AddrDisp).getImm() + |
| 237 | Delta) |
| 238 | .addReg(RegNo: Current->getOperand(i: MemOpOffset + X86::AddrSegmentReg) |
| 239 | .getReg()); |
| 240 | |
| 241 | if (!Current->memoperands_empty()) { |
| 242 | MachineMemOperand *CurrentOp = *(Current->memoperands_begin()); |
| 243 | MIB.addMemOperand(MMO: MF.getMachineMemOperand( |
| 244 | MMO: CurrentOp, Offset: CurrentOp->getOffset() + Delta, Size: CurrentOp->getSize())); |
| 245 | } |
| 246 | |
| 247 | // Insert before Current. This is because Current may clobber some of |
| 248 | // the registers used to describe the input memory operand. |
| 249 | MBB.insert(I: Current, M: PFetch); |
| 250 | Changed = true; |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | return Changed; |
| 255 | } |
| 256 | |
| 257 | FunctionPass *llvm::createX86InsertPrefetchPass() { |
| 258 | return new X86InsertPrefetch(PrefetchHintsFile); |
| 259 | } |
| 260 | |