1 | //===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass applies cache prefetch instructions based on a profile. The pass |
10 | // assumes DiscriminateMemOps ran immediately before, to ensure debug info |
11 | // matches the one used at profile generation time. The profile is encoded in |
// afdo format (text or binary). It contains prefetch hint recommendations.
13 | // Each recommendation is made in terms of debug info locations, a type (i.e. |
14 | // nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a |
15 | // memory operand (see X86DiscriminateMemOps). The prefetch will be made for |
16 | // a location at that memory operand + the delta specified in the |
17 | // recommendation. |
18 | // |
19 | //===----------------------------------------------------------------------===// |
20 | |
21 | #include "X86.h" |
22 | #include "X86InstrBuilder.h" |
23 | #include "X86InstrInfo.h" |
24 | #include "X86MachineFunctionInfo.h" |
25 | #include "X86Subtarget.h" |
26 | #include "llvm/CodeGen/MachineFunctionPass.h" |
27 | #include "llvm/CodeGen/MachineModuleInfo.h" |
28 | #include "llvm/IR/DebugInfoMetadata.h" |
29 | #include "llvm/IR/Module.h" |
30 | #include "llvm/ProfileData/SampleProf.h" |
31 | #include "llvm/ProfileData/SampleProfReader.h" |
32 | #include "llvm/Support/VirtualFileSystem.h" |
33 | #include "llvm/Transforms/IPO/SampleProfile.h" |
34 | using namespace llvm; |
35 | using namespace sampleprof; |
36 | |
37 | static cl::opt<std::string> |
38 | PrefetchHintsFile("prefetch-hints-file" , |
39 | cl::desc("Path to the prefetch hints profile. See also " |
40 | "-x86-discriminate-memops" ), |
41 | cl::Hidden); |
42 | namespace { |
43 | |
44 | class X86InsertPrefetch : public MachineFunctionPass { |
45 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
46 | bool doInitialization(Module &) override; |
47 | |
48 | bool runOnMachineFunction(MachineFunction &MF) override; |
49 | struct PrefetchInfo { |
50 | unsigned InstructionID; |
51 | int64_t Delta; |
52 | }; |
53 | typedef SmallVectorImpl<PrefetchInfo> Prefetches; |
54 | bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI, |
55 | Prefetches &prefetches) const; |
56 | |
57 | public: |
58 | static char ID; |
59 | X86InsertPrefetch(const std::string &PrefetchHintsFilename); |
60 | StringRef getPassName() const override { |
61 | return "X86 Insert Cache Prefetches" ; |
62 | } |
63 | |
64 | private: |
65 | std::string Filename; |
66 | std::unique_ptr<SampleProfileReader> Reader; |
67 | }; |
68 | |
69 | using PrefetchHints = SampleRecord::CallTargetMap; |
70 | |
71 | // Return any prefetching hints for the specified MachineInstruction. The hints |
72 | // are returned as pairs (name, delta). |
73 | ErrorOr<const PrefetchHints &> |
74 | getPrefetchHints(const FunctionSamples *TopSamples, const MachineInstr &MI) { |
75 | if (const auto &Loc = MI.getDebugLoc()) |
76 | if (const auto *Samples = TopSamples->findFunctionSamples(DIL: Loc)) |
77 | return Samples->findCallTargetMapAt(LineOffset: FunctionSamples::getOffset(DIL: Loc), |
78 | Discriminator: Loc->getBaseDiscriminator()); |
79 | return std::error_code(); |
80 | } |
81 | |
82 | // The prefetch instruction can't take memory operands involving vector |
83 | // registers. |
84 | bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) { |
85 | Register BaseReg = MI.getOperand(i: Op + X86::AddrBaseReg).getReg(); |
86 | Register IndexReg = MI.getOperand(i: Op + X86::AddrIndexReg).getReg(); |
87 | return (BaseReg == 0 || |
88 | X86MCRegisterClasses[X86::GR64RegClassID].contains(Reg: BaseReg) || |
89 | X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg: BaseReg)) && |
90 | (IndexReg == 0 || |
91 | X86MCRegisterClasses[X86::GR64RegClassID].contains(Reg: IndexReg) || |
92 | X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg: IndexReg)); |
93 | } |
94 | |
95 | } // end anonymous namespace |
96 | |
97 | //===----------------------------------------------------------------------===// |
98 | // Implementation |
99 | //===----------------------------------------------------------------------===// |
100 | |
101 | char X86InsertPrefetch::ID = 0; |
102 | |
103 | X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename) |
104 | : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {} |
105 | |
106 | /// Return true if the provided MachineInstruction has cache prefetch hints. In |
107 | /// that case, the prefetch hints are stored, in order, in the Prefetches |
108 | /// vector. |
109 | bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, |
110 | const MachineInstr &MI, |
111 | Prefetches &Prefetches) const { |
112 | assert(Prefetches.empty() && |
113 | "Expected caller passed empty PrefetchInfo vector." ); |
114 | |
115 | // There is no point to match prefetch hints if the profile is using MD5. |
116 | if (FunctionSamples::UseMD5) |
117 | return false; |
118 | |
119 | static constexpr std::pair<StringLiteral, unsigned> HintTypes[] = { |
120 | {"_nta_" , X86::PREFETCHNTA}, |
121 | {"_t0_" , X86::PREFETCHT0}, |
122 | {"_t1_" , X86::PREFETCHT1}, |
123 | {"_t2_" , X86::PREFETCHT2}, |
124 | }; |
125 | static const char *SerializedPrefetchPrefix = "__prefetch" ; |
126 | |
127 | auto T = getPrefetchHints(TopSamples, MI); |
128 | if (!T) |
129 | return false; |
130 | int16_t max_index = -1; |
131 | // Convert serialized prefetch hints into PrefetchInfo objects, and populate |
132 | // the Prefetches vector. |
133 | for (const auto &S_V : *T) { |
134 | StringRef Name = S_V.first.stringRef(); |
135 | if (Name.consume_front(Prefix: SerializedPrefetchPrefix)) { |
136 | int64_t D = static_cast<int64_t>(S_V.second); |
137 | unsigned IID = 0; |
138 | for (const auto &HintType : HintTypes) { |
139 | if (Name.consume_front(Prefix: HintType.first)) { |
140 | IID = HintType.second; |
141 | break; |
142 | } |
143 | } |
144 | if (IID == 0) |
145 | return false; |
146 | uint8_t index = 0; |
147 | Name.consumeInteger(Radix: 10, Result&: index); |
148 | |
149 | if (index >= Prefetches.size()) |
150 | Prefetches.resize(N: index + 1); |
151 | Prefetches[index] = {.InstructionID: IID, .Delta: D}; |
152 | max_index = std::max(a: max_index, b: static_cast<int16_t>(index)); |
153 | } |
154 | } |
155 | assert(max_index + 1 >= 0 && |
156 | "Possible overflow: max_index + 1 should be positive." ); |
157 | assert(static_cast<size_t>(max_index + 1) == Prefetches.size() && |
158 | "The number of prefetch hints received should match the number of " |
159 | "PrefetchInfo objects returned" ); |
160 | return !Prefetches.empty(); |
161 | } |
162 | |
163 | bool X86InsertPrefetch::doInitialization(Module &M) { |
164 | if (Filename.empty()) |
165 | return false; |
166 | |
167 | LLVMContext &Ctx = M.getContext(); |
168 | // TODO: Propagate virtual file system into LLVM targets. |
169 | auto FS = vfs::getRealFileSystem(); |
170 | ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr = |
171 | SampleProfileReader::create(Filename, C&: Ctx, FS&: *FS); |
172 | if (std::error_code EC = ReaderOrErr.getError()) { |
173 | std::string Msg = "Could not open profile: " + EC.message(); |
174 | Ctx.diagnose(DI: DiagnosticInfoSampleProfile(Filename, Msg, |
175 | DiagnosticSeverity::DS_Warning)); |
176 | return false; |
177 | } |
178 | Reader = std::move(ReaderOrErr.get()); |
179 | Reader->read(); |
180 | return true; |
181 | } |
182 | |
183 | void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const { |
184 | AU.setPreservesAll(); |
185 | MachineFunctionPass::getAnalysisUsage(AU); |
186 | } |
187 | |
188 | bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) { |
189 | if (!Reader) |
190 | return false; |
191 | const FunctionSamples *Samples = Reader->getSamplesFor(F: MF.getFunction()); |
192 | if (!Samples) |
193 | return false; |
194 | |
195 | bool Changed = false; |
196 | |
197 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
198 | SmallVector<PrefetchInfo, 4> Prefetches; |
199 | for (auto &MBB : MF) { |
200 | for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) { |
201 | auto Current = MI; |
202 | ++MI; |
203 | |
204 | int Offset = X86II::getMemoryOperandNo(TSFlags: Current->getDesc().TSFlags); |
205 | if (Offset < 0) |
206 | continue; |
207 | unsigned Bias = X86II::getOperandBias(Desc: Current->getDesc()); |
208 | int MemOpOffset = Offset + Bias; |
209 | // FIXME(mtrofin): ORE message when the recommendation cannot be taken. |
210 | if (!IsMemOpCompatibleWithPrefetch(MI: *Current, Op: MemOpOffset)) |
211 | continue; |
212 | Prefetches.clear(); |
213 | if (!findPrefetchInfo(TopSamples: Samples, MI: *Current, Prefetches)) |
214 | continue; |
215 | assert(!Prefetches.empty() && |
216 | "The Prefetches vector should contain at least a value if " |
217 | "findPrefetchInfo returned true." ); |
218 | for (auto &PrefInfo : Prefetches) { |
219 | unsigned PFetchInstrID = PrefInfo.InstructionID; |
220 | int64_t Delta = PrefInfo.Delta; |
221 | const MCInstrDesc &Desc = TII->get(Opcode: PFetchInstrID); |
222 | MachineInstr *PFetch = |
223 | MF.CreateMachineInstr(MCID: Desc, DL: Current->getDebugLoc(), NoImplicit: true); |
224 | MachineInstrBuilder MIB(MF, PFetch); |
225 | |
226 | static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 && |
227 | X86::AddrIndexReg == 2 && X86::AddrDisp == 3 && |
228 | X86::AddrSegmentReg == 4, |
229 | "Unexpected change in X86 operand offset order." ); |
230 | |
231 | // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc. |
232 | // FIXME(mtrofin): consider adding a: |
233 | // MachineInstrBuilder::set(unsigned offset, op). |
234 | MIB.addReg(RegNo: Current->getOperand(i: MemOpOffset + X86::AddrBaseReg).getReg()) |
235 | .addImm( |
236 | Val: Current->getOperand(i: MemOpOffset + X86::AddrScaleAmt).getImm()) |
237 | .addReg( |
238 | RegNo: Current->getOperand(i: MemOpOffset + X86::AddrIndexReg).getReg()) |
239 | .addImm(Val: Current->getOperand(i: MemOpOffset + X86::AddrDisp).getImm() + |
240 | Delta) |
241 | .addReg(RegNo: Current->getOperand(i: MemOpOffset + X86::AddrSegmentReg) |
242 | .getReg()); |
243 | |
244 | if (!Current->memoperands_empty()) { |
245 | MachineMemOperand *CurrentOp = *(Current->memoperands_begin()); |
246 | MIB.addMemOperand(MMO: MF.getMachineMemOperand( |
247 | MMO: CurrentOp, Offset: CurrentOp->getOffset() + Delta, Size: CurrentOp->getSize())); |
248 | } |
249 | |
250 | // Insert before Current. This is because Current may clobber some of |
251 | // the registers used to describe the input memory operand. |
252 | MBB.insert(I: Current, M: PFetch); |
253 | Changed = true; |
254 | } |
255 | } |
256 | } |
257 | return Changed; |
258 | } |
259 | |
260 | FunctionPass *llvm::createX86InsertPrefetchPass() { |
261 | return new X86InsertPrefetch(PrefetchHintsFile); |
262 | } |
263 | |