1 | //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implementation of the basic block sections profile reader pass. It parses |
10 | // and stores the basic block sections profile file (which is specified via the |
11 | // `-basic-block-sections` flag). |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" |
16 | #include "llvm/ADT/DenseSet.h" |
17 | #include "llvm/ADT/SmallSet.h" |
18 | #include "llvm/ADT/SmallString.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/ADT/StringMap.h" |
21 | #include "llvm/ADT/StringRef.h" |
22 | #include "llvm/IR/DebugInfoMetadata.h" |
23 | #include "llvm/Pass.h" |
24 | #include "llvm/Support/Error.h" |
25 | #include "llvm/Support/ErrorHandling.h" |
26 | #include "llvm/Support/LineIterator.h" |
27 | #include "llvm/Support/MemoryBuffer.h" |
28 | #include "llvm/Support/Path.h" |
29 | #include <llvm/ADT/STLExtras.h> |
30 | |
31 | using namespace llvm; |
32 | |
33 | char BasicBlockSectionsProfileReaderWrapperPass::ID = 0; |
34 | INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass, |
35 | "bbsections-profile-reader" , |
36 | "Reads and parses a basic block sections profile." , false, |
37 | false) |
38 | |
39 | Expected<UniqueBBID> |
40 | BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { |
41 | SmallVector<StringRef, 2> Parts; |
42 | S.split(A&: Parts, Separator: '.'); |
43 | if (Parts.size() > 2) |
44 | return createProfileParseError(Message: Twine("unable to parse basic block id: '" ) + |
45 | S + "'" ); |
46 | unsigned long long BaseBBID; |
47 | if (getAsUnsignedInteger(Str: Parts[0], Radix: 10, Result&: BaseBBID)) |
48 | return createProfileParseError( |
49 | Message: Twine("unable to parse BB id: '" + Parts[0]) + |
50 | "': unsigned integer expected" ); |
51 | unsigned long long CloneID = 0; |
52 | if (Parts.size() > 1 && getAsUnsignedInteger(Str: Parts[1], Radix: 10, Result&: CloneID)) |
53 | return createProfileParseError(Message: Twine("unable to parse clone id: '" ) + |
54 | Parts[1] + "': unsigned integer expected" ); |
55 | return UniqueBBID{.BaseID: static_cast<unsigned>(BaseBBID), |
56 | .CloneID: static_cast<unsigned>(CloneID)}; |
57 | } |
58 | |
59 | bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { |
60 | return getClusterInfoForFunction(FuncName).first; |
61 | } |
62 | |
63 | std::pair<bool, SmallVector<BBClusterInfo>> |
64 | BasicBlockSectionsProfileReader::getClusterInfoForFunction( |
65 | StringRef FuncName) const { |
66 | auto R = ProgramPathAndClusterInfo.find(Key: getAliasName(FuncName)); |
67 | return R != ProgramPathAndClusterInfo.end() |
68 | ? std::pair(true, R->second.ClusterInfo) |
69 | : std::pair(false, SmallVector<BBClusterInfo>()); |
70 | } |
71 | |
72 | SmallVector<SmallVector<unsigned>> |
73 | BasicBlockSectionsProfileReader::getClonePathsForFunction( |
74 | StringRef FuncName) const { |
75 | return ProgramPathAndClusterInfo.lookup(Key: getAliasName(FuncName)).ClonePaths; |
76 | } |
77 | |
78 | // Reads the version 1 basic block sections profile. Profile for each function |
79 | // is encoded as follows: |
80 | // m <module_name> |
81 | // f <function_name_1> <function_name_2> ... |
82 | // c <bb_id_1> <bb_id_2> <bb_id_3> |
83 | // c <bb_id_4> <bb_id_5> |
84 | // ... |
85 | // Module name specifier (starting with 'm') is optional and allows |
86 | // distinguishing profile for internal-linkage functions with the same name. If |
87 | // not specified, it will apply to any function with the same name. Function |
88 | // name specifier (starting with 'f') can specify multiple function name |
89 | // aliases. Basic block clusters are specified by 'c' and specify the cluster of |
90 | // basic blocks, and the internal order in which they must be placed in the same |
91 | // section. |
92 | // This profile can also specify cloning paths which instruct the compiler to |
93 | // clone basic blocks along a path. The cloned blocks are then specified in the |
94 | // cluster information. |
95 | // The following profile lists two cloning paths (starting with 'p') for |
96 | // function bar and places the total 9 blocks within two clusters. The first two |
97 | // blocks of a cloning path specify the edge along which the path is cloned. For |
98 | // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along |
99 | // the edge 1->3. Within the given clusters, each cloned block is identified by |
100 | // "<original block id>.<clone id>". For instance, 3.1 represents the first |
101 | // clone of block 3. Original blocks are specified just with their block ids. A |
102 | // block cloned multiple times appears with distinct clone ids. The CFG for bar |
103 | // is shown below before and after cloning with its final clusters labeled. |
104 | // |
105 | // f main |
106 | // f bar |
107 | // p 1 3 4 # cloning path 1 |
108 | // p 4 2 # cloning path 2 |
109 | // c 1 3.1 4.1 6 # basic block cluster 1 |
110 | // c 0 2 3 4 2.1 5 # basic block cluster 2 |
111 | // **************************************************************************** |
112 | // function bar before and after cloning with basic block clusters shown. |
113 | // **************************************************************************** |
114 | // .... .............. |
115 | // 0 -------+ : 0 :---->: 1 ---> 3.1 : |
116 | // | | : | : :........ | : |
117 | // v v : v : : v : |
118 | // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1 |
119 | // | | | | : | : : | : |
120 | // | v | | : v ....... : v : |
121 | // | 3 <------+ | : 3 <--+ : : 6 : |
122 | // | | | : | | : :....: |
123 | // | v | : v | : |
124 | // +--- 4 ---> 6 | : 4 | : |
125 | // | : | | : |
126 | // | : v | : |
127 | // | :2.1---+ : cluster 2 |
128 | // | : | ......: |
129 | // | : v : |
130 | // +-->: 5 : |
131 | // .... |
132 | // **************************************************************************** |
133 | Error BasicBlockSectionsProfileReader::ReadV1Profile() { |
134 | auto FI = ProgramPathAndClusterInfo.end(); |
135 | |
136 | // Current cluster ID corresponding to this function. |
137 | unsigned CurrentCluster = 0; |
138 | // Current position in the current cluster. |
139 | unsigned CurrentPosition = 0; |
140 | |
141 | // Temporary set to ensure every basic block ID appears once in the clusters |
142 | // of a function. |
143 | DenseSet<UniqueBBID> FuncBBIDs; |
144 | |
145 | // Debug-info-based module filename for the current function. Empty string |
146 | // means no filename. |
147 | StringRef DIFilename; |
148 | |
149 | for (; !LineIt.is_at_eof(); ++LineIt) { |
150 | StringRef S(*LineIt); |
151 | char Specifier = S[0]; |
152 | S = S.drop_front().trim(); |
153 | SmallVector<StringRef, 4> Values; |
154 | S.split(A&: Values, Separator: ' '); |
155 | switch (Specifier) { |
156 | case '@': |
157 | continue; |
158 | case 'm': // Module name speicifer. |
159 | if (Values.size() != 1) { |
160 | return createProfileParseError(Message: Twine("invalid module name value: '" ) + |
161 | S + "'" ); |
162 | } |
163 | DIFilename = sys::path::remove_leading_dotslash(path: Values[0]); |
164 | continue; |
165 | case 'f': { // Function names specifier. |
166 | bool FunctionFound = any_of(Range&: Values, P: [&](StringRef Alias) { |
167 | auto It = FunctionNameToDIFilename.find(Key: Alias); |
168 | // No match if this function name is not found in this module. |
169 | if (It == FunctionNameToDIFilename.end()) |
170 | return false; |
171 | // Return a match if debug-info-filename is not specified. Otherwise, |
172 | // check for equality. |
173 | return DIFilename.empty() || It->second == DIFilename; |
174 | }); |
175 | if (!FunctionFound) { |
176 | // Skip the following profile by setting the profile iterator (FI) to |
177 | // the past-the-end element. |
178 | FI = ProgramPathAndClusterInfo.end(); |
179 | DIFilename = "" ; |
180 | continue; |
181 | } |
182 | for (size_t i = 1; i < Values.size(); ++i) |
183 | FuncAliasMap.try_emplace(Key: Values[i], Args&: Values.front()); |
184 | |
185 | // Prepare for parsing clusters of this function name. |
186 | // Start a new cluster map for this function name. |
187 | auto R = ProgramPathAndClusterInfo.try_emplace(Key: Values.front()); |
188 | // Report error when multiple profiles have been specified for the same |
189 | // function. |
190 | if (!R.second) |
191 | return createProfileParseError(Message: "duplicate profile for function '" + |
192 | Values.front() + "'" ); |
193 | FI = R.first; |
194 | CurrentCluster = 0; |
195 | FuncBBIDs.clear(); |
196 | // We won't need DIFilename anymore. Clean it up to avoid its application |
197 | // on the next function. |
198 | DIFilename = "" ; |
199 | continue; |
200 | } |
201 | case 'c': // Basic block cluster specifier. |
202 | // Skip the profile when we the profile iterator (FI) refers to the |
203 | // past-the-end element. |
204 | if (FI == ProgramPathAndClusterInfo.end()) |
205 | continue; |
206 | // Reset current cluster position. |
207 | CurrentPosition = 0; |
208 | for (auto BasicBlockIDStr : Values) { |
209 | auto BasicBlockID = parseUniqueBBID(S: BasicBlockIDStr); |
210 | if (!BasicBlockID) |
211 | return BasicBlockID.takeError(); |
212 | if (!FuncBBIDs.insert(V: *BasicBlockID).second) |
213 | return createProfileParseError( |
214 | Message: Twine("duplicate basic block id found '" ) + BasicBlockIDStr + |
215 | "'" ); |
216 | |
217 | FI->second.ClusterInfo.emplace_back(Args: BBClusterInfo{ |
218 | .BBID: *std::move(BasicBlockID), .ClusterID: CurrentCluster, .PositionInCluster: CurrentPosition++}); |
219 | } |
220 | CurrentCluster++; |
221 | continue; |
222 | case 'p': { // Basic block cloning path specifier. |
223 | // Skip the profile when we the profile iterator (FI) refers to the |
224 | // past-the-end element. |
225 | if (FI == ProgramPathAndClusterInfo.end()) |
226 | continue; |
227 | SmallSet<unsigned, 5> BBsInPath; |
228 | FI->second.ClonePaths.push_back(Elt: {}); |
229 | for (size_t I = 0; I < Values.size(); ++I) { |
230 | auto BaseBBIDStr = Values[I]; |
231 | unsigned long long BaseBBID = 0; |
232 | if (getAsUnsignedInteger(Str: BaseBBIDStr, Radix: 10, Result&: BaseBBID)) |
233 | return createProfileParseError(Message: Twine("unsigned integer expected: '" ) + |
234 | BaseBBIDStr + "'" ); |
235 | if (I != 0 && !BBsInPath.insert(V: BaseBBID).second) |
236 | return createProfileParseError( |
237 | Message: Twine("duplicate cloned block in path: '" ) + BaseBBIDStr + "'" ); |
238 | FI->second.ClonePaths.back().push_back(Elt: BaseBBID); |
239 | } |
240 | continue; |
241 | } |
242 | default: |
243 | return createProfileParseError(Message: Twine("invalid specifier: '" ) + |
244 | Twine(Specifier) + "'" ); |
245 | } |
246 | llvm_unreachable("should not break from this switch statement" ); |
247 | } |
248 | return Error::success(); |
249 | } |
250 | |
251 | Error BasicBlockSectionsProfileReader::ReadV0Profile() { |
252 | auto FI = ProgramPathAndClusterInfo.end(); |
253 | // Current cluster ID corresponding to this function. |
254 | unsigned CurrentCluster = 0; |
255 | // Current position in the current cluster. |
256 | unsigned CurrentPosition = 0; |
257 | |
258 | // Temporary set to ensure every basic block ID appears once in the clusters |
259 | // of a function. |
260 | SmallSet<unsigned, 4> FuncBBIDs; |
261 | |
262 | for (; !LineIt.is_at_eof(); ++LineIt) { |
263 | StringRef S(*LineIt); |
264 | if (S[0] == '@') |
265 | continue; |
266 | // Check for the leading "!" |
267 | if (!S.consume_front(Prefix: "!" ) || S.empty()) |
268 | break; |
269 | // Check for second "!" which indicates a cluster of basic blocks. |
270 | if (S.consume_front(Prefix: "!" )) { |
271 | // Skip the profile when we the profile iterator (FI) refers to the |
272 | // past-the-end element. |
273 | if (FI == ProgramPathAndClusterInfo.end()) |
274 | continue; |
275 | SmallVector<StringRef, 4> BBIDs; |
276 | S.split(A&: BBIDs, Separator: ' '); |
277 | // Reset current cluster position. |
278 | CurrentPosition = 0; |
279 | for (auto BBIDStr : BBIDs) { |
280 | unsigned long long BBID; |
281 | if (getAsUnsignedInteger(Str: BBIDStr, Radix: 10, Result&: BBID)) |
282 | return createProfileParseError(Message: Twine("unsigned integer expected: '" ) + |
283 | BBIDStr + "'" ); |
284 | if (!FuncBBIDs.insert(V: BBID).second) |
285 | return createProfileParseError( |
286 | Message: Twine("duplicate basic block id found '" ) + BBIDStr + "'" ); |
287 | |
288 | FI->second.ClusterInfo.emplace_back( |
289 | Args: BBClusterInfo({.BBID: {.BaseID: static_cast<unsigned>(BBID), .CloneID: 0}, |
290 | .ClusterID: CurrentCluster, |
291 | .PositionInCluster: CurrentPosition++})); |
292 | } |
293 | CurrentCluster++; |
294 | } else { |
295 | // This is a function name specifier. It may include a debug info filename |
296 | // specifier starting with `M=`. |
297 | auto [AliasesStr, DIFilenameStr] = S.split(Separator: ' '); |
298 | SmallString<128> DIFilename; |
299 | if (DIFilenameStr.starts_with(Prefix: "M=" )) { |
300 | DIFilename = |
301 | sys::path::remove_leading_dotslash(path: DIFilenameStr.substr(Start: 2)); |
302 | if (DIFilename.empty()) |
303 | return createProfileParseError(Message: "empty module name specifier" ); |
304 | } else if (!DIFilenameStr.empty()) { |
305 | return createProfileParseError(Message: "unknown string found: '" + |
306 | DIFilenameStr + "'" ); |
307 | } |
308 | // Function aliases are separated using '/'. We use the first function |
309 | // name for the cluster info mapping and delegate all other aliases to |
310 | // this one. |
311 | SmallVector<StringRef, 4> Aliases; |
312 | AliasesStr.split(A&: Aliases, Separator: '/'); |
313 | bool FunctionFound = any_of(Range&: Aliases, P: [&](StringRef Alias) { |
314 | auto It = FunctionNameToDIFilename.find(Key: Alias); |
315 | // No match if this function name is not found in this module. |
316 | if (It == FunctionNameToDIFilename.end()) |
317 | return false; |
318 | // Return a match if debug-info-filename is not specified. Otherwise, |
319 | // check for equality. |
320 | return DIFilename.empty() || It->second == DIFilename; |
321 | }); |
322 | if (!FunctionFound) { |
323 | // Skip the following profile by setting the profile iterator (FI) to |
324 | // the past-the-end element. |
325 | FI = ProgramPathAndClusterInfo.end(); |
326 | continue; |
327 | } |
328 | for (size_t i = 1; i < Aliases.size(); ++i) |
329 | FuncAliasMap.try_emplace(Key: Aliases[i], Args&: Aliases.front()); |
330 | |
331 | // Prepare for parsing clusters of this function name. |
332 | // Start a new cluster map for this function name. |
333 | auto R = ProgramPathAndClusterInfo.try_emplace(Key: Aliases.front()); |
334 | // Report error when multiple profiles have been specified for the same |
335 | // function. |
336 | if (!R.second) |
337 | return createProfileParseError(Message: "duplicate profile for function '" + |
338 | Aliases.front() + "'" ); |
339 | FI = R.first; |
340 | CurrentCluster = 0; |
341 | FuncBBIDs.clear(); |
342 | } |
343 | } |
344 | return Error::success(); |
345 | } |
346 | |
347 | // Basic Block Sections can be enabled for a subset of machine basic blocks. |
348 | // This is done by passing a file containing names of functions for which basic |
349 | // block sections are desired. Additionally, machine basic block ids of the |
350 | // functions can also be specified for a finer granularity. Moreover, a cluster |
351 | // of basic blocks could be assigned to the same section. |
352 | // Optionally, a debug-info filename can be specified for each function to allow |
353 | // distinguishing internal-linkage functions of the same name. |
354 | // A file with basic block sections for all of function main and three blocks |
355 | // for function foo (of which 1 and 2 are placed in a cluster) looks like this: |
356 | // (Profile for function foo is only loaded when its debug-info filename |
357 | // matches 'path/to/foo_file.cc'). |
358 | // ---------------------------- |
359 | // list.txt: |
360 | // !main |
361 | // !foo M=path/to/foo_file.cc |
362 | // !!1 2 |
363 | // !!4 |
364 | Error BasicBlockSectionsProfileReader::ReadProfile() { |
365 | assert(MBuf); |
366 | |
367 | unsigned long long Version = 0; |
368 | StringRef FirstLine(*LineIt); |
369 | if (FirstLine.consume_front(Prefix: "v" )) { |
370 | if (getAsUnsignedInteger(Str: FirstLine, Radix: 10, Result&: Version)) { |
371 | return createProfileParseError(Message: Twine("version number expected: '" ) + |
372 | FirstLine + "'" ); |
373 | } |
374 | if (Version > 1) { |
375 | return createProfileParseError(Message: Twine("invalid profile version: " ) + |
376 | Twine(Version)); |
377 | } |
378 | ++LineIt; |
379 | } |
380 | |
381 | switch (Version) { |
382 | case 0: |
383 | // TODO: Deprecate V0 once V1 is fully integrated downstream. |
384 | return ReadV0Profile(); |
385 | case 1: |
386 | return ReadV1Profile(); |
387 | default: |
388 | llvm_unreachable("Invalid profile version." ); |
389 | } |
390 | } |
391 | |
392 | bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) { |
393 | if (!BBSPR.MBuf) |
394 | return false; |
395 | // Get the function name to debug info filename mapping. |
396 | BBSPR.FunctionNameToDIFilename.clear(); |
397 | for (const Function &F : M) { |
398 | SmallString<128> DIFilename; |
399 | if (F.isDeclaration()) |
400 | continue; |
401 | DISubprogram *Subprogram = F.getSubprogram(); |
402 | if (Subprogram) { |
403 | llvm::DICompileUnit *CU = Subprogram->getUnit(); |
404 | if (CU) |
405 | DIFilename = sys::path::remove_leading_dotslash(path: CU->getFilename()); |
406 | } |
407 | [[maybe_unused]] bool inserted = |
408 | BBSPR.FunctionNameToDIFilename.try_emplace(Key: F.getName(), Args&: DIFilename) |
409 | .second; |
410 | assert(inserted); |
411 | } |
412 | if (auto Err = BBSPR.ReadProfile()) |
413 | report_fatal_error(Err: std::move(Err)); |
414 | return false; |
415 | } |
416 | |
417 | AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key; |
418 | |
419 | BasicBlockSectionsProfileReader |
420 | BasicBlockSectionsProfileReaderAnalysis::run(Function &F, |
421 | FunctionAnalysisManager &AM) { |
422 | return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf()); |
423 | } |
424 | |
425 | bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( |
426 | StringRef FuncName) const { |
427 | return BBSPR.isFunctionHot(FuncName); |
428 | } |
429 | |
430 | std::pair<bool, SmallVector<BBClusterInfo>> |
431 | BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( |
432 | StringRef FuncName) const { |
433 | return BBSPR.getClusterInfoForFunction(FuncName); |
434 | } |
435 | |
436 | SmallVector<SmallVector<unsigned>> |
437 | BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction( |
438 | StringRef FuncName) const { |
439 | return BBSPR.getClonePathsForFunction(FuncName); |
440 | } |
441 | |
442 | BasicBlockSectionsProfileReader & |
443 | BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { |
444 | return BBSPR; |
445 | } |
446 | |
447 | ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass( |
448 | const MemoryBuffer *Buf) { |
449 | return new BasicBlockSectionsProfileReaderWrapperPass(Buf); |
450 | } |
451 | |