1//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the basic block sections profile reader pass. It parses
10// and stores the basic block sections profile file (which is specified via the
11// `-basic-block-sections` flag).
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/SmallSet.h"
18#include "llvm/ADT/SmallString.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/IR/DebugInfoMetadata.h"
23#include "llvm/Pass.h"
24#include "llvm/Support/Error.h"
25#include "llvm/Support/ErrorHandling.h"
26#include "llvm/Support/LineIterator.h"
27#include "llvm/Support/MemoryBuffer.h"
28#include "llvm/Support/Path.h"
29#include <llvm/ADT/STLExtras.h>
30
31using namespace llvm;
32
33char BasicBlockSectionsProfileReaderWrapperPass::ID = 0;
34INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass,
35 "bbsections-profile-reader",
36 "Reads and parses a basic block sections profile.", false,
37 false)
38
39Expected<UniqueBBID>
40BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const {
41 SmallVector<StringRef, 2> Parts;
42 S.split(A&: Parts, Separator: '.');
43 if (Parts.size() > 2)
44 return createProfileParseError(Message: Twine("unable to parse basic block id: '") +
45 S + "'");
46 unsigned long long BaseBBID;
47 if (getAsUnsignedInteger(Str: Parts[0], Radix: 10, Result&: BaseBBID))
48 return createProfileParseError(
49 Message: Twine("unable to parse BB id: '" + Parts[0]) +
50 "': unsigned integer expected");
51 unsigned long long CloneID = 0;
52 if (Parts.size() > 1 && getAsUnsignedInteger(Str: Parts[1], Radix: 10, Result&: CloneID))
53 return createProfileParseError(Message: Twine("unable to parse clone id: '") +
54 Parts[1] + "': unsigned integer expected");
55 return UniqueBBID{.BaseID: static_cast<unsigned>(BaseBBID),
56 .CloneID: static_cast<unsigned>(CloneID)};
57}
58
59bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
60 return getClusterInfoForFunction(FuncName).first;
61}
62
63std::pair<bool, SmallVector<BBClusterInfo>>
64BasicBlockSectionsProfileReader::getClusterInfoForFunction(
65 StringRef FuncName) const {
66 auto R = ProgramPathAndClusterInfo.find(Key: getAliasName(FuncName));
67 return R != ProgramPathAndClusterInfo.end()
68 ? std::pair(true, R->second.ClusterInfo)
69 : std::pair(false, SmallVector<BBClusterInfo>());
70}
71
72SmallVector<SmallVector<unsigned>>
73BasicBlockSectionsProfileReader::getClonePathsForFunction(
74 StringRef FuncName) const {
75 return ProgramPathAndClusterInfo.lookup(Key: getAliasName(FuncName)).ClonePaths;
76}
77
78// Reads the version 1 basic block sections profile. Profile for each function
79// is encoded as follows:
80// m <module_name>
81// f <function_name_1> <function_name_2> ...
82// c <bb_id_1> <bb_id_2> <bb_id_3>
83// c <bb_id_4> <bb_id_5>
84// ...
85// Module name specifier (starting with 'm') is optional and allows
86// distinguishing profile for internal-linkage functions with the same name. If
87// not specified, it will apply to any function with the same name. Function
88// name specifier (starting with 'f') can specify multiple function name
89// aliases. Basic block clusters are specified by 'c' and specify the cluster of
90// basic blocks, and the internal order in which they must be placed in the same
91// section.
92// This profile can also specify cloning paths which instruct the compiler to
93// clone basic blocks along a path. The cloned blocks are then specified in the
94// cluster information.
95// The following profile lists two cloning paths (starting with 'p') for
96// function bar and places the total 9 blocks within two clusters. The first two
97// blocks of a cloning path specify the edge along which the path is cloned. For
98// instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along
99// the edge 1->3. Within the given clusters, each cloned block is identified by
100// "<original block id>.<clone id>". For instance, 3.1 represents the first
101// clone of block 3. Original blocks are specified just with their block ids. A
102// block cloned multiple times appears with distinct clone ids. The CFG for bar
103// is shown below before and after cloning with its final clusters labeled.
104//
105// f main
106// f bar
107// p 1 3 4 # cloning path 1
108// p 4 2 # cloning path 2
109// c 1 3.1 4.1 6 # basic block cluster 1
110// c 0 2 3 4 2.1 5 # basic block cluster 2
111// ****************************************************************************
112// function bar before and after cloning with basic block clusters shown.
113// ****************************************************************************
114// .... ..............
115// 0 -------+ : 0 :---->: 1 ---> 3.1 :
116// | | : | : :........ | :
117// v v : v : : v :
118// +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1
119// | | | | : | : : | :
120// | v | | : v ....... : v :
121// | 3 <------+ | : 3 <--+ : : 6 :
122// | | | : | | : :....:
123// | v | : v | :
124// +--- 4 ---> 6 | : 4 | :
125// | : | | :
126// | : v | :
127// | :2.1---+ : cluster 2
128// | : | ......:
129// | : v :
130// +-->: 5 :
131// ....
132// ****************************************************************************
133Error BasicBlockSectionsProfileReader::ReadV1Profile() {
134 auto FI = ProgramPathAndClusterInfo.end();
135
136 // Current cluster ID corresponding to this function.
137 unsigned CurrentCluster = 0;
138 // Current position in the current cluster.
139 unsigned CurrentPosition = 0;
140
141 // Temporary set to ensure every basic block ID appears once in the clusters
142 // of a function.
143 DenseSet<UniqueBBID> FuncBBIDs;
144
145 // Debug-info-based module filename for the current function. Empty string
146 // means no filename.
147 StringRef DIFilename;
148
149 for (; !LineIt.is_at_eof(); ++LineIt) {
150 StringRef S(*LineIt);
151 char Specifier = S[0];
152 S = S.drop_front().trim();
153 SmallVector<StringRef, 4> Values;
154 S.split(A&: Values, Separator: ' ');
155 switch (Specifier) {
156 case '@':
157 continue;
158 case 'm': // Module name speicifer.
159 if (Values.size() != 1) {
160 return createProfileParseError(Message: Twine("invalid module name value: '") +
161 S + "'");
162 }
163 DIFilename = sys::path::remove_leading_dotslash(path: Values[0]);
164 continue;
165 case 'f': { // Function names specifier.
166 bool FunctionFound = any_of(Range&: Values, P: [&](StringRef Alias) {
167 auto It = FunctionNameToDIFilename.find(Key: Alias);
168 // No match if this function name is not found in this module.
169 if (It == FunctionNameToDIFilename.end())
170 return false;
171 // Return a match if debug-info-filename is not specified. Otherwise,
172 // check for equality.
173 return DIFilename.empty() || It->second == DIFilename;
174 });
175 if (!FunctionFound) {
176 // Skip the following profile by setting the profile iterator (FI) to
177 // the past-the-end element.
178 FI = ProgramPathAndClusterInfo.end();
179 DIFilename = "";
180 continue;
181 }
182 for (size_t i = 1; i < Values.size(); ++i)
183 FuncAliasMap.try_emplace(Key: Values[i], Args&: Values.front());
184
185 // Prepare for parsing clusters of this function name.
186 // Start a new cluster map for this function name.
187 auto R = ProgramPathAndClusterInfo.try_emplace(Key: Values.front());
188 // Report error when multiple profiles have been specified for the same
189 // function.
190 if (!R.second)
191 return createProfileParseError(Message: "duplicate profile for function '" +
192 Values.front() + "'");
193 FI = R.first;
194 CurrentCluster = 0;
195 FuncBBIDs.clear();
196 // We won't need DIFilename anymore. Clean it up to avoid its application
197 // on the next function.
198 DIFilename = "";
199 continue;
200 }
201 case 'c': // Basic block cluster specifier.
202 // Skip the profile when we the profile iterator (FI) refers to the
203 // past-the-end element.
204 if (FI == ProgramPathAndClusterInfo.end())
205 continue;
206 // Reset current cluster position.
207 CurrentPosition = 0;
208 for (auto BasicBlockIDStr : Values) {
209 auto BasicBlockID = parseUniqueBBID(S: BasicBlockIDStr);
210 if (!BasicBlockID)
211 return BasicBlockID.takeError();
212 if (!FuncBBIDs.insert(V: *BasicBlockID).second)
213 return createProfileParseError(
214 Message: Twine("duplicate basic block id found '") + BasicBlockIDStr +
215 "'");
216
217 FI->second.ClusterInfo.emplace_back(Args: BBClusterInfo{
218 .BBID: *std::move(BasicBlockID), .ClusterID: CurrentCluster, .PositionInCluster: CurrentPosition++});
219 }
220 CurrentCluster++;
221 continue;
222 case 'p': { // Basic block cloning path specifier.
223 // Skip the profile when we the profile iterator (FI) refers to the
224 // past-the-end element.
225 if (FI == ProgramPathAndClusterInfo.end())
226 continue;
227 SmallSet<unsigned, 5> BBsInPath;
228 FI->second.ClonePaths.push_back(Elt: {});
229 for (size_t I = 0; I < Values.size(); ++I) {
230 auto BaseBBIDStr = Values[I];
231 unsigned long long BaseBBID = 0;
232 if (getAsUnsignedInteger(Str: BaseBBIDStr, Radix: 10, Result&: BaseBBID))
233 return createProfileParseError(Message: Twine("unsigned integer expected: '") +
234 BaseBBIDStr + "'");
235 if (I != 0 && !BBsInPath.insert(V: BaseBBID).second)
236 return createProfileParseError(
237 Message: Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'");
238 FI->second.ClonePaths.back().push_back(Elt: BaseBBID);
239 }
240 continue;
241 }
242 default:
243 return createProfileParseError(Message: Twine("invalid specifier: '") +
244 Twine(Specifier) + "'");
245 }
246 llvm_unreachable("should not break from this switch statement");
247 }
248 return Error::success();
249}
250
251Error BasicBlockSectionsProfileReader::ReadV0Profile() {
252 auto FI = ProgramPathAndClusterInfo.end();
253 // Current cluster ID corresponding to this function.
254 unsigned CurrentCluster = 0;
255 // Current position in the current cluster.
256 unsigned CurrentPosition = 0;
257
258 // Temporary set to ensure every basic block ID appears once in the clusters
259 // of a function.
260 SmallSet<unsigned, 4> FuncBBIDs;
261
262 for (; !LineIt.is_at_eof(); ++LineIt) {
263 StringRef S(*LineIt);
264 if (S[0] == '@')
265 continue;
266 // Check for the leading "!"
267 if (!S.consume_front(Prefix: "!") || S.empty())
268 break;
269 // Check for second "!" which indicates a cluster of basic blocks.
270 if (S.consume_front(Prefix: "!")) {
271 // Skip the profile when we the profile iterator (FI) refers to the
272 // past-the-end element.
273 if (FI == ProgramPathAndClusterInfo.end())
274 continue;
275 SmallVector<StringRef, 4> BBIDs;
276 S.split(A&: BBIDs, Separator: ' ');
277 // Reset current cluster position.
278 CurrentPosition = 0;
279 for (auto BBIDStr : BBIDs) {
280 unsigned long long BBID;
281 if (getAsUnsignedInteger(Str: BBIDStr, Radix: 10, Result&: BBID))
282 return createProfileParseError(Message: Twine("unsigned integer expected: '") +
283 BBIDStr + "'");
284 if (!FuncBBIDs.insert(V: BBID).second)
285 return createProfileParseError(
286 Message: Twine("duplicate basic block id found '") + BBIDStr + "'");
287
288 FI->second.ClusterInfo.emplace_back(
289 Args: BBClusterInfo({.BBID: {.BaseID: static_cast<unsigned>(BBID), .CloneID: 0},
290 .ClusterID: CurrentCluster,
291 .PositionInCluster: CurrentPosition++}));
292 }
293 CurrentCluster++;
294 } else {
295 // This is a function name specifier. It may include a debug info filename
296 // specifier starting with `M=`.
297 auto [AliasesStr, DIFilenameStr] = S.split(Separator: ' ');
298 SmallString<128> DIFilename;
299 if (DIFilenameStr.starts_with(Prefix: "M=")) {
300 DIFilename =
301 sys::path::remove_leading_dotslash(path: DIFilenameStr.substr(Start: 2));
302 if (DIFilename.empty())
303 return createProfileParseError(Message: "empty module name specifier");
304 } else if (!DIFilenameStr.empty()) {
305 return createProfileParseError(Message: "unknown string found: '" +
306 DIFilenameStr + "'");
307 }
308 // Function aliases are separated using '/'. We use the first function
309 // name for the cluster info mapping and delegate all other aliases to
310 // this one.
311 SmallVector<StringRef, 4> Aliases;
312 AliasesStr.split(A&: Aliases, Separator: '/');
313 bool FunctionFound = any_of(Range&: Aliases, P: [&](StringRef Alias) {
314 auto It = FunctionNameToDIFilename.find(Key: Alias);
315 // No match if this function name is not found in this module.
316 if (It == FunctionNameToDIFilename.end())
317 return false;
318 // Return a match if debug-info-filename is not specified. Otherwise,
319 // check for equality.
320 return DIFilename.empty() || It->second == DIFilename;
321 });
322 if (!FunctionFound) {
323 // Skip the following profile by setting the profile iterator (FI) to
324 // the past-the-end element.
325 FI = ProgramPathAndClusterInfo.end();
326 continue;
327 }
328 for (size_t i = 1; i < Aliases.size(); ++i)
329 FuncAliasMap.try_emplace(Key: Aliases[i], Args&: Aliases.front());
330
331 // Prepare for parsing clusters of this function name.
332 // Start a new cluster map for this function name.
333 auto R = ProgramPathAndClusterInfo.try_emplace(Key: Aliases.front());
334 // Report error when multiple profiles have been specified for the same
335 // function.
336 if (!R.second)
337 return createProfileParseError(Message: "duplicate profile for function '" +
338 Aliases.front() + "'");
339 FI = R.first;
340 CurrentCluster = 0;
341 FuncBBIDs.clear();
342 }
343 }
344 return Error::success();
345}
346
347// Basic Block Sections can be enabled for a subset of machine basic blocks.
348// This is done by passing a file containing names of functions for which basic
349// block sections are desired. Additionally, machine basic block ids of the
350// functions can also be specified for a finer granularity. Moreover, a cluster
351// of basic blocks could be assigned to the same section.
352// Optionally, a debug-info filename can be specified for each function to allow
353// distinguishing internal-linkage functions of the same name.
354// A file with basic block sections for all of function main and three blocks
355// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
356// (Profile for function foo is only loaded when its debug-info filename
357// matches 'path/to/foo_file.cc').
358// ----------------------------
359// list.txt:
360// !main
361// !foo M=path/to/foo_file.cc
362// !!1 2
363// !!4
364Error BasicBlockSectionsProfileReader::ReadProfile() {
365 assert(MBuf);
366
367 unsigned long long Version = 0;
368 StringRef FirstLine(*LineIt);
369 if (FirstLine.consume_front(Prefix: "v")) {
370 if (getAsUnsignedInteger(Str: FirstLine, Radix: 10, Result&: Version)) {
371 return createProfileParseError(Message: Twine("version number expected: '") +
372 FirstLine + "'");
373 }
374 if (Version > 1) {
375 return createProfileParseError(Message: Twine("invalid profile version: ") +
376 Twine(Version));
377 }
378 ++LineIt;
379 }
380
381 switch (Version) {
382 case 0:
383 // TODO: Deprecate V0 once V1 is fully integrated downstream.
384 return ReadV0Profile();
385 case 1:
386 return ReadV1Profile();
387 default:
388 llvm_unreachable("Invalid profile version.");
389 }
390}
391
392bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) {
393 if (!BBSPR.MBuf)
394 return false;
395 // Get the function name to debug info filename mapping.
396 BBSPR.FunctionNameToDIFilename.clear();
397 for (const Function &F : M) {
398 SmallString<128> DIFilename;
399 if (F.isDeclaration())
400 continue;
401 DISubprogram *Subprogram = F.getSubprogram();
402 if (Subprogram) {
403 llvm::DICompileUnit *CU = Subprogram->getUnit();
404 if (CU)
405 DIFilename = sys::path::remove_leading_dotslash(path: CU->getFilename());
406 }
407 [[maybe_unused]] bool inserted =
408 BBSPR.FunctionNameToDIFilename.try_emplace(Key: F.getName(), Args&: DIFilename)
409 .second;
410 assert(inserted);
411 }
412 if (auto Err = BBSPR.ReadProfile())
413 report_fatal_error(Err: std::move(Err));
414 return false;
415}
416
417AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key;
418
419BasicBlockSectionsProfileReader
420BasicBlockSectionsProfileReaderAnalysis::run(Function &F,
421 FunctionAnalysisManager &AM) {
422 return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf());
423}
424
425bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot(
426 StringRef FuncName) const {
427 return BBSPR.isFunctionHot(FuncName);
428}
429
430std::pair<bool, SmallVector<BBClusterInfo>>
431BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction(
432 StringRef FuncName) const {
433 return BBSPR.getClusterInfoForFunction(FuncName);
434}
435
436SmallVector<SmallVector<unsigned>>
437BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction(
438 StringRef FuncName) const {
439 return BBSPR.getClonePathsForFunction(FuncName);
440}
441
442BasicBlockSectionsProfileReader &
443BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
444 return BBSPR;
445}
446
447ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass(
448 const MemoryBuffer *Buf) {
449 return new BasicBlockSectionsProfileReaderWrapperPass(Buf);
450}
451