| 1 | //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Implementation of the basic block sections profile reader pass. It parses |
| 10 | // and stores the basic block sections profile file (which is specified via the |
| 11 | // `-basic-block-sections` flag). |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" |
| 16 | #include "llvm/ADT/DenseSet.h" |
| 17 | #include "llvm/ADT/SmallSet.h" |
| 18 | #include "llvm/ADT/SmallString.h" |
| 19 | #include "llvm/ADT/SmallVector.h" |
| 20 | #include "llvm/ADT/StringMap.h" |
| 21 | #include "llvm/ADT/StringRef.h" |
| 22 | #include "llvm/IR/DebugInfoMetadata.h" |
| 23 | #include "llvm/Pass.h" |
| 24 | #include "llvm/Support/Error.h" |
| 25 | #include "llvm/Support/ErrorHandling.h" |
| 26 | #include "llvm/Support/LineIterator.h" |
| 27 | #include "llvm/Support/MemoryBuffer.h" |
| 28 | #include "llvm/Support/Path.h" |
| 29 | #include <llvm/ADT/STLExtras.h> |
| 30 | |
| 31 | using namespace llvm; |
| 32 | |
| 33 | char BasicBlockSectionsProfileReaderWrapperPass::ID = 0; |
| 34 | INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass, |
| 35 | "bbsections-profile-reader" , |
| 36 | "Reads and parses a basic block sections profile." , false, |
| 37 | false) |
| 38 | |
| 39 | Expected<UniqueBBID> |
| 40 | BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { |
| 41 | SmallVector<StringRef, 2> Parts; |
| 42 | S.split(A&: Parts, Separator: '.'); |
| 43 | if (Parts.size() > 2) |
| 44 | return createProfileParseError(Message: Twine("unable to parse basic block id: '" ) + |
| 45 | S + "'" ); |
| 46 | unsigned long long BaseBBID; |
| 47 | if (getAsUnsignedInteger(Str: Parts[0], Radix: 10, Result&: BaseBBID)) |
| 48 | return createProfileParseError( |
| 49 | Message: Twine("unable to parse BB id: '" + Parts[0]) + |
| 50 | "': unsigned integer expected" ); |
| 51 | unsigned long long CloneID = 0; |
| 52 | if (Parts.size() > 1 && getAsUnsignedInteger(Str: Parts[1], Radix: 10, Result&: CloneID)) |
| 53 | return createProfileParseError(Message: Twine("unable to parse clone id: '" ) + |
| 54 | Parts[1] + "': unsigned integer expected" ); |
| 55 | return UniqueBBID{.BaseID: static_cast<unsigned>(BaseBBID), |
| 56 | .CloneID: static_cast<unsigned>(CloneID)}; |
| 57 | } |
| 58 | |
| 59 | bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { |
| 60 | return getClusterInfoForFunction(FuncName).first; |
| 61 | } |
| 62 | |
| 63 | std::pair<bool, SmallVector<BBClusterInfo>> |
| 64 | BasicBlockSectionsProfileReader::getClusterInfoForFunction( |
| 65 | StringRef FuncName) const { |
| 66 | auto R = ProgramPathAndClusterInfo.find(Key: getAliasName(FuncName)); |
| 67 | return R != ProgramPathAndClusterInfo.end() |
| 68 | ? std::pair(true, R->second.ClusterInfo) |
| 69 | : std::pair(false, SmallVector<BBClusterInfo>()); |
| 70 | } |
| 71 | |
| 72 | SmallVector<SmallVector<unsigned>> |
| 73 | BasicBlockSectionsProfileReader::getClonePathsForFunction( |
| 74 | StringRef FuncName) const { |
| 75 | return ProgramPathAndClusterInfo.lookup(Key: getAliasName(FuncName)).ClonePaths; |
| 76 | } |
| 77 | |
| 78 | // Reads the version 1 basic block sections profile. Profile for each function |
| 79 | // is encoded as follows: |
| 80 | // m <module_name> |
| 81 | // f <function_name_1> <function_name_2> ... |
| 82 | // c <bb_id_1> <bb_id_2> <bb_id_3> |
| 83 | // c <bb_id_4> <bb_id_5> |
| 84 | // ... |
| 85 | // Module name specifier (starting with 'm') is optional and allows |
| 86 | // distinguishing profile for internal-linkage functions with the same name. If |
| 87 | // not specified, it will apply to any function with the same name. Function |
| 88 | // name specifier (starting with 'f') can specify multiple function name |
| 89 | // aliases. Basic block clusters are specified by 'c' and specify the cluster of |
| 90 | // basic blocks, and the internal order in which they must be placed in the same |
| 91 | // section. |
| 92 | // This profile can also specify cloning paths which instruct the compiler to |
| 93 | // clone basic blocks along a path. The cloned blocks are then specified in the |
| 94 | // cluster information. |
| 95 | // The following profile lists two cloning paths (starting with 'p') for |
| 96 | // function bar and places the total 9 blocks within two clusters. The first two |
| 97 | // blocks of a cloning path specify the edge along which the path is cloned. For |
| 98 | // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along |
| 99 | // the edge 1->3. Within the given clusters, each cloned block is identified by |
| 100 | // "<original block id>.<clone id>". For instance, 3.1 represents the first |
| 101 | // clone of block 3. Original blocks are specified just with their block ids. A |
| 102 | // block cloned multiple times appears with distinct clone ids. The CFG for bar |
| 103 | // is shown below before and after cloning with its final clusters labeled. |
| 104 | // |
| 105 | // f main |
| 106 | // f bar |
| 107 | // p 1 3 4 # cloning path 1 |
| 108 | // p 4 2 # cloning path 2 |
| 109 | // c 1 3.1 4.1 6 # basic block cluster 1 |
| 110 | // c 0 2 3 4 2.1 5 # basic block cluster 2 |
| 111 | // **************************************************************************** |
| 112 | // function bar before and after cloning with basic block clusters shown. |
| 113 | // **************************************************************************** |
| 114 | // .... .............. |
| 115 | // 0 -------+ : 0 :---->: 1 ---> 3.1 : |
| 116 | // | | : | : :........ | : |
| 117 | // v v : v : : v : |
| 118 | // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1 |
| 119 | // | | | | : | : : | : |
| 120 | // | v | | : v ....... : v : |
| 121 | // | 3 <------+ | : 3 <--+ : : 6 : |
| 122 | // | | | : | | : :....: |
| 123 | // | v | : v | : |
| 124 | // +--- 4 ---> 6 | : 4 | : |
| 125 | // | : | | : |
| 126 | // | : v | : |
| 127 | // | :2.1---+ : cluster 2 |
| 128 | // | : | ......: |
| 129 | // | : v : |
| 130 | // +-->: 5 : |
| 131 | // .... |
| 132 | // **************************************************************************** |
| 133 | Error BasicBlockSectionsProfileReader::ReadV1Profile() { |
| 134 | auto FI = ProgramPathAndClusterInfo.end(); |
| 135 | |
| 136 | // Current cluster ID corresponding to this function. |
| 137 | unsigned CurrentCluster = 0; |
| 138 | // Current position in the current cluster. |
| 139 | unsigned CurrentPosition = 0; |
| 140 | |
| 141 | // Temporary set to ensure every basic block ID appears once in the clusters |
| 142 | // of a function. |
| 143 | DenseSet<UniqueBBID> FuncBBIDs; |
| 144 | |
| 145 | // Debug-info-based module filename for the current function. Empty string |
| 146 | // means no filename. |
| 147 | StringRef DIFilename; |
| 148 | |
| 149 | for (; !LineIt.is_at_eof(); ++LineIt) { |
| 150 | StringRef S(*LineIt); |
| 151 | char Specifier = S[0]; |
| 152 | S = S.drop_front().trim(); |
| 153 | SmallVector<StringRef, 4> Values; |
| 154 | S.split(A&: Values, Separator: ' '); |
| 155 | switch (Specifier) { |
| 156 | case '@': |
| 157 | continue; |
| 158 | case 'm': // Module name speicifer. |
| 159 | if (Values.size() != 1) { |
| 160 | return createProfileParseError(Message: Twine("invalid module name value: '" ) + |
| 161 | S + "'" ); |
| 162 | } |
| 163 | DIFilename = sys::path::remove_leading_dotslash(path: Values[0]); |
| 164 | continue; |
| 165 | case 'f': { // Function names specifier. |
| 166 | bool FunctionFound = any_of(Range&: Values, P: [&](StringRef Alias) { |
| 167 | auto It = FunctionNameToDIFilename.find(Key: Alias); |
| 168 | // No match if this function name is not found in this module. |
| 169 | if (It == FunctionNameToDIFilename.end()) |
| 170 | return false; |
| 171 | // Return a match if debug-info-filename is not specified. Otherwise, |
| 172 | // check for equality. |
| 173 | return DIFilename.empty() || It->second == DIFilename; |
| 174 | }); |
| 175 | if (!FunctionFound) { |
| 176 | // Skip the following profile by setting the profile iterator (FI) to |
| 177 | // the past-the-end element. |
| 178 | FI = ProgramPathAndClusterInfo.end(); |
| 179 | DIFilename = "" ; |
| 180 | continue; |
| 181 | } |
| 182 | for (size_t i = 1; i < Values.size(); ++i) |
| 183 | FuncAliasMap.try_emplace(Key: Values[i], Args&: Values.front()); |
| 184 | |
| 185 | // Prepare for parsing clusters of this function name. |
| 186 | // Start a new cluster map for this function name. |
| 187 | auto R = ProgramPathAndClusterInfo.try_emplace(Key: Values.front()); |
| 188 | // Report error when multiple profiles have been specified for the same |
| 189 | // function. |
| 190 | if (!R.second) |
| 191 | return createProfileParseError(Message: "duplicate profile for function '" + |
| 192 | Values.front() + "'" ); |
| 193 | FI = R.first; |
| 194 | CurrentCluster = 0; |
| 195 | FuncBBIDs.clear(); |
| 196 | // We won't need DIFilename anymore. Clean it up to avoid its application |
| 197 | // on the next function. |
| 198 | DIFilename = "" ; |
| 199 | continue; |
| 200 | } |
| 201 | case 'c': // Basic block cluster specifier. |
| 202 | // Skip the profile when we the profile iterator (FI) refers to the |
| 203 | // past-the-end element. |
| 204 | if (FI == ProgramPathAndClusterInfo.end()) |
| 205 | continue; |
| 206 | // Reset current cluster position. |
| 207 | CurrentPosition = 0; |
| 208 | for (auto BasicBlockIDStr : Values) { |
| 209 | auto BasicBlockID = parseUniqueBBID(S: BasicBlockIDStr); |
| 210 | if (!BasicBlockID) |
| 211 | return BasicBlockID.takeError(); |
| 212 | if (!FuncBBIDs.insert(V: *BasicBlockID).second) |
| 213 | return createProfileParseError( |
| 214 | Message: Twine("duplicate basic block id found '" ) + BasicBlockIDStr + |
| 215 | "'" ); |
| 216 | |
| 217 | FI->second.ClusterInfo.emplace_back(Args: BBClusterInfo{ |
| 218 | .BBID: *std::move(BasicBlockID), .ClusterID: CurrentCluster, .PositionInCluster: CurrentPosition++}); |
| 219 | } |
| 220 | CurrentCluster++; |
| 221 | continue; |
| 222 | case 'p': { // Basic block cloning path specifier. |
| 223 | // Skip the profile when we the profile iterator (FI) refers to the |
| 224 | // past-the-end element. |
| 225 | if (FI == ProgramPathAndClusterInfo.end()) |
| 226 | continue; |
| 227 | SmallSet<unsigned, 5> BBsInPath; |
| 228 | FI->second.ClonePaths.push_back(Elt: {}); |
| 229 | for (size_t I = 0; I < Values.size(); ++I) { |
| 230 | auto BaseBBIDStr = Values[I]; |
| 231 | unsigned long long BaseBBID = 0; |
| 232 | if (getAsUnsignedInteger(Str: BaseBBIDStr, Radix: 10, Result&: BaseBBID)) |
| 233 | return createProfileParseError(Message: Twine("unsigned integer expected: '" ) + |
| 234 | BaseBBIDStr + "'" ); |
| 235 | if (I != 0 && !BBsInPath.insert(V: BaseBBID).second) |
| 236 | return createProfileParseError( |
| 237 | Message: Twine("duplicate cloned block in path: '" ) + BaseBBIDStr + "'" ); |
| 238 | FI->second.ClonePaths.back().push_back(Elt: BaseBBID); |
| 239 | } |
| 240 | continue; |
| 241 | } |
| 242 | default: |
| 243 | return createProfileParseError(Message: Twine("invalid specifier: '" ) + |
| 244 | Twine(Specifier) + "'" ); |
| 245 | } |
| 246 | llvm_unreachable("should not break from this switch statement" ); |
| 247 | } |
| 248 | return Error::success(); |
| 249 | } |
| 250 | |
| 251 | Error BasicBlockSectionsProfileReader::ReadV0Profile() { |
| 252 | auto FI = ProgramPathAndClusterInfo.end(); |
| 253 | // Current cluster ID corresponding to this function. |
| 254 | unsigned CurrentCluster = 0; |
| 255 | // Current position in the current cluster. |
| 256 | unsigned CurrentPosition = 0; |
| 257 | |
| 258 | // Temporary set to ensure every basic block ID appears once in the clusters |
| 259 | // of a function. |
| 260 | SmallSet<unsigned, 4> FuncBBIDs; |
| 261 | |
| 262 | for (; !LineIt.is_at_eof(); ++LineIt) { |
| 263 | StringRef S(*LineIt); |
| 264 | if (S[0] == '@') |
| 265 | continue; |
| 266 | // Check for the leading "!" |
| 267 | if (!S.consume_front(Prefix: "!" ) || S.empty()) |
| 268 | break; |
| 269 | // Check for second "!" which indicates a cluster of basic blocks. |
| 270 | if (S.consume_front(Prefix: "!" )) { |
| 271 | // Skip the profile when we the profile iterator (FI) refers to the |
| 272 | // past-the-end element. |
| 273 | if (FI == ProgramPathAndClusterInfo.end()) |
| 274 | continue; |
| 275 | SmallVector<StringRef, 4> BBIDs; |
| 276 | S.split(A&: BBIDs, Separator: ' '); |
| 277 | // Reset current cluster position. |
| 278 | CurrentPosition = 0; |
| 279 | for (auto BBIDStr : BBIDs) { |
| 280 | unsigned long long BBID; |
| 281 | if (getAsUnsignedInteger(Str: BBIDStr, Radix: 10, Result&: BBID)) |
| 282 | return createProfileParseError(Message: Twine("unsigned integer expected: '" ) + |
| 283 | BBIDStr + "'" ); |
| 284 | if (!FuncBBIDs.insert(V: BBID).second) |
| 285 | return createProfileParseError( |
| 286 | Message: Twine("duplicate basic block id found '" ) + BBIDStr + "'" ); |
| 287 | |
| 288 | FI->second.ClusterInfo.emplace_back( |
| 289 | Args: BBClusterInfo({.BBID: {.BaseID: static_cast<unsigned>(BBID), .CloneID: 0}, |
| 290 | .ClusterID: CurrentCluster, |
| 291 | .PositionInCluster: CurrentPosition++})); |
| 292 | } |
| 293 | CurrentCluster++; |
| 294 | } else { |
| 295 | // This is a function name specifier. It may include a debug info filename |
| 296 | // specifier starting with `M=`. |
| 297 | auto [AliasesStr, DIFilenameStr] = S.split(Separator: ' '); |
| 298 | SmallString<128> DIFilename; |
| 299 | if (DIFilenameStr.starts_with(Prefix: "M=" )) { |
| 300 | DIFilename = |
| 301 | sys::path::remove_leading_dotslash(path: DIFilenameStr.substr(Start: 2)); |
| 302 | if (DIFilename.empty()) |
| 303 | return createProfileParseError(Message: "empty module name specifier" ); |
| 304 | } else if (!DIFilenameStr.empty()) { |
| 305 | return createProfileParseError(Message: "unknown string found: '" + |
| 306 | DIFilenameStr + "'" ); |
| 307 | } |
| 308 | // Function aliases are separated using '/'. We use the first function |
| 309 | // name for the cluster info mapping and delegate all other aliases to |
| 310 | // this one. |
| 311 | SmallVector<StringRef, 4> Aliases; |
| 312 | AliasesStr.split(A&: Aliases, Separator: '/'); |
| 313 | bool FunctionFound = any_of(Range&: Aliases, P: [&](StringRef Alias) { |
| 314 | auto It = FunctionNameToDIFilename.find(Key: Alias); |
| 315 | // No match if this function name is not found in this module. |
| 316 | if (It == FunctionNameToDIFilename.end()) |
| 317 | return false; |
| 318 | // Return a match if debug-info-filename is not specified. Otherwise, |
| 319 | // check for equality. |
| 320 | return DIFilename.empty() || It->second == DIFilename; |
| 321 | }); |
| 322 | if (!FunctionFound) { |
| 323 | // Skip the following profile by setting the profile iterator (FI) to |
| 324 | // the past-the-end element. |
| 325 | FI = ProgramPathAndClusterInfo.end(); |
| 326 | continue; |
| 327 | } |
| 328 | for (size_t i = 1; i < Aliases.size(); ++i) |
| 329 | FuncAliasMap.try_emplace(Key: Aliases[i], Args&: Aliases.front()); |
| 330 | |
| 331 | // Prepare for parsing clusters of this function name. |
| 332 | // Start a new cluster map for this function name. |
| 333 | auto R = ProgramPathAndClusterInfo.try_emplace(Key: Aliases.front()); |
| 334 | // Report error when multiple profiles have been specified for the same |
| 335 | // function. |
| 336 | if (!R.second) |
| 337 | return createProfileParseError(Message: "duplicate profile for function '" + |
| 338 | Aliases.front() + "'" ); |
| 339 | FI = R.first; |
| 340 | CurrentCluster = 0; |
| 341 | FuncBBIDs.clear(); |
| 342 | } |
| 343 | } |
| 344 | return Error::success(); |
| 345 | } |
| 346 | |
| 347 | // Basic Block Sections can be enabled for a subset of machine basic blocks. |
| 348 | // This is done by passing a file containing names of functions for which basic |
| 349 | // block sections are desired. Additionally, machine basic block ids of the |
| 350 | // functions can also be specified for a finer granularity. Moreover, a cluster |
| 351 | // of basic blocks could be assigned to the same section. |
| 352 | // Optionally, a debug-info filename can be specified for each function to allow |
| 353 | // distinguishing internal-linkage functions of the same name. |
| 354 | // A file with basic block sections for all of function main and three blocks |
| 355 | // for function foo (of which 1 and 2 are placed in a cluster) looks like this: |
| 356 | // (Profile for function foo is only loaded when its debug-info filename |
| 357 | // matches 'path/to/foo_file.cc'). |
| 358 | // ---------------------------- |
| 359 | // list.txt: |
| 360 | // !main |
| 361 | // !foo M=path/to/foo_file.cc |
| 362 | // !!1 2 |
| 363 | // !!4 |
| 364 | Error BasicBlockSectionsProfileReader::ReadProfile() { |
| 365 | assert(MBuf); |
| 366 | |
| 367 | unsigned long long Version = 0; |
| 368 | StringRef FirstLine(*LineIt); |
| 369 | if (FirstLine.consume_front(Prefix: "v" )) { |
| 370 | if (getAsUnsignedInteger(Str: FirstLine, Radix: 10, Result&: Version)) { |
| 371 | return createProfileParseError(Message: Twine("version number expected: '" ) + |
| 372 | FirstLine + "'" ); |
| 373 | } |
| 374 | if (Version > 1) { |
| 375 | return createProfileParseError(Message: Twine("invalid profile version: " ) + |
| 376 | Twine(Version)); |
| 377 | } |
| 378 | ++LineIt; |
| 379 | } |
| 380 | |
| 381 | switch (Version) { |
| 382 | case 0: |
| 383 | // TODO: Deprecate V0 once V1 is fully integrated downstream. |
| 384 | return ReadV0Profile(); |
| 385 | case 1: |
| 386 | return ReadV1Profile(); |
| 387 | default: |
| 388 | llvm_unreachable("Invalid profile version." ); |
| 389 | } |
| 390 | } |
| 391 | |
| 392 | bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) { |
| 393 | if (!BBSPR.MBuf) |
| 394 | return false; |
| 395 | // Get the function name to debug info filename mapping. |
| 396 | BBSPR.FunctionNameToDIFilename.clear(); |
| 397 | for (const Function &F : M) { |
| 398 | SmallString<128> DIFilename; |
| 399 | if (F.isDeclaration()) |
| 400 | continue; |
| 401 | DISubprogram *Subprogram = F.getSubprogram(); |
| 402 | if (Subprogram) { |
| 403 | llvm::DICompileUnit *CU = Subprogram->getUnit(); |
| 404 | if (CU) |
| 405 | DIFilename = sys::path::remove_leading_dotslash(path: CU->getFilename()); |
| 406 | } |
| 407 | [[maybe_unused]] bool inserted = |
| 408 | BBSPR.FunctionNameToDIFilename.try_emplace(Key: F.getName(), Args&: DIFilename) |
| 409 | .second; |
| 410 | assert(inserted); |
| 411 | } |
| 412 | if (auto Err = BBSPR.ReadProfile()) |
| 413 | report_fatal_error(Err: std::move(Err)); |
| 414 | return false; |
| 415 | } |
| 416 | |
| 417 | AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key; |
| 418 | |
| 419 | BasicBlockSectionsProfileReader |
| 420 | BasicBlockSectionsProfileReaderAnalysis::run(Function &F, |
| 421 | FunctionAnalysisManager &AM) { |
| 422 | return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf()); |
| 423 | } |
| 424 | |
| 425 | bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( |
| 426 | StringRef FuncName) const { |
| 427 | return BBSPR.isFunctionHot(FuncName); |
| 428 | } |
| 429 | |
| 430 | std::pair<bool, SmallVector<BBClusterInfo>> |
| 431 | BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( |
| 432 | StringRef FuncName) const { |
| 433 | return BBSPR.getClusterInfoForFunction(FuncName); |
| 434 | } |
| 435 | |
| 436 | SmallVector<SmallVector<unsigned>> |
| 437 | BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction( |
| 438 | StringRef FuncName) const { |
| 439 | return BBSPR.getClonePathsForFunction(FuncName); |
| 440 | } |
| 441 | |
| 442 | BasicBlockSectionsProfileReader & |
| 443 | BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { |
| 444 | return BBSPR; |
| 445 | } |
| 446 | |
| 447 | ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass( |
| 448 | const MemoryBuffer *Buf) { |
| 449 | return new BasicBlockSectionsProfileReaderWrapperPass(Buf); |
| 450 | } |
| 451 | |