| 1 | //===- Profile.cpp - XRay Profile Abstraction -----------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Defines the XRay Profile class representing the latency profile generated by |
| 10 | // XRay's profiling mode. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | #include "llvm/XRay/Profile.h" |
| 14 | |
| 15 | #include "llvm/Support/DataExtractor.h" |
| 16 | #include "llvm/Support/Error.h" |
| 17 | #include "llvm/Support/FileSystem.h" |
| 18 | #include "llvm/XRay/Trace.h" |
| 19 | #include <memory> |
| 20 | |
| 21 | using namespace llvm; |
| 22 | using namespace llvm::xray; |
| 23 | |
| 24 | Profile::Profile(const Profile &O) { |
| 25 | // We need to re-create all the tries from the original (O), into the current |
| 26 | // Profile being initialized, through the Block instances we see. |
| 27 | for (const auto &Block : O) { |
| 28 | Blocks.push_back(x: {.Thread: Block.Thread, .PathData: {}}); |
| 29 | auto &B = Blocks.back(); |
| 30 | for (const auto &PathData : Block.PathData) |
| 31 | B.PathData.push_back(x: {internPath(P: cantFail(ValOrErr: O.expandPath(P: PathData.first))), |
| 32 | PathData.second}); |
| 33 | } |
| 34 | } |
| 35 | |
| 36 | Profile &Profile::operator=(const Profile &O) { |
| 37 | Profile P = O; |
| 38 | *this = std::move(P); |
| 39 | return *this; |
| 40 | } |
| 41 | |
| 42 | namespace { |
| 43 | |
| 44 | struct { |
| 45 | uint32_t ; |
| 46 | uint32_t ; |
| 47 | uint64_t ; |
| 48 | }; |
| 49 | } // namespace |
| 50 | |
| 51 | static Expected<BlockHeader> (DataExtractor &, |
| 52 | uint64_t &Offset) { |
| 53 | BlockHeader H; |
| 54 | uint64_t CurrentOffset = Offset; |
| 55 | H.Size = Extractor.getU32(offset_ptr: &Offset); |
| 56 | if (Offset == CurrentOffset) |
| 57 | return make_error<StringError>( |
| 58 | Args: Twine("Error parsing block header size at offset '" ) + |
| 59 | Twine(CurrentOffset) + "'" , |
| 60 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
| 61 | CurrentOffset = Offset; |
| 62 | H.Number = Extractor.getU32(offset_ptr: &Offset); |
| 63 | if (Offset == CurrentOffset) |
| 64 | return make_error<StringError>( |
| 65 | Args: Twine("Error parsing block header number at offset '" ) + |
| 66 | Twine(CurrentOffset) + "'" , |
| 67 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
| 68 | CurrentOffset = Offset; |
| 69 | H.Thread = Extractor.getU64(offset_ptr: &Offset); |
| 70 | if (Offset == CurrentOffset) |
| 71 | return make_error<StringError>( |
| 72 | Args: Twine("Error parsing block header thread id at offset '" ) + |
| 73 | Twine(CurrentOffset) + "'" , |
| 74 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
| 75 | return H; |
| 76 | } |
| 77 | |
| 78 | static Expected<std::vector<Profile::FuncID>> (DataExtractor &, |
| 79 | uint64_t &Offset) { |
| 80 | // We're reading a sequence of int32_t's until we find a 0. |
| 81 | std::vector<Profile::FuncID> Path; |
| 82 | auto CurrentOffset = Offset; |
| 83 | int32_t FuncId; |
| 84 | do { |
| 85 | FuncId = Extractor.getSigned(offset_ptr: &Offset, size: 4); |
| 86 | if (CurrentOffset == Offset) |
| 87 | return make_error<StringError>( |
| 88 | Args: Twine("Error parsing path at offset '" ) + Twine(CurrentOffset) + "'" , |
| 89 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
| 90 | CurrentOffset = Offset; |
| 91 | Path.push_back(x: FuncId); |
| 92 | } while (FuncId != 0); |
| 93 | return std::move(Path); |
| 94 | } |
| 95 | |
| 96 | static Expected<Profile::Data> (DataExtractor &, |
| 97 | uint64_t &Offset) { |
| 98 | // We expect a certain number of elements for Data: |
| 99 | // - A 64-bit CallCount |
| 100 | // - A 64-bit CumulativeLocalTime counter |
| 101 | Profile::Data D; |
| 102 | auto CurrentOffset = Offset; |
| 103 | D.CallCount = Extractor.getU64(offset_ptr: &Offset); |
| 104 | if (CurrentOffset == Offset) |
| 105 | return make_error<StringError>( |
| 106 | Args: Twine("Error parsing call counts at offset '" ) + Twine(CurrentOffset) + |
| 107 | "'" , |
| 108 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
| 109 | CurrentOffset = Offset; |
| 110 | D.CumulativeLocalTime = Extractor.getU64(offset_ptr: &Offset); |
| 111 | if (CurrentOffset == Offset) |
| 112 | return make_error<StringError>( |
| 113 | Args: Twine("Error parsing cumulative local time at offset '" ) + |
| 114 | Twine(CurrentOffset) + "'" , |
| 115 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
| 116 | return D; |
| 117 | } |
| 118 | |
| 119 | Error Profile::addBlock(Block &&B) { |
| 120 | if (B.PathData.empty()) |
| 121 | return make_error<StringError>( |
| 122 | Args: "Block may not have empty path data." , |
| 123 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
| 124 | |
| 125 | Blocks.emplace_back(args: std::move(B)); |
| 126 | return Error::success(); |
| 127 | } |
| 128 | |
| 129 | Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const { |
| 130 | auto It = PathIDMap.find(Val: P); |
| 131 | if (It == PathIDMap.end()) |
| 132 | return make_error<StringError>( |
| 133 | Args: Twine("PathID not found: " ) + Twine(P), |
| 134 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
| 135 | std::vector<Profile::FuncID> Path; |
| 136 | for (auto Node = It->second; Node; Node = Node->Caller) |
| 137 | Path.push_back(x: Node->Func); |
| 138 | return std::move(Path); |
| 139 | } |
| 140 | |
| 141 | Profile::PathID Profile::internPath(ArrayRef<FuncID> P) { |
| 142 | if (P.empty()) |
| 143 | return 0; |
| 144 | |
| 145 | auto RootToLeafPath = reverse(C&: P); |
| 146 | |
| 147 | // Find the root. |
| 148 | auto It = RootToLeafPath.begin(); |
| 149 | auto PathRoot = *It++; |
| 150 | auto RootIt = |
| 151 | find_if(Range&: Roots, P: [PathRoot](TrieNode *N) { return N->Func == PathRoot; }); |
| 152 | |
| 153 | // If we've not seen this root before, remember it. |
| 154 | TrieNode *Node = nullptr; |
| 155 | if (RootIt == Roots.end()) { |
| 156 | NodeStorage.emplace_back(); |
| 157 | Node = &NodeStorage.back(); |
| 158 | Node->Func = PathRoot; |
| 159 | Roots.push_back(Elt: Node); |
| 160 | } else { |
| 161 | Node = *RootIt; |
| 162 | } |
| 163 | |
| 164 | // Now traverse the path, re-creating if necessary. |
| 165 | while (It != RootToLeafPath.end()) { |
| 166 | auto NodeFuncID = *It++; |
| 167 | auto CalleeIt = find_if(Range&: Node->Callees, P: [NodeFuncID](TrieNode *N) { |
| 168 | return N->Func == NodeFuncID; |
| 169 | }); |
| 170 | if (CalleeIt == Node->Callees.end()) { |
| 171 | NodeStorage.emplace_back(); |
| 172 | auto NewNode = &NodeStorage.back(); |
| 173 | NewNode->Func = NodeFuncID; |
| 174 | NewNode->Caller = Node; |
| 175 | Node->Callees.push_back(x: NewNode); |
| 176 | Node = NewNode; |
| 177 | } else { |
| 178 | Node = *CalleeIt; |
| 179 | } |
| 180 | } |
| 181 | |
| 182 | // At this point, Node *must* be pointing at the leaf. |
| 183 | assert(Node->Func == P.front()); |
| 184 | if (Node->ID == 0) { |
| 185 | Node->ID = NextID++; |
| 186 | PathIDMap.insert(KV: {Node->ID, Node}); |
| 187 | } |
| 188 | return Node->ID; |
| 189 | } |
| 190 | |
| 191 | Profile xray::mergeProfilesByThread(const Profile &L, const Profile &R) { |
| 192 | Profile Merged; |
| 193 | using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; |
| 194 | using PathDataMapPtr = std::unique_ptr<PathDataMap>; |
| 195 | using PathDataVector = decltype(Profile::Block::PathData); |
| 196 | using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>; |
| 197 | ThreadProfileIndexMap ThreadProfileIndex; |
| 198 | |
| 199 | for (const auto &P : {std::ref(t: L), std::ref(t: R)}) |
| 200 | for (const auto &Block : P.get()) { |
| 201 | ThreadProfileIndexMap::iterator It; |
| 202 | std::tie(args&: It, args: std::ignore) = ThreadProfileIndex.insert( |
| 203 | KV: {Block.Thread, std::make_unique<PathDataMap>()}); |
| 204 | for (const auto &PathAndData : Block.PathData) { |
| 205 | auto &PathID = PathAndData.first; |
| 206 | auto &Data = PathAndData.second; |
| 207 | auto NewPathID = |
| 208 | Merged.internPath(P: cantFail(ValOrErr: P.get().expandPath(P: PathID))); |
| 209 | PathDataMap::iterator PathDataIt; |
| 210 | bool Inserted; |
| 211 | std::tie(args&: PathDataIt, args&: Inserted) = It->second->insert(KV: {NewPathID, Data}); |
| 212 | if (!Inserted) { |
| 213 | auto &ExistingData = PathDataIt->second; |
| 214 | ExistingData.CallCount += Data.CallCount; |
| 215 | ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; |
| 216 | } |
| 217 | } |
| 218 | } |
| 219 | |
| 220 | for (const auto &IndexedThreadBlock : ThreadProfileIndex) { |
| 221 | PathDataVector PathAndData; |
| 222 | PathAndData.reserve(n: IndexedThreadBlock.second->size()); |
| 223 | copy(Range&: *IndexedThreadBlock.second, Out: std::back_inserter(x&: PathAndData)); |
| 224 | cantFail( |
| 225 | Err: Merged.addBlock(B: {.Thread: IndexedThreadBlock.first, .PathData: std::move(PathAndData)})); |
| 226 | } |
| 227 | return Merged; |
| 228 | } |
| 229 | |
| 230 | Profile xray::mergeProfilesByStack(const Profile &L, const Profile &R) { |
| 231 | Profile Merged; |
| 232 | using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; |
| 233 | PathDataMap PathData; |
| 234 | using PathDataVector = decltype(Profile::Block::PathData); |
| 235 | for (const auto &P : {std::ref(t: L), std::ref(t: R)}) |
| 236 | for (const auto &Block : P.get()) |
| 237 | for (const auto &PathAndData : Block.PathData) { |
| 238 | auto &PathId = PathAndData.first; |
| 239 | auto &Data = PathAndData.second; |
| 240 | auto NewPathID = |
| 241 | Merged.internPath(P: cantFail(ValOrErr: P.get().expandPath(P: PathId))); |
| 242 | PathDataMap::iterator PathDataIt; |
| 243 | bool Inserted; |
| 244 | std::tie(args&: PathDataIt, args&: Inserted) = PathData.insert(KV: {NewPathID, Data}); |
| 245 | if (!Inserted) { |
| 246 | auto &ExistingData = PathDataIt->second; |
| 247 | ExistingData.CallCount += Data.CallCount; |
| 248 | ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | // In the end there's a single Block, for thread 0. |
| 253 | PathDataVector Block; |
| 254 | Block.reserve(n: PathData.size()); |
| 255 | copy(Range&: PathData, Out: std::back_inserter(x&: Block)); |
| 256 | cantFail(Err: Merged.addBlock(B: {.Thread: 0, .PathData: std::move(Block)})); |
| 257 | return Merged; |
| 258 | } |
| 259 | |
| 260 | Expected<Profile> xray::loadProfile(StringRef Filename) { |
| 261 | Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Name: Filename); |
| 262 | if (!FdOrErr) |
| 263 | return FdOrErr.takeError(); |
| 264 | |
| 265 | uint64_t FileSize; |
| 266 | if (auto EC = sys::fs::file_size(Path: Filename, Result&: FileSize)) |
| 267 | return make_error<StringError>( |
| 268 | Args: Twine("Cannot get filesize of '" ) + Filename + "'" , Args&: EC); |
| 269 | |
| 270 | std::error_code EC; |
| 271 | sys::fs::mapped_file_region MappedFile( |
| 272 | *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, |
| 273 | EC); |
| 274 | sys::fs::closeFile(F&: *FdOrErr); |
| 275 | if (EC) |
| 276 | return make_error<StringError>( |
| 277 | Args: Twine("Cannot mmap profile '" ) + Filename + "'" , Args&: EC); |
| 278 | StringRef Data(MappedFile.data(), MappedFile.size()); |
| 279 | |
| 280 | Profile P; |
| 281 | uint64_t Offset = 0; |
| 282 | DataExtractor (Data, true, 8); |
| 283 | |
| 284 | // For each block we get from the file: |
| 285 | while (Offset != MappedFile.size()) { |
| 286 | auto = readBlockHeader(Extractor, Offset); |
| 287 | if (!HeaderOrError) |
| 288 | return HeaderOrError.takeError(); |
| 289 | |
| 290 | // TODO: Maybe store this header information for each block, even just for |
| 291 | // debugging? |
| 292 | const auto & = HeaderOrError.get(); |
| 293 | |
| 294 | // Read in the path data. |
| 295 | auto PathOrError = readPath(Extractor, Offset); |
| 296 | if (!PathOrError) |
| 297 | return PathOrError.takeError(); |
| 298 | const auto &Path = PathOrError.get(); |
| 299 | |
| 300 | // For each path we encounter, we should intern it to get a PathID. |
| 301 | auto DataOrError = readData(Extractor, Offset); |
| 302 | if (!DataOrError) |
| 303 | return DataOrError.takeError(); |
| 304 | auto &Data = DataOrError.get(); |
| 305 | |
| 306 | if (auto E = |
| 307 | P.addBlock(B: Profile::Block{.Thread: Profile::ThreadID{Header.Thread}, |
| 308 | .PathData: {{P.internPath(P: Path), std::move(Data)}}})) |
| 309 | return std::move(E); |
| 310 | } |
| 311 | |
| 312 | return P; |
| 313 | } |
| 314 | |
| 315 | namespace { |
| 316 | |
| 317 | struct StackEntry { |
| 318 | uint64_t Timestamp; |
| 319 | Profile::FuncID FuncId; |
| 320 | }; |
| 321 | |
| 322 | } // namespace |
| 323 | |
| 324 | Expected<Profile> xray::profileFromTrace(const Trace &T) { |
| 325 | Profile P; |
| 326 | |
| 327 | // The implementation of the algorithm re-creates the execution of |
| 328 | // the functions based on the trace data. To do this, we set up a number of |
| 329 | // data structures to track the execution context of every thread in the |
| 330 | // Trace. |
| 331 | DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks; |
| 332 | DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>> |
| 333 | ThreadPathData; |
| 334 | |
| 335 | // We then do a pass through the Trace to account data on a per-thread-basis. |
| 336 | for (const auto &E : T) { |
| 337 | auto &TSD = ThreadStacks[E.TId]; |
| 338 | switch (E.Type) { |
| 339 | case RecordTypes::ENTER: |
| 340 | case RecordTypes::ENTER_ARG: |
| 341 | |
| 342 | // Push entries into the function call stack. |
| 343 | TSD.push_back(x: {.Timestamp: E.TSC, .FuncId: E.FuncId}); |
| 344 | break; |
| 345 | |
| 346 | case RecordTypes::EXIT: |
| 347 | case RecordTypes::TAIL_EXIT: |
| 348 | |
| 349 | // Exits cause some accounting to happen, based on the state of the stack. |
| 350 | // For each function we pop off the stack, we take note of the path and |
| 351 | // record the cumulative state for this path. As we're doing this, we |
| 352 | // intern the path into the Profile. |
| 353 | while (!TSD.empty()) { |
| 354 | auto Top = TSD.back(); |
| 355 | auto FunctionLocalTime = AbsoluteDifference(X: Top.Timestamp, Y: E.TSC); |
| 356 | SmallVector<Profile::FuncID, 16> Path; |
| 357 | transform(Range: reverse(C&: TSD), d_first: std::back_inserter(x&: Path), |
| 358 | F: std::mem_fn(pm: &StackEntry::FuncId)); |
| 359 | auto InternedPath = P.internPath(P: Path); |
| 360 | auto &TPD = ThreadPathData[E.TId][InternedPath]; |
| 361 | ++TPD.CallCount; |
| 362 | TPD.CumulativeLocalTime += FunctionLocalTime; |
| 363 | TSD.pop_back(); |
| 364 | |
| 365 | // If we've matched the corresponding entry event for this function, |
| 366 | // then we exit the loop. |
| 367 | if (Top.FuncId == E.FuncId) |
| 368 | break; |
| 369 | |
| 370 | // FIXME: Consider the intermediate times and the cumulative tree time |
| 371 | // as well. |
| 372 | } |
| 373 | |
| 374 | break; |
| 375 | |
| 376 | case RecordTypes::CUSTOM_EVENT: |
| 377 | case RecordTypes::TYPED_EVENT: |
| 378 | // TODO: Support an extension point to allow handling of custom and typed |
| 379 | // events in profiles. |
| 380 | break; |
| 381 | } |
| 382 | } |
| 383 | |
| 384 | // Once we've gone through the Trace, we now create one Block per thread in |
| 385 | // the Profile. |
| 386 | for (const auto &ThreadPaths : ThreadPathData) { |
| 387 | const auto &TID = ThreadPaths.first; |
| 388 | const auto &PathsData = ThreadPaths.second; |
| 389 | if (auto E = P.addBlock(B: { |
| 390 | .Thread: TID, |
| 391 | .PathData: std::vector<std::pair<Profile::PathID, Profile::Data>>( |
| 392 | PathsData.begin(), PathsData.end()), |
| 393 | })) |
| 394 | return std::move(E); |
| 395 | } |
| 396 | |
| 397 | return P; |
| 398 | } |
| 399 | |