//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines the XRay Profile class representing the latency profile generated by
// XRay's profiling mode.
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/Profile.h"

#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/XRay/Trace.h"
#include <memory>

namespace llvm {
namespace xray {

Profile::Profile(const Profile &O) {
  // We need to re-create all the tries from the original (O), into the current
  // Profile being initialized, through the Block instances we see.
  for (const auto &Block : O) {
    Blocks.push_back({Block.Thread, {}});
    auto &B = Blocks.back();
    for (const auto &PathData : Block.PathData)
      B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
                            PathData.second});
  }
}

Profile &Profile::operator=(const Profile &O) {
  Profile P = O;
  *this = std::move(P);
  return *this;
}

namespace {

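// On-disk block layout, as consumed by the readers below: a BlockHeader
// (32-bit size, 32-bit block number, and 64-bit thread id), followed by a
// path (a sequence of 32-bit signed function ids terminated by a 0), followed
// by the per-path data (a 64-bit call count and a 64-bit cumulative local
// time).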
struct BlockHeader {
  uint32_t Size;
  uint32_t Number;
  uint64_t Thread;
};

static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
                                             uint64_t &Offset) {
  BlockHeader H;
  uint64_t CurrentOffset = Offset;
  H.Size = Extractor.getU32(&Offset);
  if (Offset == CurrentOffset)
    return make_error<StringError>(
        Twine("Error parsing block header size at offset '") +
            Twine(CurrentOffset) + "'",
        std::make_error_code(std::errc::invalid_argument));
  CurrentOffset = Offset;
  H.Number = Extractor.getU32(&Offset);
  if (Offset == CurrentOffset)
    return make_error<StringError>(
        Twine("Error parsing block header number at offset '") +
            Twine(CurrentOffset) + "'",
        std::make_error_code(std::errc::invalid_argument));
  CurrentOffset = Offset;
  H.Thread = Extractor.getU64(&Offset);
  if (Offset == CurrentOffset)
    return make_error<StringError>(
        Twine("Error parsing block header thread id at offset '") +
            Twine(CurrentOffset) + "'",
        std::make_error_code(std::errc::invalid_argument));
  return H;
}

static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
                                                       uint64_t &Offset) {
  // We're reading a sequence of int32_t's until we find a 0.
  std::vector<Profile::FuncID> Path;
  auto CurrentOffset = Offset;
  int32_t FuncId;
  do {
    FuncId = Extractor.getSigned(&Offset, 4);
    if (CurrentOffset == Offset)
      return make_error<StringError>(
          Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
          std::make_error_code(std::errc::invalid_argument));
    CurrentOffset = Offset;
    Path.push_back(FuncId);
  } while (FuncId != 0);
  return std::move(Path);
}

static Expected<Profile::Data> readData(DataExtractor &Extractor,
                                        uint64_t &Offset) {
  // We expect a certain number of elements for Data:
  //   - A 64-bit CallCount
  //   - A 64-bit CumulativeLocalTime counter
  Profile::Data D;
  auto CurrentOffset = Offset;
  D.CallCount = Extractor.getU64(&Offset);
  if (CurrentOffset == Offset)
    return make_error<StringError>(
        Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
            "'",
        std::make_error_code(std::errc::invalid_argument));
  CurrentOffset = Offset;
  D.CumulativeLocalTime = Extractor.getU64(&Offset);
  if (CurrentOffset == Offset)
    return make_error<StringError>(
        Twine("Error parsing cumulative local time at offset '") +
            Twine(CurrentOffset) + "'",
        std::make_error_code(std::errc::invalid_argument));
  return D;
}

} // namespace

Error Profile::addBlock(Block &&B) {
  if (B.PathData.empty())
    return make_error<StringError>(
        "Block may not have empty path data.",
        std::make_error_code(std::errc::invalid_argument));

  Blocks.emplace_back(std::move(B));
  return Error::success();
}

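// Reconstructs the function-id path for an interned PathID by walking the trie
// from the leaf node up through its callers. The result is therefore ordered
// leaf-to-root, which is the same ordering internPath() expects.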
Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
  auto It = PathIDMap.find(P);
  if (It == PathIDMap.end())
    return make_error<StringError>(
        Twine("PathID not found: ") + Twine(P),
        std::make_error_code(std::errc::invalid_argument));
  std::vector<Profile::FuncID> Path;
  for (auto Node = It->second; Node; Node = Node->Caller)
    Path.push_back(Node->Func);
  return std::move(Path);
}

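// Interns a leaf-to-root path into the trie of call paths. Nodes are created
// on demand, so paths that share the same chain of callers from the root down
// share trie nodes, and each distinct leaf is lazily assigned a PathID. As a
// purely illustrative example, interning {3, 2, 1} and then {4, 2, 1} shares
// the nodes for functions 1 and 2 and yields two different PathIDs.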
Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
  if (P.empty())
    return 0;

  auto RootToLeafPath = reverse(P);

  // Find the root.
  auto It = RootToLeafPath.begin();
  auto PathRoot = *It++;
  auto RootIt =
      find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });

  // If we've not seen this root before, remember it.
  TrieNode *Node = nullptr;
  if (RootIt == Roots.end()) {
    NodeStorage.emplace_back();
    Node = &NodeStorage.back();
    Node->Func = PathRoot;
    Roots.push_back(Node);
  } else {
    Node = *RootIt;
  }

  // Now traverse the path, re-creating if necessary.
  while (It != RootToLeafPath.end()) {
    auto NodeFuncID = *It++;
    auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
      return N->Func == NodeFuncID;
    });
    if (CalleeIt == Node->Callees.end()) {
      NodeStorage.emplace_back();
      auto NewNode = &NodeStorage.back();
      NewNode->Func = NodeFuncID;
      NewNode->Caller = Node;
      Node->Callees.push_back(NewNode);
      Node = NewNode;
    } else {
      Node = *CalleeIt;
    }
  }

  // At this point, Node *must* be pointing at the leaf.
  assert(Node->Func == P.front());
  if (Node->ID == 0) {
    Node->ID = NextID++;
    PathIDMap.insert({Node->ID, Node});
  }
  return Node->ID;
}

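// Merges two profiles thread-by-thread: blocks with the same ThreadID are
// combined into a single block, and entries for identical call paths within a
// thread have their call counts and cumulative local times summed.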
Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
  Profile Merged;
  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
  using PathDataMapPtr = std::unique_ptr<PathDataMap>;
  using PathDataVector = decltype(Profile::Block::PathData);
  using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
  ThreadProfileIndexMap ThreadProfileIndex;

  for (const auto &P : {std::ref(L), std::ref(R)})
    for (const auto &Block : P.get()) {
      ThreadProfileIndexMap::iterator It;
      std::tie(It, std::ignore) = ThreadProfileIndex.insert(
          {Block.Thread, std::make_unique<PathDataMap>()});
      for (const auto &PathAndData : Block.PathData) {
        auto &PathID = PathAndData.first;
        auto &Data = PathAndData.second;
        auto NewPathID =
            Merged.internPath(cantFail(P.get().expandPath(PathID)));
        PathDataMap::iterator PathDataIt;
        bool Inserted;
        std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
        if (!Inserted) {
          auto &ExistingData = PathDataIt->second;
          ExistingData.CallCount += Data.CallCount;
          ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
        }
      }
    }

  for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
    PathDataVector PathAndData;
    PathAndData.reserve(IndexedThreadBlock.second->size());
    copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
    cantFail(
        Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
  }
  return Merged;
}

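// Merges two profiles by call path alone, ignoring threads: data for the same
// path is aggregated across all blocks of both profiles, and the result is
// emitted as a single block attributed to thread 0.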
Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
  Profile Merged;
  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
  PathDataMap PathData;
  using PathDataVector = decltype(Profile::Block::PathData);
  for (const auto &P : {std::ref(L), std::ref(R)})
    for (const auto &Block : P.get())
      for (const auto &PathAndData : Block.PathData) {
        auto &PathId = PathAndData.first;
        auto &Data = PathAndData.second;
        auto NewPathID =
            Merged.internPath(cantFail(P.get().expandPath(PathId)));
        PathDataMap::iterator PathDataIt;
        bool Inserted;
        std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
        if (!Inserted) {
          auto &ExistingData = PathDataIt->second;
          ExistingData.CallCount += Data.CallCount;
          ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
        }
      }

  // In the end there's a single Block, for thread 0.
  PathDataVector Block;
  Block.reserve(PathData.size());
  copy(PathData, std::back_inserter(Block));
  cantFail(Merged.addBlock({0, std::move(Block)}));
  return Merged;
}

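// Loads a profile by memory-mapping the named file and parsing consecutive
// blocks until the end of the mapping. A minimal consumption sketch (the file
// name is hypothetical, and errors are simply consumed for brevity):
//
//   if (auto ProfileOrErr = loadProfile("xray-profile.bin")) {
//     for (const auto &Block : *ProfileOrErr)
//       for (const auto &PathAndData : Block.PathData)
//         (void)cantFail(ProfileOrErr->expandPath(PathAndData.first));
//   } else {
//     consumeError(ProfileOrErr.takeError());
//   }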
Expected<Profile> loadProfile(StringRef Filename) {
  Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
  if (!FdOrErr)
    return FdOrErr.takeError();

  uint64_t FileSize;
  if (auto EC = sys::fs::file_size(Filename, FileSize))
    return make_error<StringError>(
        Twine("Cannot get filesize of '") + Filename + "'", EC);

  std::error_code EC;
  sys::fs::mapped_file_region MappedFile(
      *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
      EC);
  sys::fs::closeFile(*FdOrErr);
  if (EC)
    return make_error<StringError>(
        Twine("Cannot mmap profile '") + Filename + "'", EC);
  StringRef Data(MappedFile.data(), MappedFile.size());

  Profile P;
  uint64_t Offset = 0;
  DataExtractor Extractor(Data, true, 8);

  // For each block we get from the file:
  while (Offset != MappedFile.size()) {
    auto HeaderOrError = readBlockHeader(Extractor, Offset);
    if (!HeaderOrError)
      return HeaderOrError.takeError();

    // TODO: Maybe store this header information for each block, even just for
    // debugging?
    const auto &Header = HeaderOrError.get();

    // Read in the path data.
    auto PathOrError = readPath(Extractor, Offset);
    if (!PathOrError)
      return PathOrError.takeError();
    const auto &Path = PathOrError.get();

    // For each path we encounter, we should intern it to get a PathID.
    auto DataOrError = readData(Extractor, Offset);
    if (!DataOrError)
      return DataOrError.takeError();
    auto &Data = DataOrError.get();

    if (auto E =
            P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
                                      {{P.internPath(Path), std::move(Data)}}}))
      return std::move(E);
  }

  return P;
}

namespace {

struct StackEntry {
  uint64_t Timestamp;
  Profile::FuncID FuncId;
};

} // namespace

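// Converts a Trace into a Profile by replaying function entry and exit events
// on a per-thread shadow stack. A Trace would typically be obtained via
// llvm::xray::loadTraceFile() (declared in llvm/XRay/Trace.h), though any
// Trace instance works here.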
Expected<Profile> profileFromTrace(const Trace &T) {
  Profile P;

  // The implementation of the algorithm re-creates the execution of
  // the functions based on the trace data. To do this, we set up a number of
  // data structures to track the execution context of every thread in the
  // Trace.
  DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
  DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
      ThreadPathData;
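
  // ThreadStacks models a shadow call stack per thread, while ThreadPathData
  // accumulates per-path call counts and cumulative local times per thread.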

  // We then do a pass through the Trace to account data on a per-thread-basis.
  for (const auto &E : T) {
    auto &TSD = ThreadStacks[E.TId];
    switch (E.Type) {
    case RecordTypes::ENTER:
    case RecordTypes::ENTER_ARG:

      // Push entries into the function call stack.
      TSD.push_back({E.TSC, E.FuncId});
      break;

    case RecordTypes::EXIT:
    case RecordTypes::TAIL_EXIT:

      // Exits cause some accounting to happen, based on the state of the
      // stack. For each function we pop off the stack, we take note of the
      // path and record the cumulative state for this path. As we're doing
      // this, we intern the path into the Profile.
      while (!TSD.empty()) {
        auto Top = TSD.back();
        auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
        SmallVector<Profile::FuncID, 16> Path;
        transform(reverse(TSD), std::back_inserter(Path),
                  std::mem_fn(&StackEntry::FuncId));
        auto InternedPath = P.internPath(Path);
        auto &TPD = ThreadPathData[E.TId][InternedPath];
        ++TPD.CallCount;
        TPD.CumulativeLocalTime += FunctionLocalTime;
        TSD.pop_back();

        // If we've matched the corresponding entry event for this function,
        // then we exit the loop.
        if (Top.FuncId == E.FuncId)
          break;

        // FIXME: Consider the intermediate times and the cumulative tree time
        // as well.
      }

      break;

    case RecordTypes::CUSTOM_EVENT:
    case RecordTypes::TYPED_EVENT:
      // TODO: Support an extension point to allow handling of custom and typed
      // events in profiles.
      break;
    }
  }

  // Once we've gone through the Trace, we now create one Block per thread in
  // the Profile.
  for (const auto &ThreadPaths : ThreadPathData) {
    const auto &TID = ThreadPaths.first;
    const auto &PathsData = ThreadPaths.second;
    if (auto E = P.addBlock({
            TID,
            std::vector<std::pair<Profile::PathID, Profile::Data>>(
                PathsData.begin(), PathsData.end()),
        }))
      return std::move(E);
  }

  return P;
}

} // namespace xray
} // namespace llvm