1//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines the XRay Profile class representing the latency profile generated by
10// XRay's profiling mode.
11//
12//===----------------------------------------------------------------------===//
13#include "llvm/XRay/Profile.h"
14
15#include "llvm/Support/DataExtractor.h"
16#include "llvm/Support/Error.h"
17#include "llvm/Support/FileSystem.h"
18#include "llvm/XRay/Trace.h"
19#include <memory>
20
21namespace llvm {
22namespace xray {
23
24Profile::Profile(const Profile &O) {
25 // We need to re-create all the tries from the original (O), into the current
26 // Profile being initialized, through the Block instances we see.
27 for (const auto &Block : O) {
28 Blocks.push_back(x: {.Thread: Block.Thread, .PathData: {}});
29 auto &B = Blocks.back();
30 for (const auto &PathData : Block.PathData)
31 B.PathData.push_back(x: {internPath(P: cantFail(ValOrErr: O.expandPath(P: PathData.first))),
32 PathData.second});
33 }
34}
35
36Profile &Profile::operator=(const Profile &O) {
37 Profile P = O;
38 *this = std::move(P);
39 return *this;
40}
41
42namespace {
43
44struct BlockHeader {
45 uint32_t Size;
46 uint32_t Number;
47 uint64_t Thread;
48};
49
50static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
51 uint64_t &Offset) {
52 BlockHeader H;
53 uint64_t CurrentOffset = Offset;
54 H.Size = Extractor.getU32(offset_ptr: &Offset);
55 if (Offset == CurrentOffset)
56 return make_error<StringError>(
57 Args: Twine("Error parsing block header size at offset '") +
58 Twine(CurrentOffset) + "'",
59 Args: std::make_error_code(e: std::errc::invalid_argument));
60 CurrentOffset = Offset;
61 H.Number = Extractor.getU32(offset_ptr: &Offset);
62 if (Offset == CurrentOffset)
63 return make_error<StringError>(
64 Args: Twine("Error parsing block header number at offset '") +
65 Twine(CurrentOffset) + "'",
66 Args: std::make_error_code(e: std::errc::invalid_argument));
67 CurrentOffset = Offset;
68 H.Thread = Extractor.getU64(offset_ptr: &Offset);
69 if (Offset == CurrentOffset)
70 return make_error<StringError>(
71 Args: Twine("Error parsing block header thread id at offset '") +
72 Twine(CurrentOffset) + "'",
73 Args: std::make_error_code(e: std::errc::invalid_argument));
74 return H;
75}
76
77static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
78 uint64_t &Offset) {
79 // We're reading a sequence of int32_t's until we find a 0.
80 std::vector<Profile::FuncID> Path;
81 auto CurrentOffset = Offset;
82 int32_t FuncId;
83 do {
84 FuncId = Extractor.getSigned(offset_ptr: &Offset, size: 4);
85 if (CurrentOffset == Offset)
86 return make_error<StringError>(
87 Args: Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
88 Args: std::make_error_code(e: std::errc::invalid_argument));
89 CurrentOffset = Offset;
90 Path.push_back(x: FuncId);
91 } while (FuncId != 0);
92 return std::move(Path);
93}
94
95static Expected<Profile::Data> readData(DataExtractor &Extractor,
96 uint64_t &Offset) {
97 // We expect a certain number of elements for Data:
98 // - A 64-bit CallCount
99 // - A 64-bit CumulativeLocalTime counter
100 Profile::Data D;
101 auto CurrentOffset = Offset;
102 D.CallCount = Extractor.getU64(offset_ptr: &Offset);
103 if (CurrentOffset == Offset)
104 return make_error<StringError>(
105 Args: Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
106 "'",
107 Args: std::make_error_code(e: std::errc::invalid_argument));
108 CurrentOffset = Offset;
109 D.CumulativeLocalTime = Extractor.getU64(offset_ptr: &Offset);
110 if (CurrentOffset == Offset)
111 return make_error<StringError>(
112 Args: Twine("Error parsing cumulative local time at offset '") +
113 Twine(CurrentOffset) + "'",
114 Args: std::make_error_code(e: std::errc::invalid_argument));
115 return D;
116}
117
118} // namespace
119
120Error Profile::addBlock(Block &&B) {
121 if (B.PathData.empty())
122 return make_error<StringError>(
123 Args: "Block may not have empty path data.",
124 Args: std::make_error_code(e: std::errc::invalid_argument));
125
126 Blocks.emplace_back(args: std::move(B));
127 return Error::success();
128}
129
130Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
131 auto It = PathIDMap.find(Val: P);
132 if (It == PathIDMap.end())
133 return make_error<StringError>(
134 Args: Twine("PathID not found: ") + Twine(P),
135 Args: std::make_error_code(e: std::errc::invalid_argument));
136 std::vector<Profile::FuncID> Path;
137 for (auto Node = It->second; Node; Node = Node->Caller)
138 Path.push_back(x: Node->Func);
139 return std::move(Path);
140}
141
142Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
143 if (P.empty())
144 return 0;
145
146 auto RootToLeafPath = reverse(C&: P);
147
148 // Find the root.
149 auto It = RootToLeafPath.begin();
150 auto PathRoot = *It++;
151 auto RootIt =
152 find_if(Range&: Roots, P: [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
153
154 // If we've not seen this root before, remember it.
155 TrieNode *Node = nullptr;
156 if (RootIt == Roots.end()) {
157 NodeStorage.emplace_back();
158 Node = &NodeStorage.back();
159 Node->Func = PathRoot;
160 Roots.push_back(Elt: Node);
161 } else {
162 Node = *RootIt;
163 }
164
165 // Now traverse the path, re-creating if necessary.
166 while (It != RootToLeafPath.end()) {
167 auto NodeFuncID = *It++;
168 auto CalleeIt = find_if(Range&: Node->Callees, P: [NodeFuncID](TrieNode *N) {
169 return N->Func == NodeFuncID;
170 });
171 if (CalleeIt == Node->Callees.end()) {
172 NodeStorage.emplace_back();
173 auto NewNode = &NodeStorage.back();
174 NewNode->Func = NodeFuncID;
175 NewNode->Caller = Node;
176 Node->Callees.push_back(x: NewNode);
177 Node = NewNode;
178 } else {
179 Node = *CalleeIt;
180 }
181 }
182
183 // At this point, Node *must* be pointing at the leaf.
184 assert(Node->Func == P.front());
185 if (Node->ID == 0) {
186 Node->ID = NextID++;
187 PathIDMap.insert(KV: {Node->ID, Node});
188 }
189 return Node->ID;
190}
191
192Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
193 Profile Merged;
194 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
195 using PathDataMapPtr = std::unique_ptr<PathDataMap>;
196 using PathDataVector = decltype(Profile::Block::PathData);
197 using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
198 ThreadProfileIndexMap ThreadProfileIndex;
199
200 for (const auto &P : {std::ref(t: L), std::ref(t: R)})
201 for (const auto &Block : P.get()) {
202 ThreadProfileIndexMap::iterator It;
203 std::tie(args&: It, args: std::ignore) = ThreadProfileIndex.insert(
204 KV: {Block.Thread, std::make_unique<PathDataMap>()});
205 for (const auto &PathAndData : Block.PathData) {
206 auto &PathID = PathAndData.first;
207 auto &Data = PathAndData.second;
208 auto NewPathID =
209 Merged.internPath(P: cantFail(ValOrErr: P.get().expandPath(P: PathID)));
210 PathDataMap::iterator PathDataIt;
211 bool Inserted;
212 std::tie(args&: PathDataIt, args&: Inserted) = It->second->insert(KV: {NewPathID, Data});
213 if (!Inserted) {
214 auto &ExistingData = PathDataIt->second;
215 ExistingData.CallCount += Data.CallCount;
216 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
217 }
218 }
219 }
220
221 for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
222 PathDataVector PathAndData;
223 PathAndData.reserve(n: IndexedThreadBlock.second->size());
224 copy(Range&: *IndexedThreadBlock.second, Out: std::back_inserter(x&: PathAndData));
225 cantFail(
226 Err: Merged.addBlock(B: {.Thread: IndexedThreadBlock.first, .PathData: std::move(PathAndData)}));
227 }
228 return Merged;
229}
230
231Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
232 Profile Merged;
233 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
234 PathDataMap PathData;
235 using PathDataVector = decltype(Profile::Block::PathData);
236 for (const auto &P : {std::ref(t: L), std::ref(t: R)})
237 for (const auto &Block : P.get())
238 for (const auto &PathAndData : Block.PathData) {
239 auto &PathId = PathAndData.first;
240 auto &Data = PathAndData.second;
241 auto NewPathID =
242 Merged.internPath(P: cantFail(ValOrErr: P.get().expandPath(P: PathId)));
243 PathDataMap::iterator PathDataIt;
244 bool Inserted;
245 std::tie(args&: PathDataIt, args&: Inserted) = PathData.insert(KV: {NewPathID, Data});
246 if (!Inserted) {
247 auto &ExistingData = PathDataIt->second;
248 ExistingData.CallCount += Data.CallCount;
249 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
250 }
251 }
252
253 // In the end there's a single Block, for thread 0.
254 PathDataVector Block;
255 Block.reserve(n: PathData.size());
256 copy(Range&: PathData, Out: std::back_inserter(x&: Block));
257 cantFail(Err: Merged.addBlock(B: {.Thread: 0, .PathData: std::move(Block)}));
258 return Merged;
259}
260
261Expected<Profile> loadProfile(StringRef Filename) {
262 Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Name: Filename);
263 if (!FdOrErr)
264 return FdOrErr.takeError();
265
266 uint64_t FileSize;
267 if (auto EC = sys::fs::file_size(Path: Filename, Result&: FileSize))
268 return make_error<StringError>(
269 Args: Twine("Cannot get filesize of '") + Filename + "'", Args&: EC);
270
271 std::error_code EC;
272 sys::fs::mapped_file_region MappedFile(
273 *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
274 EC);
275 sys::fs::closeFile(F&: *FdOrErr);
276 if (EC)
277 return make_error<StringError>(
278 Args: Twine("Cannot mmap profile '") + Filename + "'", Args&: EC);
279 StringRef Data(MappedFile.data(), MappedFile.size());
280
281 Profile P;
282 uint64_t Offset = 0;
283 DataExtractor Extractor(Data, true, 8);
284
285 // For each block we get from the file:
286 while (Offset != MappedFile.size()) {
287 auto HeaderOrError = readBlockHeader(Extractor, Offset);
288 if (!HeaderOrError)
289 return HeaderOrError.takeError();
290
291 // TODO: Maybe store this header information for each block, even just for
292 // debugging?
293 const auto &Header = HeaderOrError.get();
294
295 // Read in the path data.
296 auto PathOrError = readPath(Extractor, Offset);
297 if (!PathOrError)
298 return PathOrError.takeError();
299 const auto &Path = PathOrError.get();
300
301 // For each path we encounter, we should intern it to get a PathID.
302 auto DataOrError = readData(Extractor, Offset);
303 if (!DataOrError)
304 return DataOrError.takeError();
305 auto &Data = DataOrError.get();
306
307 if (auto E =
308 P.addBlock(B: Profile::Block{.Thread: Profile::ThreadID{Header.Thread},
309 .PathData: {{P.internPath(P: Path), std::move(Data)}}}))
310 return std::move(E);
311 }
312
313 return P;
314}
315
316namespace {
317
318struct StackEntry {
319 uint64_t Timestamp;
320 Profile::FuncID FuncId;
321};
322
323} // namespace
324
325Expected<Profile> profileFromTrace(const Trace &T) {
326 Profile P;
327
328 // The implementation of the algorithm re-creates the execution of
329 // the functions based on the trace data. To do this, we set up a number of
330 // data structures to track the execution context of every thread in the
331 // Trace.
332 DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
333 DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
334 ThreadPathData;
335
336 // We then do a pass through the Trace to account data on a per-thread-basis.
337 for (const auto &E : T) {
338 auto &TSD = ThreadStacks[E.TId];
339 switch (E.Type) {
340 case RecordTypes::ENTER:
341 case RecordTypes::ENTER_ARG:
342
343 // Push entries into the function call stack.
344 TSD.push_back(x: {.Timestamp: E.TSC, .FuncId: E.FuncId});
345 break;
346
347 case RecordTypes::EXIT:
348 case RecordTypes::TAIL_EXIT:
349
350 // Exits cause some accounting to happen, based on the state of the stack.
351 // For each function we pop off the stack, we take note of the path and
352 // record the cumulative state for this path. As we're doing this, we
353 // intern the path into the Profile.
354 while (!TSD.empty()) {
355 auto Top = TSD.back();
356 auto FunctionLocalTime = AbsoluteDifference(X: Top.Timestamp, Y: E.TSC);
357 SmallVector<Profile::FuncID, 16> Path;
358 transform(Range: reverse(C&: TSD), d_first: std::back_inserter(x&: Path),
359 F: std::mem_fn(pm: &StackEntry::FuncId));
360 auto InternedPath = P.internPath(P: Path);
361 auto &TPD = ThreadPathData[E.TId][InternedPath];
362 ++TPD.CallCount;
363 TPD.CumulativeLocalTime += FunctionLocalTime;
364 TSD.pop_back();
365
366 // If we've matched the corresponding entry event for this function,
367 // then we exit the loop.
368 if (Top.FuncId == E.FuncId)
369 break;
370
371 // FIXME: Consider the intermediate times and the cumulative tree time
372 // as well.
373 }
374
375 break;
376
377 case RecordTypes::CUSTOM_EVENT:
378 case RecordTypes::TYPED_EVENT:
379 // TODO: Support an extension point to allow handling of custom and typed
380 // events in profiles.
381 break;
382 }
383 }
384
385 // Once we've gone through the Trace, we now create one Block per thread in
386 // the Profile.
387 for (const auto &ThreadPaths : ThreadPathData) {
388 const auto &TID = ThreadPaths.first;
389 const auto &PathsData = ThreadPaths.second;
390 if (auto E = P.addBlock(B: {
391 .Thread: TID,
392 .PathData: std::vector<std::pair<Profile::PathID, Profile::Data>>(
393 PathsData.begin(), PathsData.end()),
394 }))
395 return std::move(E);
396 }
397
398 return P;
399}
400
401} // namespace xray
402} // namespace llvm
403