1//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines the XRay Profile class representing the latency profile generated by
10// XRay's profiling mode.
11//
12//===----------------------------------------------------------------------===//
13#include "llvm/XRay/Profile.h"
14
15#include "llvm/Support/DataExtractor.h"
16#include "llvm/Support/Error.h"
17#include "llvm/Support/FileSystem.h"
18#include "llvm/XRay/Trace.h"
19#include <memory>
20
21using namespace llvm;
22using namespace llvm::xray;
23
24Profile::Profile(const Profile &O) {
25 // We need to re-create all the tries from the original (O), into the current
26 // Profile being initialized, through the Block instances we see.
27 for (const auto &Block : O) {
28 Blocks.push_back(x: {.Thread: Block.Thread, .PathData: {}});
29 auto &B = Blocks.back();
30 for (const auto &PathData : Block.PathData)
31 B.PathData.push_back(x: {internPath(P: cantFail(ValOrErr: O.expandPath(P: PathData.first))),
32 PathData.second});
33 }
34}
35
36Profile &Profile::operator=(const Profile &O) {
37 Profile P = O;
38 *this = std::move(P);
39 return *this;
40}
41
42namespace {
43
44struct BlockHeader {
45 uint32_t Size;
46 uint32_t Number;
47 uint64_t Thread;
48};
49} // namespace
50
51static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
52 uint64_t &Offset) {
53 BlockHeader H;
54 uint64_t CurrentOffset = Offset;
55 H.Size = Extractor.getU32(offset_ptr: &Offset);
56 if (Offset == CurrentOffset)
57 return make_error<StringError>(
58 Args: Twine("Error parsing block header size at offset '") +
59 Twine(CurrentOffset) + "'",
60 Args: std::make_error_code(e: std::errc::invalid_argument));
61 CurrentOffset = Offset;
62 H.Number = Extractor.getU32(offset_ptr: &Offset);
63 if (Offset == CurrentOffset)
64 return make_error<StringError>(
65 Args: Twine("Error parsing block header number at offset '") +
66 Twine(CurrentOffset) + "'",
67 Args: std::make_error_code(e: std::errc::invalid_argument));
68 CurrentOffset = Offset;
69 H.Thread = Extractor.getU64(offset_ptr: &Offset);
70 if (Offset == CurrentOffset)
71 return make_error<StringError>(
72 Args: Twine("Error parsing block header thread id at offset '") +
73 Twine(CurrentOffset) + "'",
74 Args: std::make_error_code(e: std::errc::invalid_argument));
75 return H;
76}
77
78static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
79 uint64_t &Offset) {
80 // We're reading a sequence of int32_t's until we find a 0.
81 std::vector<Profile::FuncID> Path;
82 auto CurrentOffset = Offset;
83 int32_t FuncId;
84 do {
85 FuncId = Extractor.getSigned(offset_ptr: &Offset, size: 4);
86 if (CurrentOffset == Offset)
87 return make_error<StringError>(
88 Args: Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
89 Args: std::make_error_code(e: std::errc::invalid_argument));
90 CurrentOffset = Offset;
91 Path.push_back(x: FuncId);
92 } while (FuncId != 0);
93 return std::move(Path);
94}
95
96static Expected<Profile::Data> readData(DataExtractor &Extractor,
97 uint64_t &Offset) {
98 // We expect a certain number of elements for Data:
99 // - A 64-bit CallCount
100 // - A 64-bit CumulativeLocalTime counter
101 Profile::Data D;
102 auto CurrentOffset = Offset;
103 D.CallCount = Extractor.getU64(offset_ptr: &Offset);
104 if (CurrentOffset == Offset)
105 return make_error<StringError>(
106 Args: Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
107 "'",
108 Args: std::make_error_code(e: std::errc::invalid_argument));
109 CurrentOffset = Offset;
110 D.CumulativeLocalTime = Extractor.getU64(offset_ptr: &Offset);
111 if (CurrentOffset == Offset)
112 return make_error<StringError>(
113 Args: Twine("Error parsing cumulative local time at offset '") +
114 Twine(CurrentOffset) + "'",
115 Args: std::make_error_code(e: std::errc::invalid_argument));
116 return D;
117}
118
119Error Profile::addBlock(Block &&B) {
120 if (B.PathData.empty())
121 return make_error<StringError>(
122 Args: "Block may not have empty path data.",
123 Args: std::make_error_code(e: std::errc::invalid_argument));
124
125 Blocks.emplace_back(args: std::move(B));
126 return Error::success();
127}
128
129Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
130 auto It = PathIDMap.find(Val: P);
131 if (It == PathIDMap.end())
132 return make_error<StringError>(
133 Args: Twine("PathID not found: ") + Twine(P),
134 Args: std::make_error_code(e: std::errc::invalid_argument));
135 std::vector<Profile::FuncID> Path;
136 for (auto Node = It->second; Node; Node = Node->Caller)
137 Path.push_back(x: Node->Func);
138 return std::move(Path);
139}
140
141Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
142 if (P.empty())
143 return 0;
144
145 auto RootToLeafPath = reverse(C&: P);
146
147 // Find the root.
148 auto It = RootToLeafPath.begin();
149 auto PathRoot = *It++;
150 auto RootIt =
151 find_if(Range&: Roots, P: [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
152
153 // If we've not seen this root before, remember it.
154 TrieNode *Node = nullptr;
155 if (RootIt == Roots.end()) {
156 NodeStorage.emplace_back();
157 Node = &NodeStorage.back();
158 Node->Func = PathRoot;
159 Roots.push_back(Elt: Node);
160 } else {
161 Node = *RootIt;
162 }
163
164 // Now traverse the path, re-creating if necessary.
165 while (It != RootToLeafPath.end()) {
166 auto NodeFuncID = *It++;
167 auto CalleeIt = find_if(Range&: Node->Callees, P: [NodeFuncID](TrieNode *N) {
168 return N->Func == NodeFuncID;
169 });
170 if (CalleeIt == Node->Callees.end()) {
171 NodeStorage.emplace_back();
172 auto NewNode = &NodeStorage.back();
173 NewNode->Func = NodeFuncID;
174 NewNode->Caller = Node;
175 Node->Callees.push_back(x: NewNode);
176 Node = NewNode;
177 } else {
178 Node = *CalleeIt;
179 }
180 }
181
182 // At this point, Node *must* be pointing at the leaf.
183 assert(Node->Func == P.front());
184 if (Node->ID == 0) {
185 Node->ID = NextID++;
186 PathIDMap.insert(KV: {Node->ID, Node});
187 }
188 return Node->ID;
189}
190
191Profile xray::mergeProfilesByThread(const Profile &L, const Profile &R) {
192 Profile Merged;
193 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
194 using PathDataMapPtr = std::unique_ptr<PathDataMap>;
195 using PathDataVector = decltype(Profile::Block::PathData);
196 using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
197 ThreadProfileIndexMap ThreadProfileIndex;
198
199 for (const auto &P : {std::ref(t: L), std::ref(t: R)})
200 for (const auto &Block : P.get()) {
201 ThreadProfileIndexMap::iterator It;
202 std::tie(args&: It, args: std::ignore) = ThreadProfileIndex.insert(
203 KV: {Block.Thread, std::make_unique<PathDataMap>()});
204 for (const auto &PathAndData : Block.PathData) {
205 auto &PathID = PathAndData.first;
206 auto &Data = PathAndData.second;
207 auto NewPathID =
208 Merged.internPath(P: cantFail(ValOrErr: P.get().expandPath(P: PathID)));
209 PathDataMap::iterator PathDataIt;
210 bool Inserted;
211 std::tie(args&: PathDataIt, args&: Inserted) = It->second->insert(KV: {NewPathID, Data});
212 if (!Inserted) {
213 auto &ExistingData = PathDataIt->second;
214 ExistingData.CallCount += Data.CallCount;
215 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
216 }
217 }
218 }
219
220 for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
221 PathDataVector PathAndData;
222 PathAndData.reserve(n: IndexedThreadBlock.second->size());
223 copy(Range&: *IndexedThreadBlock.second, Out: std::back_inserter(x&: PathAndData));
224 cantFail(
225 Err: Merged.addBlock(B: {.Thread: IndexedThreadBlock.first, .PathData: std::move(PathAndData)}));
226 }
227 return Merged;
228}
229
230Profile xray::mergeProfilesByStack(const Profile &L, const Profile &R) {
231 Profile Merged;
232 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
233 PathDataMap PathData;
234 using PathDataVector = decltype(Profile::Block::PathData);
235 for (const auto &P : {std::ref(t: L), std::ref(t: R)})
236 for (const auto &Block : P.get())
237 for (const auto &PathAndData : Block.PathData) {
238 auto &PathId = PathAndData.first;
239 auto &Data = PathAndData.second;
240 auto NewPathID =
241 Merged.internPath(P: cantFail(ValOrErr: P.get().expandPath(P: PathId)));
242 PathDataMap::iterator PathDataIt;
243 bool Inserted;
244 std::tie(args&: PathDataIt, args&: Inserted) = PathData.insert(KV: {NewPathID, Data});
245 if (!Inserted) {
246 auto &ExistingData = PathDataIt->second;
247 ExistingData.CallCount += Data.CallCount;
248 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
249 }
250 }
251
252 // In the end there's a single Block, for thread 0.
253 PathDataVector Block;
254 Block.reserve(n: PathData.size());
255 copy(Range&: PathData, Out: std::back_inserter(x&: Block));
256 cantFail(Err: Merged.addBlock(B: {.Thread: 0, .PathData: std::move(Block)}));
257 return Merged;
258}
259
260Expected<Profile> xray::loadProfile(StringRef Filename) {
261 Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Name: Filename);
262 if (!FdOrErr)
263 return FdOrErr.takeError();
264
265 uint64_t FileSize;
266 if (auto EC = sys::fs::file_size(Path: Filename, Result&: FileSize))
267 return make_error<StringError>(
268 Args: Twine("Cannot get filesize of '") + Filename + "'", Args&: EC);
269
270 std::error_code EC;
271 sys::fs::mapped_file_region MappedFile(
272 *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
273 EC);
274 sys::fs::closeFile(F&: *FdOrErr);
275 if (EC)
276 return make_error<StringError>(
277 Args: Twine("Cannot mmap profile '") + Filename + "'", Args&: EC);
278 StringRef Data(MappedFile.data(), MappedFile.size());
279
280 Profile P;
281 uint64_t Offset = 0;
282 DataExtractor Extractor(Data, true, 8);
283
284 // For each block we get from the file:
285 while (Offset != MappedFile.size()) {
286 auto HeaderOrError = readBlockHeader(Extractor, Offset);
287 if (!HeaderOrError)
288 return HeaderOrError.takeError();
289
290 // TODO: Maybe store this header information for each block, even just for
291 // debugging?
292 const auto &Header = HeaderOrError.get();
293
294 // Read in the path data.
295 auto PathOrError = readPath(Extractor, Offset);
296 if (!PathOrError)
297 return PathOrError.takeError();
298 const auto &Path = PathOrError.get();
299
300 // For each path we encounter, we should intern it to get a PathID.
301 auto DataOrError = readData(Extractor, Offset);
302 if (!DataOrError)
303 return DataOrError.takeError();
304 auto &Data = DataOrError.get();
305
306 if (auto E =
307 P.addBlock(B: Profile::Block{.Thread: Profile::ThreadID{Header.Thread},
308 .PathData: {{P.internPath(P: Path), std::move(Data)}}}))
309 return std::move(E);
310 }
311
312 return P;
313}
314
315namespace {
316
317struct StackEntry {
318 uint64_t Timestamp;
319 Profile::FuncID FuncId;
320};
321
322} // namespace
323
324Expected<Profile> xray::profileFromTrace(const Trace &T) {
325 Profile P;
326
327 // The implementation of the algorithm re-creates the execution of
328 // the functions based on the trace data. To do this, we set up a number of
329 // data structures to track the execution context of every thread in the
330 // Trace.
331 DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
332 DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
333 ThreadPathData;
334
335 // We then do a pass through the Trace to account data on a per-thread-basis.
336 for (const auto &E : T) {
337 auto &TSD = ThreadStacks[E.TId];
338 switch (E.Type) {
339 case RecordTypes::ENTER:
340 case RecordTypes::ENTER_ARG:
341
342 // Push entries into the function call stack.
343 TSD.push_back(x: {.Timestamp: E.TSC, .FuncId: E.FuncId});
344 break;
345
346 case RecordTypes::EXIT:
347 case RecordTypes::TAIL_EXIT:
348
349 // Exits cause some accounting to happen, based on the state of the stack.
350 // For each function we pop off the stack, we take note of the path and
351 // record the cumulative state for this path. As we're doing this, we
352 // intern the path into the Profile.
353 while (!TSD.empty()) {
354 auto Top = TSD.back();
355 auto FunctionLocalTime = AbsoluteDifference(X: Top.Timestamp, Y: E.TSC);
356 SmallVector<Profile::FuncID, 16> Path;
357 transform(Range: reverse(C&: TSD), d_first: std::back_inserter(x&: Path),
358 F: std::mem_fn(pm: &StackEntry::FuncId));
359 auto InternedPath = P.internPath(P: Path);
360 auto &TPD = ThreadPathData[E.TId][InternedPath];
361 ++TPD.CallCount;
362 TPD.CumulativeLocalTime += FunctionLocalTime;
363 TSD.pop_back();
364
365 // If we've matched the corresponding entry event for this function,
366 // then we exit the loop.
367 if (Top.FuncId == E.FuncId)
368 break;
369
370 // FIXME: Consider the intermediate times and the cumulative tree time
371 // as well.
372 }
373
374 break;
375
376 case RecordTypes::CUSTOM_EVENT:
377 case RecordTypes::TYPED_EVENT:
378 // TODO: Support an extension point to allow handling of custom and typed
379 // events in profiles.
380 break;
381 }
382 }
383
384 // Once we've gone through the Trace, we now create one Block per thread in
385 // the Profile.
386 for (const auto &ThreadPaths : ThreadPathData) {
387 const auto &TID = ThreadPaths.first;
388 const auto &PathsData = ThreadPaths.second;
389 if (auto E = P.addBlock(B: {
390 .Thread: TID,
391 .PathData: std::vector<std::pair<Profile::PathID, Profile::Data>>(
392 PathsData.begin(), PathsData.end()),
393 }))
394 return std::move(E);
395 }
396
397 return P;
398}
399