//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines the XRay Profile class representing the latency profile generated by
// XRay's profiling mode.
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/Profile.h"

#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/XRay/Trace.h"
#include <deque>
#include <memory>

namespace llvm {
namespace xray {

Profile::Profile(const Profile &O) {
  // We need to re-create all the tries from the original (O), into the current
  // Profile being initialized, through the Block instances we see.
  for (const auto &Block : O) {
    Blocks.push_back({Block.Thread, {}});
    auto &B = Blocks.back();
    for (const auto &PathData : Block.PathData)
      B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
                            PathData.second});
  }
}

Profile &Profile::operator=(const Profile &O) {
  Profile P = O;
  *this = std::move(P);
  return *this;
}

namespace {

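// Each block in the profile data starts with a header with this layout,
// followed by a path (a sequence of signed 32-bit FuncIDs terminated by a
// zero), followed by a Profile::Data record (a 64-bit call count, then a
// 64-bit cumulative local time). readBlockHeader, readPath, and readData
// below parse those pieces in that order.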
struct BlockHeader {
  uint32_t Size;
  uint32_t Number;
  uint64_t Thread;
};

static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
                                             uint64_t &Offset) {
  BlockHeader H;
  uint64_t CurrentOffset = Offset;
  H.Size = Extractor.getU32(&Offset);
  if (Offset == CurrentOffset)
    return make_error<StringError>(
        Twine("Error parsing block header size at offset '") +
            Twine(CurrentOffset) + "'",
        std::make_error_code(std::errc::invalid_argument));
  CurrentOffset = Offset;
  H.Number = Extractor.getU32(&Offset);
  if (Offset == CurrentOffset)
    return make_error<StringError>(
        Twine("Error parsing block header number at offset '") +
            Twine(CurrentOffset) + "'",
        std::make_error_code(std::errc::invalid_argument));
  CurrentOffset = Offset;
  H.Thread = Extractor.getU64(&Offset);
  if (Offset == CurrentOffset)
    return make_error<StringError>(
        Twine("Error parsing block header thread id at offset '") +
            Twine(CurrentOffset) + "'",
        std::make_error_code(std::errc::invalid_argument));
  return H;
}

static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
                                                       uint64_t &Offset) {
  // We're reading a sequence of int32_t's until we find a 0. Note that the
  // terminating zero is included in the returned path.
  std::vector<Profile::FuncID> Path;
  auto CurrentOffset = Offset;
  int32_t FuncId;
  do {
    FuncId = Extractor.getSigned(&Offset, 4);
    if (CurrentOffset == Offset)
      return make_error<StringError>(
          Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
          std::make_error_code(std::errc::invalid_argument));
    CurrentOffset = Offset;
    Path.push_back(FuncId);
  } while (FuncId != 0);
  return std::move(Path);
}

static Expected<Profile::Data> readData(DataExtractor &Extractor,
                                        uint64_t &Offset) {
  // We expect a certain number of elements for Data:
  // - A 64-bit CallCount
  // - A 64-bit CumulativeLocalTime counter
  Profile::Data D;
  auto CurrentOffset = Offset;
  D.CallCount = Extractor.getU64(&Offset);
  if (CurrentOffset == Offset)
    return make_error<StringError>(
        Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
            "'",
        std::make_error_code(std::errc::invalid_argument));
  CurrentOffset = Offset;
  D.CumulativeLocalTime = Extractor.getU64(&Offset);
  if (CurrentOffset == Offset)
    return make_error<StringError>(
        Twine("Error parsing cumulative local time at offset '") +
            Twine(CurrentOffset) + "'",
        std::make_error_code(std::errc::invalid_argument));
  return D;
}

} // namespace

Error Profile::addBlock(Block &&B) {
  if (B.PathData.empty())
    return make_error<StringError>(
        "Block may not have empty path data.",
        std::make_error_code(std::errc::invalid_argument));

  Blocks.emplace_back(std::move(B));
  return Error::success();
}

Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
  auto It = PathIDMap.find(P);
  if (It == PathIDMap.end())
    return make_error<StringError>(
        Twine("PathID not found: ") + Twine(P),
        std::make_error_code(std::errc::invalid_argument));
  std::vector<Profile::FuncID> Path;
  for (auto Node = It->second; Node; Node = Node->Caller)
    Path.push_back(Node->Func);
  return std::move(Path);
}

Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
  if (P.empty())
    return 0;

  auto RootToLeafPath = reverse(P);

  // Find the root.
  auto It = RootToLeafPath.begin();
  auto PathRoot = *It++;
  auto RootIt =
      find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });

  // If we've not seen this root before, remember it.
  TrieNode *Node = nullptr;
  if (RootIt == Roots.end()) {
    NodeStorage.emplace_back();
    Node = &NodeStorage.back();
    Node->Func = PathRoot;
    Roots.push_back(Node);
  } else {
    Node = *RootIt;
  }

  // Now traverse the path, re-creating if necessary.
  while (It != RootToLeafPath.end()) {
    auto NodeFuncID = *It++;
    auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
      return N->Func == NodeFuncID;
    });
    if (CalleeIt == Node->Callees.end()) {
      NodeStorage.emplace_back();
      auto NewNode = &NodeStorage.back();
      NewNode->Func = NodeFuncID;
      NewNode->Caller = Node;
      Node->Callees.push_back(NewNode);
      Node = NewNode;
    } else {
      Node = *CalleeIt;
    }
  }

  // At this point, Node *must* be pointing at the leaf.
  assert(Node->Func == P.front());
  if (Node->ID == 0) {
    Node->ID = NextID++;
    PathIDMap.insert({Node->ID, Node});
  }
  return Node->ID;
}
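
// Illustrative sketch (hypothetical FuncID values): interning the same
// leaf-to-root path twice yields the same PathID, and expandPath() recovers
// the original sequence.
//
//   Profile P;
//   Profile::PathID ID1 = P.internPath({/*leaf*/ 3, 2, /*root*/ 1});
//   Profile::PathID ID2 = P.internPath({3, 2, 1}); // ID2 == ID1
//   auto Expanded = cantFail(P.expandPath(ID1));   // == {3, 2, 1}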

Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
  Profile Merged;
  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
  using PathDataMapPtr = std::unique_ptr<PathDataMap>;
  using PathDataVector = decltype(Profile::Block::PathData);
  using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
  ThreadProfileIndexMap ThreadProfileIndex;

  for (const auto &P : {std::ref(L), std::ref(R)})
    for (const auto &Block : P.get()) {
      ThreadProfileIndexMap::iterator It;
      std::tie(It, std::ignore) = ThreadProfileIndex.insert(
          {Block.Thread, std::make_unique<PathDataMap>()});
      for (const auto &PathAndData : Block.PathData) {
        auto &PathID = PathAndData.first;
        auto &Data = PathAndData.second;
        auto NewPathID =
            Merged.internPath(cantFail(P.get().expandPath(PathID)));
        PathDataMap::iterator PathDataIt;
        bool Inserted;
        std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
        if (!Inserted) {
          auto &ExistingData = PathDataIt->second;
          ExistingData.CallCount += Data.CallCount;
          ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
        }
      }
    }

  for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
    PathDataVector PathAndData;
    PathAndData.reserve(IndexedThreadBlock.second->size());
    copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
    cantFail(
        Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
  }
  return Merged;
}

Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
  Profile Merged;
  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
  PathDataMap PathData;
  using PathDataVector = decltype(Profile::Block::PathData);
  for (const auto &P : {std::ref(L), std::ref(R)})
    for (const auto &Block : P.get())
      for (const auto &PathAndData : Block.PathData) {
        auto &PathId = PathAndData.first;
        auto &Data = PathAndData.second;
        auto NewPathID =
            Merged.internPath(cantFail(P.get().expandPath(PathId)));
        PathDataMap::iterator PathDataIt;
        bool Inserted;
        std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
        if (!Inserted) {
          auto &ExistingData = PathDataIt->second;
          ExistingData.CallCount += Data.CallCount;
          ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
        }
      }

  // In the end there's a single Block, for thread 0.
  PathDataVector Block;
  Block.reserve(PathData.size());
  copy(PathData, std::back_inserter(Block));
  cantFail(Merged.addBlock({0, std::move(Block)}));
  return Merged;
}
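
// A quick note on the difference between the two merge strategies above:
// mergeProfilesByThread keeps one Block per distinct ThreadID seen in either
// profile and sums Data per path within each thread, while
// mergeProfilesByStack collapses everything into a single Block (attributed
// to thread 0) and sums Data per path across all threads.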

Expected<Profile> loadProfile(StringRef Filename) {
  Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
  if (!FdOrErr)
    return FdOrErr.takeError();

  uint64_t FileSize;
  if (auto EC = sys::fs::file_size(Filename, FileSize))
    return make_error<StringError>(
        Twine("Cannot get filesize of '") + Filename + "'", EC);

  std::error_code EC;
  sys::fs::mapped_file_region MappedFile(
      *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
      EC);
  sys::fs::closeFile(*FdOrErr);
  if (EC)
    return make_error<StringError>(
        Twine("Cannot mmap profile '") + Filename + "'", EC);
  StringRef Data(MappedFile.data(), MappedFile.size());

  Profile P;
  uint64_t Offset = 0;
  DataExtractor Extractor(Data, true, 8);

  // For each block we get from the file:
  while (Offset != MappedFile.size()) {
    auto HeaderOrError = readBlockHeader(Extractor, Offset);
    if (!HeaderOrError)
      return HeaderOrError.takeError();

    // TODO: Maybe store this header information for each block, even just for
    // debugging?
    const auto &Header = HeaderOrError.get();

    // Read in the path data.
    auto PathOrError = readPath(Extractor, Offset);
    if (!PathOrError)
      return PathOrError.takeError();
    const auto &Path = PathOrError.get();

    // For each path we encounter, we should intern it to get a PathID.
    auto DataOrError = readData(Extractor, Offset);
    if (!DataOrError)
      return DataOrError.takeError();
    auto &Data = DataOrError.get();

    if (auto E =
            P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
                                      {{P.internPath(Path), std::move(Data)}}}))
      return std::move(E);
  }

  return P;
}
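
// Illustrative usage (hypothetical file name):
//
//   Expected<Profile> P = loadProfile("xray-profile.bin");
//   if (!P)
//     return P.takeError();
//   for (const auto &Block : *P)
//     ...; // One Block per block read from the file.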

namespace {

struct StackEntry {
  uint64_t Timestamp;
  Profile::FuncID FuncId;
};

} // namespace

Expected<Profile> profileFromTrace(const Trace &T) {
  Profile P;

  // The implementation of the algorithm re-creates the execution of the
  // functions based on the trace data. To do this, we set up a number of data
  // structures to track the execution context of every thread in the Trace.
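  //
  // For example (illustrative FuncIDs and TSC values), on a single thread the
  // event sequence
  //
  //   ENTER f1 @ t0, ENTER f2 @ t1, EXIT f2 @ t2, EXIT f1 @ t3
  //
  // is accounted as two interned paths: {f2, f1} (leaf to root) with
  // CallCount = 1 and CumulativeLocalTime = t2 - t1, and {f1} with
  // CallCount = 1 and CumulativeLocalTime = t3 - t0.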
  DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
  DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
      ThreadPathData;

  // We then do a pass through the Trace to account data on a per-thread-basis.
  for (const auto &E : T) {
    auto &TSD = ThreadStacks[E.TId];
    switch (E.Type) {
    case RecordTypes::ENTER:
    case RecordTypes::ENTER_ARG:

      // Push entries into the function call stack.
      TSD.push_back({E.TSC, E.FuncId});
      break;

    case RecordTypes::EXIT:
    case RecordTypes::TAIL_EXIT:

      // Exits cause some accounting to happen, based on the state of the
      // stack. For each function we pop off the stack, we take note of the
      // path and record the cumulative state for this path. As we're doing
      // this, we intern the path into the Profile.
      while (!TSD.empty()) {
        auto Top = TSD.back();
        auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
        SmallVector<Profile::FuncID, 16> Path;
        transform(reverse(TSD), std::back_inserter(Path),
                  std::mem_fn(&StackEntry::FuncId));
        auto InternedPath = P.internPath(Path);
        auto &TPD = ThreadPathData[E.TId][InternedPath];
        ++TPD.CallCount;
        TPD.CumulativeLocalTime += FunctionLocalTime;
        TSD.pop_back();

        // If we've matched the corresponding entry event for this function,
        // then we exit the loop.
        if (Top.FuncId == E.FuncId)
          break;

        // FIXME: Consider the intermediate times and the cumulative tree time
        // as well.
      }

      break;

    case RecordTypes::CUSTOM_EVENT:
    case RecordTypes::TYPED_EVENT:
      // TODO: Support an extension point to allow handling of custom and typed
      // events in profiles.
      break;
    }
  }

  // Once we've gone through the Trace, we now create one Block per thread in
  // the Profile.
  for (const auto &ThreadPaths : ThreadPathData) {
    const auto &TID = ThreadPaths.first;
    const auto &PathsData = ThreadPaths.second;
    if (auto E = P.addBlock({
            TID,
            std::vector<std::pair<Profile::PathID, Profile::Data>>(
                PathsData.begin(), PathsData.end()),
        }))
      return std::move(E);
  }

  return P;
}

} // namespace xray
} // namespace llvm