1 | //===- Profile.cpp - XRay Profile Abstraction -----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Defines the XRay Profile class representing the latency profile generated by |
10 | // XRay's profiling mode. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | #include "llvm/XRay/Profile.h" |
14 | |
15 | #include "llvm/Support/DataExtractor.h" |
16 | #include "llvm/Support/Error.h" |
17 | #include "llvm/Support/FileSystem.h" |
18 | #include "llvm/XRay/Trace.h" |
19 | #include <memory> |
20 | |
21 | namespace llvm { |
22 | namespace xray { |
23 | |
24 | Profile::Profile(const Profile &O) { |
25 | // We need to re-create all the tries from the original (O), into the current |
26 | // Profile being initialized, through the Block instances we see. |
27 | for (const auto &Block : O) { |
28 | Blocks.push_back(x: {.Thread: Block.Thread, .PathData: {}}); |
29 | auto &B = Blocks.back(); |
30 | for (const auto &PathData : Block.PathData) |
31 | B.PathData.push_back(x: {internPath(P: cantFail(ValOrErr: O.expandPath(P: PathData.first))), |
32 | PathData.second}); |
33 | } |
34 | } |
35 | |
36 | Profile &Profile::operator=(const Profile &O) { |
37 | Profile P = O; |
38 | *this = std::move(P); |
39 | return *this; |
40 | } |
41 | |
42 | namespace { |
43 | |
44 | struct { |
45 | uint32_t ; |
46 | uint32_t ; |
47 | uint64_t ; |
48 | }; |
49 | |
50 | static Expected<BlockHeader> (DataExtractor &, |
51 | uint64_t &Offset) { |
52 | BlockHeader H; |
53 | uint64_t CurrentOffset = Offset; |
54 | H.Size = Extractor.getU32(offset_ptr: &Offset); |
55 | if (Offset == CurrentOffset) |
56 | return make_error<StringError>( |
57 | Args: Twine("Error parsing block header size at offset '" ) + |
58 | Twine(CurrentOffset) + "'" , |
59 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
60 | CurrentOffset = Offset; |
61 | H.Number = Extractor.getU32(offset_ptr: &Offset); |
62 | if (Offset == CurrentOffset) |
63 | return make_error<StringError>( |
64 | Args: Twine("Error parsing block header number at offset '" ) + |
65 | Twine(CurrentOffset) + "'" , |
66 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
67 | CurrentOffset = Offset; |
68 | H.Thread = Extractor.getU64(offset_ptr: &Offset); |
69 | if (Offset == CurrentOffset) |
70 | return make_error<StringError>( |
71 | Args: Twine("Error parsing block header thread id at offset '" ) + |
72 | Twine(CurrentOffset) + "'" , |
73 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
74 | return H; |
75 | } |
76 | |
77 | static Expected<std::vector<Profile::FuncID>> (DataExtractor &, |
78 | uint64_t &Offset) { |
79 | // We're reading a sequence of int32_t's until we find a 0. |
80 | std::vector<Profile::FuncID> Path; |
81 | auto CurrentOffset = Offset; |
82 | int32_t FuncId; |
83 | do { |
84 | FuncId = Extractor.getSigned(offset_ptr: &Offset, size: 4); |
85 | if (CurrentOffset == Offset) |
86 | return make_error<StringError>( |
87 | Args: Twine("Error parsing path at offset '" ) + Twine(CurrentOffset) + "'" , |
88 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
89 | CurrentOffset = Offset; |
90 | Path.push_back(x: FuncId); |
91 | } while (FuncId != 0); |
92 | return std::move(Path); |
93 | } |
94 | |
95 | static Expected<Profile::Data> (DataExtractor &, |
96 | uint64_t &Offset) { |
97 | // We expect a certain number of elements for Data: |
98 | // - A 64-bit CallCount |
99 | // - A 64-bit CumulativeLocalTime counter |
100 | Profile::Data D; |
101 | auto CurrentOffset = Offset; |
102 | D.CallCount = Extractor.getU64(offset_ptr: &Offset); |
103 | if (CurrentOffset == Offset) |
104 | return make_error<StringError>( |
105 | Args: Twine("Error parsing call counts at offset '" ) + Twine(CurrentOffset) + |
106 | "'" , |
107 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
108 | CurrentOffset = Offset; |
109 | D.CumulativeLocalTime = Extractor.getU64(offset_ptr: &Offset); |
110 | if (CurrentOffset == Offset) |
111 | return make_error<StringError>( |
112 | Args: Twine("Error parsing cumulative local time at offset '" ) + |
113 | Twine(CurrentOffset) + "'" , |
114 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
115 | return D; |
116 | } |
117 | |
118 | } // namespace |
119 | |
120 | Error Profile::addBlock(Block &&B) { |
121 | if (B.PathData.empty()) |
122 | return make_error<StringError>( |
123 | Args: "Block may not have empty path data." , |
124 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
125 | |
126 | Blocks.emplace_back(args: std::move(B)); |
127 | return Error::success(); |
128 | } |
129 | |
130 | Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const { |
131 | auto It = PathIDMap.find(Val: P); |
132 | if (It == PathIDMap.end()) |
133 | return make_error<StringError>( |
134 | Args: Twine("PathID not found: " ) + Twine(P), |
135 | Args: std::make_error_code(e: std::errc::invalid_argument)); |
136 | std::vector<Profile::FuncID> Path; |
137 | for (auto Node = It->second; Node; Node = Node->Caller) |
138 | Path.push_back(x: Node->Func); |
139 | return std::move(Path); |
140 | } |
141 | |
142 | Profile::PathID Profile::internPath(ArrayRef<FuncID> P) { |
143 | if (P.empty()) |
144 | return 0; |
145 | |
146 | auto RootToLeafPath = reverse(C&: P); |
147 | |
148 | // Find the root. |
149 | auto It = RootToLeafPath.begin(); |
150 | auto PathRoot = *It++; |
151 | auto RootIt = |
152 | find_if(Range&: Roots, P: [PathRoot](TrieNode *N) { return N->Func == PathRoot; }); |
153 | |
154 | // If we've not seen this root before, remember it. |
155 | TrieNode *Node = nullptr; |
156 | if (RootIt == Roots.end()) { |
157 | NodeStorage.emplace_back(); |
158 | Node = &NodeStorage.back(); |
159 | Node->Func = PathRoot; |
160 | Roots.push_back(Elt: Node); |
161 | } else { |
162 | Node = *RootIt; |
163 | } |
164 | |
165 | // Now traverse the path, re-creating if necessary. |
166 | while (It != RootToLeafPath.end()) { |
167 | auto NodeFuncID = *It++; |
168 | auto CalleeIt = find_if(Range&: Node->Callees, P: [NodeFuncID](TrieNode *N) { |
169 | return N->Func == NodeFuncID; |
170 | }); |
171 | if (CalleeIt == Node->Callees.end()) { |
172 | NodeStorage.emplace_back(); |
173 | auto NewNode = &NodeStorage.back(); |
174 | NewNode->Func = NodeFuncID; |
175 | NewNode->Caller = Node; |
176 | Node->Callees.push_back(x: NewNode); |
177 | Node = NewNode; |
178 | } else { |
179 | Node = *CalleeIt; |
180 | } |
181 | } |
182 | |
183 | // At this point, Node *must* be pointing at the leaf. |
184 | assert(Node->Func == P.front()); |
185 | if (Node->ID == 0) { |
186 | Node->ID = NextID++; |
187 | PathIDMap.insert(KV: {Node->ID, Node}); |
188 | } |
189 | return Node->ID; |
190 | } |
191 | |
192 | Profile mergeProfilesByThread(const Profile &L, const Profile &R) { |
193 | Profile Merged; |
194 | using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; |
195 | using PathDataMapPtr = std::unique_ptr<PathDataMap>; |
196 | using PathDataVector = decltype(Profile::Block::PathData); |
197 | using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>; |
198 | ThreadProfileIndexMap ThreadProfileIndex; |
199 | |
200 | for (const auto &P : {std::ref(t: L), std::ref(t: R)}) |
201 | for (const auto &Block : P.get()) { |
202 | ThreadProfileIndexMap::iterator It; |
203 | std::tie(args&: It, args: std::ignore) = ThreadProfileIndex.insert( |
204 | KV: {Block.Thread, std::make_unique<PathDataMap>()}); |
205 | for (const auto &PathAndData : Block.PathData) { |
206 | auto &PathID = PathAndData.first; |
207 | auto &Data = PathAndData.second; |
208 | auto NewPathID = |
209 | Merged.internPath(P: cantFail(ValOrErr: P.get().expandPath(P: PathID))); |
210 | PathDataMap::iterator PathDataIt; |
211 | bool Inserted; |
212 | std::tie(args&: PathDataIt, args&: Inserted) = It->second->insert(KV: {NewPathID, Data}); |
213 | if (!Inserted) { |
214 | auto &ExistingData = PathDataIt->second; |
215 | ExistingData.CallCount += Data.CallCount; |
216 | ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; |
217 | } |
218 | } |
219 | } |
220 | |
221 | for (const auto &IndexedThreadBlock : ThreadProfileIndex) { |
222 | PathDataVector PathAndData; |
223 | PathAndData.reserve(n: IndexedThreadBlock.second->size()); |
224 | copy(Range&: *IndexedThreadBlock.second, Out: std::back_inserter(x&: PathAndData)); |
225 | cantFail( |
226 | Err: Merged.addBlock(B: {.Thread: IndexedThreadBlock.first, .PathData: std::move(PathAndData)})); |
227 | } |
228 | return Merged; |
229 | } |
230 | |
231 | Profile mergeProfilesByStack(const Profile &L, const Profile &R) { |
232 | Profile Merged; |
233 | using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; |
234 | PathDataMap PathData; |
235 | using PathDataVector = decltype(Profile::Block::PathData); |
236 | for (const auto &P : {std::ref(t: L), std::ref(t: R)}) |
237 | for (const auto &Block : P.get()) |
238 | for (const auto &PathAndData : Block.PathData) { |
239 | auto &PathId = PathAndData.first; |
240 | auto &Data = PathAndData.second; |
241 | auto NewPathID = |
242 | Merged.internPath(P: cantFail(ValOrErr: P.get().expandPath(P: PathId))); |
243 | PathDataMap::iterator PathDataIt; |
244 | bool Inserted; |
245 | std::tie(args&: PathDataIt, args&: Inserted) = PathData.insert(KV: {NewPathID, Data}); |
246 | if (!Inserted) { |
247 | auto &ExistingData = PathDataIt->second; |
248 | ExistingData.CallCount += Data.CallCount; |
249 | ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; |
250 | } |
251 | } |
252 | |
253 | // In the end there's a single Block, for thread 0. |
254 | PathDataVector Block; |
255 | Block.reserve(n: PathData.size()); |
256 | copy(Range&: PathData, Out: std::back_inserter(x&: Block)); |
257 | cantFail(Err: Merged.addBlock(B: {.Thread: 0, .PathData: std::move(Block)})); |
258 | return Merged; |
259 | } |
260 | |
261 | Expected<Profile> loadProfile(StringRef Filename) { |
262 | Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Name: Filename); |
263 | if (!FdOrErr) |
264 | return FdOrErr.takeError(); |
265 | |
266 | uint64_t FileSize; |
267 | if (auto EC = sys::fs::file_size(Path: Filename, Result&: FileSize)) |
268 | return make_error<StringError>( |
269 | Args: Twine("Cannot get filesize of '" ) + Filename + "'" , Args&: EC); |
270 | |
271 | std::error_code EC; |
272 | sys::fs::mapped_file_region MappedFile( |
273 | *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, |
274 | EC); |
275 | sys::fs::closeFile(F&: *FdOrErr); |
276 | if (EC) |
277 | return make_error<StringError>( |
278 | Args: Twine("Cannot mmap profile '" ) + Filename + "'" , Args&: EC); |
279 | StringRef Data(MappedFile.data(), MappedFile.size()); |
280 | |
281 | Profile P; |
282 | uint64_t Offset = 0; |
283 | DataExtractor (Data, true, 8); |
284 | |
285 | // For each block we get from the file: |
286 | while (Offset != MappedFile.size()) { |
287 | auto = readBlockHeader(Extractor, Offset); |
288 | if (!HeaderOrError) |
289 | return HeaderOrError.takeError(); |
290 | |
291 | // TODO: Maybe store this header information for each block, even just for |
292 | // debugging? |
293 | const auto & = HeaderOrError.get(); |
294 | |
295 | // Read in the path data. |
296 | auto PathOrError = readPath(Extractor, Offset); |
297 | if (!PathOrError) |
298 | return PathOrError.takeError(); |
299 | const auto &Path = PathOrError.get(); |
300 | |
301 | // For each path we encounter, we should intern it to get a PathID. |
302 | auto DataOrError = readData(Extractor, Offset); |
303 | if (!DataOrError) |
304 | return DataOrError.takeError(); |
305 | auto &Data = DataOrError.get(); |
306 | |
307 | if (auto E = |
308 | P.addBlock(B: Profile::Block{.Thread: Profile::ThreadID{Header.Thread}, |
309 | .PathData: {{P.internPath(P: Path), std::move(Data)}}})) |
310 | return std::move(E); |
311 | } |
312 | |
313 | return P; |
314 | } |
315 | |
316 | namespace { |
317 | |
318 | struct StackEntry { |
319 | uint64_t Timestamp; |
320 | Profile::FuncID FuncId; |
321 | }; |
322 | |
323 | } // namespace |
324 | |
325 | Expected<Profile> profileFromTrace(const Trace &T) { |
326 | Profile P; |
327 | |
328 | // The implementation of the algorithm re-creates the execution of |
329 | // the functions based on the trace data. To do this, we set up a number of |
330 | // data structures to track the execution context of every thread in the |
331 | // Trace. |
332 | DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks; |
333 | DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>> |
334 | ThreadPathData; |
335 | |
336 | // We then do a pass through the Trace to account data on a per-thread-basis. |
337 | for (const auto &E : T) { |
338 | auto &TSD = ThreadStacks[E.TId]; |
339 | switch (E.Type) { |
340 | case RecordTypes::ENTER: |
341 | case RecordTypes::ENTER_ARG: |
342 | |
343 | // Push entries into the function call stack. |
344 | TSD.push_back(x: {.Timestamp: E.TSC, .FuncId: E.FuncId}); |
345 | break; |
346 | |
347 | case RecordTypes::EXIT: |
348 | case RecordTypes::TAIL_EXIT: |
349 | |
350 | // Exits cause some accounting to happen, based on the state of the stack. |
351 | // For each function we pop off the stack, we take note of the path and |
352 | // record the cumulative state for this path. As we're doing this, we |
353 | // intern the path into the Profile. |
354 | while (!TSD.empty()) { |
355 | auto Top = TSD.back(); |
356 | auto FunctionLocalTime = AbsoluteDifference(X: Top.Timestamp, Y: E.TSC); |
357 | SmallVector<Profile::FuncID, 16> Path; |
358 | transform(Range: reverse(C&: TSD), d_first: std::back_inserter(x&: Path), |
359 | F: std::mem_fn(pm: &StackEntry::FuncId)); |
360 | auto InternedPath = P.internPath(P: Path); |
361 | auto &TPD = ThreadPathData[E.TId][InternedPath]; |
362 | ++TPD.CallCount; |
363 | TPD.CumulativeLocalTime += FunctionLocalTime; |
364 | TSD.pop_back(); |
365 | |
366 | // If we've matched the corresponding entry event for this function, |
367 | // then we exit the loop. |
368 | if (Top.FuncId == E.FuncId) |
369 | break; |
370 | |
371 | // FIXME: Consider the intermediate times and the cumulative tree time |
372 | // as well. |
373 | } |
374 | |
375 | break; |
376 | |
377 | case RecordTypes::CUSTOM_EVENT: |
378 | case RecordTypes::TYPED_EVENT: |
379 | // TODO: Support an extension point to allow handling of custom and typed |
380 | // events in profiles. |
381 | break; |
382 | } |
383 | } |
384 | |
385 | // Once we've gone through the Trace, we now create one Block per thread in |
386 | // the Profile. |
387 | for (const auto &ThreadPaths : ThreadPathData) { |
388 | const auto &TID = ThreadPaths.first; |
389 | const auto &PathsData = ThreadPaths.second; |
390 | if (auto E = P.addBlock(B: { |
391 | .Thread: TID, |
392 | .PathData: std::vector<std::pair<Profile::PathID, Profile::Data>>( |
393 | PathsData.begin(), PathsData.end()), |
394 | })) |
395 | return std::move(E); |
396 | } |
397 | |
398 | return P; |
399 | } |
400 | |
401 | } // namespace xray |
402 | } // namespace llvm |
403 | |