1 | //===-- xray_function_call_trie.h ------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file is a part of XRay, a dynamic runtime instrumentation system. |
10 | // |
11 | // This file defines the interface for a function call trie. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | #ifndef XRAY_FUNCTION_CALL_TRIE_H |
15 | #define XRAY_FUNCTION_CALL_TRIE_H |
16 | |
17 | #include "xray_buffer_queue.h" |
18 | #include "xray_defs.h" |
19 | #include "xray_profiling_flags.h" |
20 | #include "xray_segmented_array.h" |
21 | #include <limits> |
22 | #include <memory> // For placement new. |
23 | #include <utility> |
24 | |
25 | namespace __xray { |
26 | |
27 | /// A FunctionCallTrie represents the stack traces of XRay instrumented |
28 | /// functions that we've encountered, where a node corresponds to a function and |
29 | /// the path from the root to the node its stack trace. Each node in the trie |
30 | /// will contain some useful values, including: |
31 | /// |
32 | /// * The cumulative amount of time spent in this particular node/stack. |
33 | /// * The number of times this stack has appeared. |
34 | /// * A histogram of latencies for that particular node. |
35 | /// |
36 | /// Each node in the trie will also contain a list of callees, represented using |
37 | /// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function |
38 | /// ID of the callee, and a pointer to the node. |
39 | /// |
40 | /// If we visualise this data structure, we'll find the following potential |
41 | /// representation: |
42 | /// |
43 | /// [function id node] -> [callees] [cumulative time] |
44 | /// [call counter] [latency histogram] |
45 | /// |
46 | /// As an example, when we have a function in this pseudocode: |
47 | /// |
48 | /// func f(N) { |
49 | /// g() |
50 | /// h() |
51 | /// for i := 1..N { j() } |
52 | /// } |
53 | /// |
54 | /// We may end up with a trie of the following form: |
55 | /// |
56 | /// f -> [ g, h, j ] [...] [1] [...] |
57 | /// g -> [ ... ] [...] [1] [...] |
58 | /// h -> [ ... ] [...] [1] [...] |
59 | /// j -> [ ... ] [...] [N] [...] |
60 | /// |
61 | /// If for instance the function g() called j() like so: |
62 | /// |
63 | /// func g() { |
64 | /// for i := 1..10 { j() } |
65 | /// } |
66 | /// |
67 | /// We'll find the following updated trie: |
68 | /// |
69 | /// f -> [ g, h, j ] [...] [1] [...] |
70 | /// g -> [ j' ] [...] [1] [...] |
71 | /// h -> [ ... ] [...] [1] [...] |
72 | /// j -> [ ... ] [...] [N] [...] |
73 | /// j' -> [ ... ] [...] [10] [...] |
74 | /// |
75 | /// Note that we'll have a new node representing the path `f -> g -> j'` with |
76 | /// isolated data. This isolation gives us a means of representing the stack |
77 | /// traces as a path, as opposed to a key in a table. The alternative |
78 | /// implementation here would be to use a separate table for the path, and use |
79 | /// hashes of the path as an identifier to accumulate the information. We've |
80 | /// moved away from this approach as it takes a lot of time to compute the hash |
81 | /// every time we need to update a function's call information as we're handling |
82 | /// the entry and exit events. |
83 | /// |
84 | /// This approach allows us to maintain a shadow stack, which represents the |
85 | /// currently executing path, and on function exits quickly compute the amount |
86 | /// of time elapsed from the entry, then update the counters for the node |
87 | /// already represented in the trie. This necessitates an efficient |
88 | /// representation of the various data structures (the list of callees must be |
89 | /// cache-aware and efficient to look up, and the histogram must be compact and |
90 | /// quick to update) to enable us to keep the overheads of this implementation |
91 | /// to the minimum. |
92 | class FunctionCallTrie { |
93 | public: |
94 | struct Node; |
95 | |
96 | // We use a NodeIdPair type instead of a std::pair<...> to not rely on the |
97 | // standard library types in this header. |
98 | struct NodeIdPair { |
99 | Node *NodePtr; |
100 | int32_t FId; |
101 | }; |
102 | |
103 | using NodeIdPairArray = Array<NodeIdPair>; |
104 | using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType; |
105 | |
106 | // A Node in the FunctionCallTrie gives us a list of callees, the cumulative |
107 | // number of times this node actually appeared, the cumulative amount of time |
108 | // for this particular node including its children call times, and just the |
109 | // local time spent on this node. Each Node will have the ID of the XRay |
110 | // instrumented function that it is associated to. |
111 | struct Node { |
112 | Node *Parent; |
113 | NodeIdPairArray Callees; |
114 | uint64_t CallCount; |
115 | uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time. |
116 | int32_t FId; |
117 | |
118 | // TODO: Include the compact histogram. |
119 | }; |
120 | |
121 | private: |
122 | struct ShadowStackEntry { |
123 | uint64_t EntryTSC; |
124 | Node *NodePtr; |
125 | uint16_t EntryCPU; |
126 | }; |
127 | |
128 | using NodeArray = Array<Node>; |
129 | using RootArray = Array<Node *>; |
130 | using ShadowStackArray = Array<ShadowStackEntry>; |
131 | |
132 | public: |
133 | // We collate the allocators we need into a single struct, as a convenience to |
134 | // allow us to initialize these as a group. |
135 | struct Allocators { |
136 | using NodeAllocatorType = NodeArray::AllocatorType; |
137 | using RootAllocatorType = RootArray::AllocatorType; |
138 | using ShadowStackAllocatorType = ShadowStackArray::AllocatorType; |
139 | |
140 | // Use hosted aligned storage members to allow for trivial move and init. |
141 | // This also allows us to sidestep the potential-failing allocation issue. |
142 | alignas(NodeAllocatorType) std::byte |
143 | NodeAllocatorStorage[sizeof(NodeAllocatorType)]; |
144 | alignas(RootAllocatorType) std::byte |
145 | RootAllocatorStorage[sizeof(RootAllocatorType)]; |
146 | alignas(ShadowStackAllocatorType) std::byte |
147 | ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)]; |
148 | alignas(NodeIdPairAllocatorType) std::byte |
149 | NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)]; |
150 | |
151 | NodeAllocatorType *NodeAllocator = nullptr; |
152 | RootAllocatorType *RootAllocator = nullptr; |
153 | ShadowStackAllocatorType *ShadowStackAllocator = nullptr; |
154 | NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; |
155 | |
156 | Allocators() = default; |
157 | Allocators(const Allocators &) = delete; |
158 | Allocators &operator=(const Allocators &) = delete; |
159 | |
160 | struct Buffers { |
161 | BufferQueue::Buffer NodeBuffer; |
162 | BufferQueue::Buffer RootsBuffer; |
163 | BufferQueue::Buffer ShadowStackBuffer; |
164 | BufferQueue::Buffer NodeIdPairBuffer; |
165 | }; |
166 | |
167 | explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT { |
168 | new (&NodeAllocatorStorage) |
169 | NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size); |
170 | NodeAllocator = |
171 | reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); |
172 | |
173 | new (&RootAllocatorStorage) |
174 | RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size); |
175 | RootAllocator = |
176 | reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); |
177 | |
178 | new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType( |
179 | B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size); |
180 | ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( |
181 | &ShadowStackAllocatorStorage); |
182 | |
183 | new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType( |
184 | B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size); |
185 | NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( |
186 | &NodeIdPairAllocatorStorage); |
187 | } |
188 | |
189 | explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT { |
190 | new (&NodeAllocatorStorage) NodeAllocatorType(Max); |
191 | NodeAllocator = |
192 | reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); |
193 | |
194 | new (&RootAllocatorStorage) RootAllocatorType(Max); |
195 | RootAllocator = |
196 | reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); |
197 | |
198 | new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max); |
199 | ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( |
200 | &ShadowStackAllocatorStorage); |
201 | |
202 | new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max); |
203 | NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( |
204 | &NodeIdPairAllocatorStorage); |
205 | } |
206 | |
207 | Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT { |
208 | // Here we rely on the safety of memcpy'ing contents of the storage |
209 | // members, and then pointing the source pointers to nullptr. |
210 | internal_memcpy(dest: &NodeAllocatorStorage, src: &O.NodeAllocatorStorage, |
211 | n: sizeof(NodeAllocatorType)); |
212 | internal_memcpy(dest: &RootAllocatorStorage, src: &O.RootAllocatorStorage, |
213 | n: sizeof(RootAllocatorType)); |
214 | internal_memcpy(dest: &ShadowStackAllocatorStorage, |
215 | src: &O.ShadowStackAllocatorStorage, |
216 | n: sizeof(ShadowStackAllocatorType)); |
217 | internal_memcpy(dest: &NodeIdPairAllocatorStorage, |
218 | src: &O.NodeIdPairAllocatorStorage, |
219 | n: sizeof(NodeIdPairAllocatorType)); |
220 | |
221 | NodeAllocator = |
222 | reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); |
223 | RootAllocator = |
224 | reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); |
225 | ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( |
226 | &ShadowStackAllocatorStorage); |
227 | NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( |
228 | &NodeIdPairAllocatorStorage); |
229 | |
230 | O.NodeAllocator = nullptr; |
231 | O.RootAllocator = nullptr; |
232 | O.ShadowStackAllocator = nullptr; |
233 | O.NodeIdPairAllocator = nullptr; |
234 | } |
235 | |
236 | Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT { |
237 | // When moving into an existing instance, we ensure that we clean up the |
238 | // current allocators. |
239 | if (NodeAllocator) |
240 | NodeAllocator->~NodeAllocatorType(); |
241 | if (O.NodeAllocator) { |
242 | new (&NodeAllocatorStorage) |
243 | NodeAllocatorType(std::move(t&: *O.NodeAllocator)); |
244 | NodeAllocator = |
245 | reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); |
246 | O.NodeAllocator = nullptr; |
247 | } else { |
248 | NodeAllocator = nullptr; |
249 | } |
250 | |
251 | if (RootAllocator) |
252 | RootAllocator->~RootAllocatorType(); |
253 | if (O.RootAllocator) { |
254 | new (&RootAllocatorStorage) |
255 | RootAllocatorType(std::move(t&: *O.RootAllocator)); |
256 | RootAllocator = |
257 | reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); |
258 | O.RootAllocator = nullptr; |
259 | } else { |
260 | RootAllocator = nullptr; |
261 | } |
262 | |
263 | if (ShadowStackAllocator) |
264 | ShadowStackAllocator->~ShadowStackAllocatorType(); |
265 | if (O.ShadowStackAllocator) { |
266 | new (&ShadowStackAllocatorStorage) |
267 | ShadowStackAllocatorType(std::move(t&: *O.ShadowStackAllocator)); |
268 | ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( |
269 | &ShadowStackAllocatorStorage); |
270 | O.ShadowStackAllocator = nullptr; |
271 | } else { |
272 | ShadowStackAllocator = nullptr; |
273 | } |
274 | |
275 | if (NodeIdPairAllocator) |
276 | NodeIdPairAllocator->~NodeIdPairAllocatorType(); |
277 | if (O.NodeIdPairAllocator) { |
278 | new (&NodeIdPairAllocatorStorage) |
279 | NodeIdPairAllocatorType(std::move(t&: *O.NodeIdPairAllocator)); |
280 | NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( |
281 | &NodeIdPairAllocatorStorage); |
282 | O.NodeIdPairAllocator = nullptr; |
283 | } else { |
284 | NodeIdPairAllocator = nullptr; |
285 | } |
286 | |
287 | return *this; |
288 | } |
289 | |
290 | ~Allocators() XRAY_NEVER_INSTRUMENT { |
291 | if (NodeAllocator != nullptr) |
292 | NodeAllocator->~NodeAllocatorType(); |
293 | if (RootAllocator != nullptr) |
294 | RootAllocator->~RootAllocatorType(); |
295 | if (ShadowStackAllocator != nullptr) |
296 | ShadowStackAllocator->~ShadowStackAllocatorType(); |
297 | if (NodeIdPairAllocator != nullptr) |
298 | NodeIdPairAllocator->~NodeIdPairAllocatorType(); |
299 | } |
300 | }; |
301 | |
302 | static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT { |
303 | return InitAllocatorsCustom(Max: profilingFlags()->per_thread_allocator_max); |
304 | } |
305 | |
306 | static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT { |
307 | Allocators A(Max); |
308 | return A; |
309 | } |
310 | |
311 | static Allocators |
312 | InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT { |
313 | Allocators A(Bufs); |
314 | return A; |
315 | } |
316 | |
317 | private: |
318 | NodeArray Nodes; |
319 | RootArray Roots; |
320 | ShadowStackArray ShadowStack; |
321 | NodeIdPairAllocatorType *NodeIdPairAllocator; |
322 | uint32_t OverflowedFunctions; |
323 | |
324 | public: |
325 | explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT |
326 | : Nodes(*A.NodeAllocator), |
327 | Roots(*A.RootAllocator), |
328 | ShadowStack(*A.ShadowStackAllocator), |
329 | NodeIdPairAllocator(A.NodeIdPairAllocator), |
330 | OverflowedFunctions(0) {} |
331 | |
332 | FunctionCallTrie() = delete; |
333 | FunctionCallTrie(const FunctionCallTrie &) = delete; |
334 | FunctionCallTrie &operator=(const FunctionCallTrie &) = delete; |
335 | |
336 | FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT |
337 | : Nodes(std::move(t&: O.Nodes)), |
338 | Roots(std::move(t&: O.Roots)), |
339 | ShadowStack(std::move(t&: O.ShadowStack)), |
340 | NodeIdPairAllocator(O.NodeIdPairAllocator), |
341 | OverflowedFunctions(O.OverflowedFunctions) {} |
342 | |
343 | FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT { |
344 | Nodes = std::move(t&: O.Nodes); |
345 | Roots = std::move(t&: O.Roots); |
346 | ShadowStack = std::move(t&: O.ShadowStack); |
347 | NodeIdPairAllocator = O.NodeIdPairAllocator; |
348 | OverflowedFunctions = O.OverflowedFunctions; |
349 | return *this; |
350 | } |
351 | |
352 | ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {} |
353 | |
354 | void enterFunction(const int32_t FId, uint64_t TSC, |
355 | uint16_t CPU) XRAY_NEVER_INSTRUMENT { |
356 | DCHECK_NE(FId, 0); |
357 | |
358 | // If we're already overflowed the function call stack, do not bother |
359 | // attempting to record any more function entries. |
360 | if (UNLIKELY(OverflowedFunctions)) { |
361 | ++OverflowedFunctions; |
362 | return; |
363 | } |
364 | |
365 | // If this is the first function we've encountered, we want to set up the |
366 | // node(s) and treat it as a root. |
367 | if (UNLIKELY(ShadowStack.empty())) { |
368 | auto *NewRoot = Nodes.AppendEmplace( |
369 | args: nullptr, args: NodeIdPairArray(*NodeIdPairAllocator), args: 0u, args: 0u, args: FId); |
370 | if (UNLIKELY(NewRoot == nullptr)) |
371 | return; |
372 | if (Roots.AppendEmplace(args&: NewRoot) == nullptr) { |
373 | Nodes.trim(Elements: 1); |
374 | return; |
375 | } |
376 | if (ShadowStack.AppendEmplace(args&: TSC, args&: NewRoot, args&: CPU) == nullptr) { |
377 | Nodes.trim(Elements: 1); |
378 | Roots.trim(Elements: 1); |
379 | ++OverflowedFunctions; |
380 | return; |
381 | } |
382 | return; |
383 | } |
384 | |
385 | // From this point on, we require that the stack is not empty. |
386 | DCHECK(!ShadowStack.empty()); |
387 | auto TopNode = ShadowStack.back().NodePtr; |
388 | DCHECK_NE(TopNode, nullptr); |
389 | |
390 | // If we've seen this callee before, then we access that node and place that |
391 | // on the top of the stack. |
392 | auto* Callee = TopNode->Callees.find_element( |
393 | P: [FId](const NodeIdPair &NR) { return NR.FId == FId; }); |
394 | if (Callee != nullptr) { |
395 | CHECK_NE(Callee->NodePtr, nullptr); |
396 | if (ShadowStack.AppendEmplace(args&: TSC, args&: Callee->NodePtr, args&: CPU) == nullptr) |
397 | ++OverflowedFunctions; |
398 | return; |
399 | } |
400 | |
401 | // This means we've never seen this stack before, create a new node here. |
402 | auto* NewNode = Nodes.AppendEmplace( |
403 | args&: TopNode, args: NodeIdPairArray(*NodeIdPairAllocator), args: 0u, args: 0u, args: FId); |
404 | if (UNLIKELY(NewNode == nullptr)) |
405 | return; |
406 | DCHECK_NE(NewNode, nullptr); |
407 | TopNode->Callees.AppendEmplace(args&: NewNode, args: FId); |
408 | if (ShadowStack.AppendEmplace(args&: TSC, args&: NewNode, args&: CPU) == nullptr) |
409 | ++OverflowedFunctions; |
410 | return; |
411 | } |
412 | |
413 | void exitFunction(int32_t FId, uint64_t TSC, |
414 | uint16_t CPU) XRAY_NEVER_INSTRUMENT { |
415 | // If we're exiting functions that have "overflowed" or don't fit into the |
416 | // stack due to allocator constraints, we then decrement that count first. |
417 | if (OverflowedFunctions) { |
418 | --OverflowedFunctions; |
419 | return; |
420 | } |
421 | |
422 | // When we exit a function, we look up the ShadowStack to see whether we've |
423 | // entered this function before. We do as little processing here as we can, |
424 | // since most of the hard work would have already been done at function |
425 | // entry. |
426 | uint64_t CumulativeTreeTime = 0; |
427 | |
428 | while (!ShadowStack.empty()) { |
429 | const auto &Top = ShadowStack.back(); |
430 | auto TopNode = Top.NodePtr; |
431 | DCHECK_NE(TopNode, nullptr); |
432 | |
433 | // We may encounter overflow on the TSC we're provided, which may end up |
434 | // being less than the TSC when we first entered the function. |
435 | // |
436 | // To get the accurate measurement of cycles, we need to check whether |
437 | // we've overflowed (TSC < Top.EntryTSC) and then account the difference |
438 | // between the entry TSC and the max for the TSC counter (max of uint64_t) |
439 | // then add the value of TSC. We can prove that the maximum delta we will |
440 | // get is at most the 64-bit unsigned value, since the difference between |
441 | // a TSC of 0 and a Top.EntryTSC of 1 is (numeric_limits<uint64_t>::max() |
442 | // - 1) + 1. |
443 | // |
444 | // NOTE: This assumes that TSCs are synchronised across CPUs. |
445 | // TODO: Count the number of times we've seen CPU migrations. |
446 | uint64_t LocalTime = |
447 | Top.EntryTSC > TSC |
448 | ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC |
449 | : TSC - Top.EntryTSC; |
450 | TopNode->CallCount++; |
451 | TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime; |
452 | CumulativeTreeTime += LocalTime; |
453 | ShadowStack.trim(Elements: 1); |
454 | |
455 | // TODO: Update the histogram for the node. |
456 | if (TopNode->FId == FId) |
457 | break; |
458 | } |
459 | } |
460 | |
461 | const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; } |
462 | |
463 | // The deepCopyInto operation will update the provided FunctionCallTrie by |
464 | // re-creating the contents of this particular FunctionCallTrie in the other |
465 | // FunctionCallTrie. It will do this using a Depth First Traversal from the |
466 | // roots, and while doing so recreating the traversal in the provided |
467 | // FunctionCallTrie. |
468 | // |
469 | // This operation will *not* destroy the state in `O`, and thus may cause some |
470 | // duplicate entries in `O` if it is not empty. |
471 | // |
472 | // This function is *not* thread-safe, and may require external |
473 | // synchronisation of both "this" and |O|. |
474 | // |
475 | // This function must *not* be called with a non-empty FunctionCallTrie |O|. |
476 | void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { |
477 | DCHECK(O.getRoots().empty()); |
478 | |
479 | // We then push the root into a stack, to use as the parent marker for new |
480 | // nodes we push in as we're traversing depth-first down the call tree. |
481 | struct NodeAndParent { |
482 | FunctionCallTrie::Node *Node; |
483 | FunctionCallTrie::Node *NewNode; |
484 | }; |
485 | using Stack = Array<NodeAndParent>; |
486 | |
487 | typename Stack::AllocatorType StackAllocator( |
488 | profilingFlags()->stack_allocator_max); |
489 | Stack DFSStack(StackAllocator); |
490 | |
491 | for (const auto Root : getRoots()) { |
492 | // Add a node in O for this root. |
493 | auto NewRoot = O.Nodes.AppendEmplace( |
494 | args: nullptr, args: NodeIdPairArray(*O.NodeIdPairAllocator), args&: Root->CallCount, |
495 | args&: Root->CumulativeLocalTime, args&: Root->FId); |
496 | |
497 | // Because we cannot allocate more memory we should bail out right away. |
498 | if (UNLIKELY(NewRoot == nullptr)) |
499 | return; |
500 | |
501 | if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr)) |
502 | return; |
503 | |
504 | // TODO: Figure out what to do if we fail to allocate any more stack |
505 | // space. Maybe warn or report once? |
506 | if (DFSStack.AppendEmplace(args: Root, args&: NewRoot) == nullptr) |
507 | return; |
508 | while (!DFSStack.empty()) { |
509 | NodeAndParent NP = DFSStack.back(); |
510 | DCHECK_NE(NP.Node, nullptr); |
511 | DCHECK_NE(NP.NewNode, nullptr); |
512 | DFSStack.trim(Elements: 1); |
513 | for (const auto Callee : NP.Node->Callees) { |
514 | auto NewNode = O.Nodes.AppendEmplace( |
515 | args&: NP.NewNode, args: NodeIdPairArray(*O.NodeIdPairAllocator), |
516 | args&: Callee.NodePtr->CallCount, args&: Callee.NodePtr->CumulativeLocalTime, |
517 | args: Callee.FId); |
518 | if (UNLIKELY(NewNode == nullptr)) |
519 | return; |
520 | if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) == |
521 | nullptr)) |
522 | return; |
523 | if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) == |
524 | nullptr)) |
525 | return; |
526 | } |
527 | } |
528 | } |
529 | } |
530 | |
531 | // The mergeInto operation will update the provided FunctionCallTrie by |
532 | // traversing the current trie's roots and updating (i.e. merging) the data in |
533 | // the nodes with the data in the target's nodes. If the node doesn't exist in |
534 | // the provided trie, we add a new one in the right position, and inherit the |
535 | // data from the original (current) trie, along with all its callees. |
536 | // |
537 | // This function is *not* thread-safe, and may require external |
538 | // synchronisation of both "this" and |O|. |
539 | void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { |
540 | struct NodeAndTarget { |
541 | FunctionCallTrie::Node *OrigNode; |
542 | FunctionCallTrie::Node *TargetNode; |
543 | }; |
544 | using Stack = Array<NodeAndTarget>; |
545 | typename Stack::AllocatorType StackAllocator( |
546 | profilingFlags()->stack_allocator_max); |
547 | Stack DFSStack(StackAllocator); |
548 | |
549 | for (const auto Root : getRoots()) { |
550 | Node *TargetRoot = nullptr; |
551 | auto R = O.Roots.find_element( |
552 | P: [&](const Node *Node) { return Node->FId == Root->FId; }); |
553 | if (R == nullptr) { |
554 | TargetRoot = O.Nodes.AppendEmplace( |
555 | args: nullptr, args: NodeIdPairArray(*O.NodeIdPairAllocator), args: 0u, args: 0u, |
556 | args&: Root->FId); |
557 | if (UNLIKELY(TargetRoot == nullptr)) |
558 | return; |
559 | |
560 | O.Roots.Append(E: TargetRoot); |
561 | } else { |
562 | TargetRoot = *R; |
563 | } |
564 | |
565 | DFSStack.AppendEmplace(args: Root, args&: TargetRoot); |
566 | while (!DFSStack.empty()) { |
567 | NodeAndTarget NT = DFSStack.back(); |
568 | DCHECK_NE(NT.OrigNode, nullptr); |
569 | DCHECK_NE(NT.TargetNode, nullptr); |
570 | DFSStack.trim(Elements: 1); |
571 | // TODO: Update the histogram as well when we have it ready. |
572 | NT.TargetNode->CallCount += NT.OrigNode->CallCount; |
573 | NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime; |
574 | for (const auto Callee : NT.OrigNode->Callees) { |
575 | auto TargetCallee = NT.TargetNode->Callees.find_element( |
576 | P: [&](const FunctionCallTrie::NodeIdPair &C) { |
577 | return C.FId == Callee.FId; |
578 | }); |
579 | if (TargetCallee == nullptr) { |
580 | auto NewTargetNode = O.Nodes.AppendEmplace( |
581 | args&: NT.TargetNode, args: NodeIdPairArray(*O.NodeIdPairAllocator), args: 0u, args: 0u, |
582 | args: Callee.FId); |
583 | |
584 | if (UNLIKELY(NewTargetNode == nullptr)) |
585 | return; |
586 | |
587 | TargetCallee = |
588 | NT.TargetNode->Callees.AppendEmplace(args&: NewTargetNode, args: Callee.FId); |
589 | } |
590 | DFSStack.AppendEmplace(args: Callee.NodePtr, args&: TargetCallee->NodePtr); |
591 | } |
592 | } |
593 | } |
594 | } |
595 | }; |
596 | |
597 | } // namespace __xray |
598 | |
599 | #endif // XRAY_FUNCTION_CALL_TRIE_H |
600 | |