1//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of XRay, a dynamic runtime instrumentation system.
10//
11// This file defines the interface for a function call trie.
12//
13//===----------------------------------------------------------------------===//
14#ifndef XRAY_FUNCTION_CALL_TRIE_H
15#define XRAY_FUNCTION_CALL_TRIE_H
16
17#include "xray_buffer_queue.h"
18#include "xray_defs.h"
19#include "xray_profiling_flags.h"
20#include "xray_segmented_array.h"
21#include <limits>
22#include <memory> // For placement new.
23#include <utility>
24
25namespace __xray {
26
27/// A FunctionCallTrie represents the stack traces of XRay instrumented
28/// functions that we've encountered, where a node corresponds to a function and
29/// the path from the root to the node its stack trace. Each node in the trie
30/// will contain some useful values, including:
31///
32/// * The cumulative amount of time spent in this particular node/stack.
33/// * The number of times this stack has appeared.
34/// * A histogram of latencies for that particular node.
35///
/// Each node in the trie will also contain a list of callees, represented using
/// an Array<NodeIdPair> -- each NodeIdPair instance will contain the function
/// ID of the callee, and a pointer to the node.
39///
40/// If we visualise this data structure, we'll find the following potential
41/// representation:
42///
43/// [function id node] -> [callees] [cumulative time]
44/// [call counter] [latency histogram]
45///
46/// As an example, when we have a function in this pseudocode:
47///
48/// func f(N) {
49/// g()
50/// h()
51/// for i := 1..N { j() }
52/// }
53///
54/// We may end up with a trie of the following form:
55///
56/// f -> [ g, h, j ] [...] [1] [...]
57/// g -> [ ... ] [...] [1] [...]
58/// h -> [ ... ] [...] [1] [...]
59/// j -> [ ... ] [...] [N] [...]
60///
61/// If for instance the function g() called j() like so:
62///
63/// func g() {
64/// for i := 1..10 { j() }
65/// }
66///
67/// We'll find the following updated trie:
68///
69/// f -> [ g, h, j ] [...] [1] [...]
70/// g -> [ j' ] [...] [1] [...]
71/// h -> [ ... ] [...] [1] [...]
72/// j -> [ ... ] [...] [N] [...]
73/// j' -> [ ... ] [...] [10] [...]
74///
75/// Note that we'll have a new node representing the path `f -> g -> j'` with
76/// isolated data. This isolation gives us a means of representing the stack
77/// traces as a path, as opposed to a key in a table. The alternative
78/// implementation here would be to use a separate table for the path, and use
79/// hashes of the path as an identifier to accumulate the information. We've
80/// moved away from this approach as it takes a lot of time to compute the hash
81/// every time we need to update a function's call information as we're handling
82/// the entry and exit events.
83///
84/// This approach allows us to maintain a shadow stack, which represents the
85/// currently executing path, and on function exits quickly compute the amount
86/// of time elapsed from the entry, then update the counters for the node
87/// already represented in the trie. This necessitates an efficient
88/// representation of the various data structures (the list of callees must be
89/// cache-aware and efficient to look up, and the histogram must be compact and
90/// quick to update) to enable us to keep the overheads of this implementation
91/// to the minimum.
92class FunctionCallTrie {
93public:
94 struct Node;
95
96 // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
97 // standard library types in this header.
98 struct NodeIdPair {
99 Node *NodePtr;
100 int32_t FId;
101 };
102
103 using NodeIdPairArray = Array<NodeIdPair>;
104 using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
105
106 // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
107 // number of times this node actually appeared, the cumulative amount of time
108 // for this particular node including its children call times, and just the
109 // local time spent on this node. Each Node will have the ID of the XRay
110 // instrumented function that it is associated to.
111 struct Node {
112 Node *Parent;
113 NodeIdPairArray Callees;
114 uint64_t CallCount;
115 uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
116 int32_t FId;
117
118 // TODO: Include the compact histogram.
119 };
120
121private:
122 struct ShadowStackEntry {
123 uint64_t EntryTSC;
124 Node *NodePtr;
125 uint16_t EntryCPU;
126 };
127
128 using NodeArray = Array<Node>;
129 using RootArray = Array<Node *>;
130 using ShadowStackArray = Array<ShadowStackEntry>;
131
132public:
133 // We collate the allocators we need into a single struct, as a convenience to
134 // allow us to initialize these as a group.
135 struct Allocators {
136 using NodeAllocatorType = NodeArray::AllocatorType;
137 using RootAllocatorType = RootArray::AllocatorType;
138 using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
139
140 // Use hosted aligned storage members to allow for trivial move and init.
141 // This also allows us to sidestep the potential-failing allocation issue.
142 alignas(NodeAllocatorType) std::byte
143 NodeAllocatorStorage[sizeof(NodeAllocatorType)];
144 alignas(RootAllocatorType) std::byte
145 RootAllocatorStorage[sizeof(RootAllocatorType)];
146 alignas(ShadowStackAllocatorType) std::byte
147 ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)];
148 alignas(NodeIdPairAllocatorType) std::byte
149 NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)];
150
151 NodeAllocatorType *NodeAllocator = nullptr;
152 RootAllocatorType *RootAllocator = nullptr;
153 ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
154 NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
155
156 Allocators() = default;
157 Allocators(const Allocators &) = delete;
158 Allocators &operator=(const Allocators &) = delete;
159
160 struct Buffers {
161 BufferQueue::Buffer NodeBuffer;
162 BufferQueue::Buffer RootsBuffer;
163 BufferQueue::Buffer ShadowStackBuffer;
164 BufferQueue::Buffer NodeIdPairBuffer;
165 };
166
167 explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
168 new (&NodeAllocatorStorage)
169 NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
170 NodeAllocator =
171 reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
172
173 new (&RootAllocatorStorage)
174 RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
175 RootAllocator =
176 reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
177
178 new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
179 B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
180 ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
181 &ShadowStackAllocatorStorage);
182
183 new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
184 B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
185 NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
186 &NodeIdPairAllocatorStorage);
187 }
188
189 explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
190 new (&NodeAllocatorStorage) NodeAllocatorType(Max);
191 NodeAllocator =
192 reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
193
194 new (&RootAllocatorStorage) RootAllocatorType(Max);
195 RootAllocator =
196 reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
197
198 new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
199 ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
200 &ShadowStackAllocatorStorage);
201
202 new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
203 NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
204 &NodeIdPairAllocatorStorage);
205 }
206
207 Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
208 // Here we rely on the safety of memcpy'ing contents of the storage
209 // members, and then pointing the source pointers to nullptr.
210 internal_memcpy(dest: &NodeAllocatorStorage, src: &O.NodeAllocatorStorage,
211 n: sizeof(NodeAllocatorType));
212 internal_memcpy(dest: &RootAllocatorStorage, src: &O.RootAllocatorStorage,
213 n: sizeof(RootAllocatorType));
214 internal_memcpy(dest: &ShadowStackAllocatorStorage,
215 src: &O.ShadowStackAllocatorStorage,
216 n: sizeof(ShadowStackAllocatorType));
217 internal_memcpy(dest: &NodeIdPairAllocatorStorage,
218 src: &O.NodeIdPairAllocatorStorage,
219 n: sizeof(NodeIdPairAllocatorType));
220
221 NodeAllocator =
222 reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
223 RootAllocator =
224 reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
225 ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
226 &ShadowStackAllocatorStorage);
227 NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
228 &NodeIdPairAllocatorStorage);
229
230 O.NodeAllocator = nullptr;
231 O.RootAllocator = nullptr;
232 O.ShadowStackAllocator = nullptr;
233 O.NodeIdPairAllocator = nullptr;
234 }
235
236 Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
237 // When moving into an existing instance, we ensure that we clean up the
238 // current allocators.
239 if (NodeAllocator)
240 NodeAllocator->~NodeAllocatorType();
241 if (O.NodeAllocator) {
242 new (&NodeAllocatorStorage)
243 NodeAllocatorType(std::move(t&: *O.NodeAllocator));
244 NodeAllocator =
245 reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
246 O.NodeAllocator = nullptr;
247 } else {
248 NodeAllocator = nullptr;
249 }
250
251 if (RootAllocator)
252 RootAllocator->~RootAllocatorType();
253 if (O.RootAllocator) {
254 new (&RootAllocatorStorage)
255 RootAllocatorType(std::move(t&: *O.RootAllocator));
256 RootAllocator =
257 reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
258 O.RootAllocator = nullptr;
259 } else {
260 RootAllocator = nullptr;
261 }
262
263 if (ShadowStackAllocator)
264 ShadowStackAllocator->~ShadowStackAllocatorType();
265 if (O.ShadowStackAllocator) {
266 new (&ShadowStackAllocatorStorage)
267 ShadowStackAllocatorType(std::move(t&: *O.ShadowStackAllocator));
268 ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
269 &ShadowStackAllocatorStorage);
270 O.ShadowStackAllocator = nullptr;
271 } else {
272 ShadowStackAllocator = nullptr;
273 }
274
275 if (NodeIdPairAllocator)
276 NodeIdPairAllocator->~NodeIdPairAllocatorType();
277 if (O.NodeIdPairAllocator) {
278 new (&NodeIdPairAllocatorStorage)
279 NodeIdPairAllocatorType(std::move(t&: *O.NodeIdPairAllocator));
280 NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
281 &NodeIdPairAllocatorStorage);
282 O.NodeIdPairAllocator = nullptr;
283 } else {
284 NodeIdPairAllocator = nullptr;
285 }
286
287 return *this;
288 }
289
290 ~Allocators() XRAY_NEVER_INSTRUMENT {
291 if (NodeAllocator != nullptr)
292 NodeAllocator->~NodeAllocatorType();
293 if (RootAllocator != nullptr)
294 RootAllocator->~RootAllocatorType();
295 if (ShadowStackAllocator != nullptr)
296 ShadowStackAllocator->~ShadowStackAllocatorType();
297 if (NodeIdPairAllocator != nullptr)
298 NodeIdPairAllocator->~NodeIdPairAllocatorType();
299 }
300 };
301
302 static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
303 return InitAllocatorsCustom(Max: profilingFlags()->per_thread_allocator_max);
304 }
305
306 static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
307 Allocators A(Max);
308 return A;
309 }
310
311 static Allocators
312 InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
313 Allocators A(Bufs);
314 return A;
315 }
316
317private:
318 NodeArray Nodes;
319 RootArray Roots;
320 ShadowStackArray ShadowStack;
321 NodeIdPairAllocatorType *NodeIdPairAllocator;
322 uint32_t OverflowedFunctions;
323
324public:
325 explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
326 : Nodes(*A.NodeAllocator),
327 Roots(*A.RootAllocator),
328 ShadowStack(*A.ShadowStackAllocator),
329 NodeIdPairAllocator(A.NodeIdPairAllocator),
330 OverflowedFunctions(0) {}
331
332 FunctionCallTrie() = delete;
333 FunctionCallTrie(const FunctionCallTrie &) = delete;
334 FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;
335
336 FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
337 : Nodes(std::move(t&: O.Nodes)),
338 Roots(std::move(t&: O.Roots)),
339 ShadowStack(std::move(t&: O.ShadowStack)),
340 NodeIdPairAllocator(O.NodeIdPairAllocator),
341 OverflowedFunctions(O.OverflowedFunctions) {}
342
343 FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
344 Nodes = std::move(t&: O.Nodes);
345 Roots = std::move(t&: O.Roots);
346 ShadowStack = std::move(t&: O.ShadowStack);
347 NodeIdPairAllocator = O.NodeIdPairAllocator;
348 OverflowedFunctions = O.OverflowedFunctions;
349 return *this;
350 }
351
352 ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}
353
354 void enterFunction(const int32_t FId, uint64_t TSC,
355 uint16_t CPU) XRAY_NEVER_INSTRUMENT {
356 DCHECK_NE(FId, 0);
357
358 // If we're already overflowed the function call stack, do not bother
359 // attempting to record any more function entries.
360 if (UNLIKELY(OverflowedFunctions)) {
361 ++OverflowedFunctions;
362 return;
363 }
364
365 // If this is the first function we've encountered, we want to set up the
366 // node(s) and treat it as a root.
367 if (UNLIKELY(ShadowStack.empty())) {
368 auto *NewRoot = Nodes.AppendEmplace(
369 args: nullptr, args: NodeIdPairArray(*NodeIdPairAllocator), args: 0u, args: 0u, args: FId);
370 if (UNLIKELY(NewRoot == nullptr))
371 return;
372 if (Roots.AppendEmplace(args&: NewRoot) == nullptr) {
373 Nodes.trim(Elements: 1);
374 return;
375 }
376 if (ShadowStack.AppendEmplace(args&: TSC, args&: NewRoot, args&: CPU) == nullptr) {
377 Nodes.trim(Elements: 1);
378 Roots.trim(Elements: 1);
379 ++OverflowedFunctions;
380 return;
381 }
382 return;
383 }
384
385 // From this point on, we require that the stack is not empty.
386 DCHECK(!ShadowStack.empty());
387 auto TopNode = ShadowStack.back().NodePtr;
388 DCHECK_NE(TopNode, nullptr);
389
390 // If we've seen this callee before, then we access that node and place that
391 // on the top of the stack.
392 auto* Callee = TopNode->Callees.find_element(
393 P: [FId](const NodeIdPair &NR) { return NR.FId == FId; });
394 if (Callee != nullptr) {
395 CHECK_NE(Callee->NodePtr, nullptr);
396 if (ShadowStack.AppendEmplace(args&: TSC, args&: Callee->NodePtr, args&: CPU) == nullptr)
397 ++OverflowedFunctions;
398 return;
399 }
400
401 // This means we've never seen this stack before, create a new node here.
402 auto* NewNode = Nodes.AppendEmplace(
403 args&: TopNode, args: NodeIdPairArray(*NodeIdPairAllocator), args: 0u, args: 0u, args: FId);
404 if (UNLIKELY(NewNode == nullptr))
405 return;
406 DCHECK_NE(NewNode, nullptr);
407 TopNode->Callees.AppendEmplace(args&: NewNode, args: FId);
408 if (ShadowStack.AppendEmplace(args&: TSC, args&: NewNode, args&: CPU) == nullptr)
409 ++OverflowedFunctions;
410 return;
411 }
412
413 void exitFunction(int32_t FId, uint64_t TSC,
414 uint16_t CPU) XRAY_NEVER_INSTRUMENT {
415 // If we're exiting functions that have "overflowed" or don't fit into the
416 // stack due to allocator constraints, we then decrement that count first.
417 if (OverflowedFunctions) {
418 --OverflowedFunctions;
419 return;
420 }
421
422 // When we exit a function, we look up the ShadowStack to see whether we've
423 // entered this function before. We do as little processing here as we can,
424 // since most of the hard work would have already been done at function
425 // entry.
426 uint64_t CumulativeTreeTime = 0;
427
428 while (!ShadowStack.empty()) {
429 const auto &Top = ShadowStack.back();
430 auto TopNode = Top.NodePtr;
431 DCHECK_NE(TopNode, nullptr);
432
433 // We may encounter overflow on the TSC we're provided, which may end up
434 // being less than the TSC when we first entered the function.
435 //
436 // To get the accurate measurement of cycles, we need to check whether
437 // we've overflowed (TSC < Top.EntryTSC) and then account the difference
438 // between the entry TSC and the max for the TSC counter (max of uint64_t)
439 // then add the value of TSC. We can prove that the maximum delta we will
440 // get is at most the 64-bit unsigned value, since the difference between
441 // a TSC of 0 and a Top.EntryTSC of 1 is (numeric_limits<uint64_t>::max()
442 // - 1) + 1.
443 //
444 // NOTE: This assumes that TSCs are synchronised across CPUs.
445 // TODO: Count the number of times we've seen CPU migrations.
446 uint64_t LocalTime =
447 Top.EntryTSC > TSC
448 ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
449 : TSC - Top.EntryTSC;
450 TopNode->CallCount++;
451 TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
452 CumulativeTreeTime += LocalTime;
453 ShadowStack.trim(Elements: 1);
454
455 // TODO: Update the histogram for the node.
456 if (TopNode->FId == FId)
457 break;
458 }
459 }
460
461 const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }
462
463 // The deepCopyInto operation will update the provided FunctionCallTrie by
464 // re-creating the contents of this particular FunctionCallTrie in the other
465 // FunctionCallTrie. It will do this using a Depth First Traversal from the
466 // roots, and while doing so recreating the traversal in the provided
467 // FunctionCallTrie.
468 //
469 // This operation will *not* destroy the state in `O`, and thus may cause some
470 // duplicate entries in `O` if it is not empty.
471 //
472 // This function is *not* thread-safe, and may require external
473 // synchronisation of both "this" and |O|.
474 //
475 // This function must *not* be called with a non-empty FunctionCallTrie |O|.
476 void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
477 DCHECK(O.getRoots().empty());
478
479 // We then push the root into a stack, to use as the parent marker for new
480 // nodes we push in as we're traversing depth-first down the call tree.
481 struct NodeAndParent {
482 FunctionCallTrie::Node *Node;
483 FunctionCallTrie::Node *NewNode;
484 };
485 using Stack = Array<NodeAndParent>;
486
487 typename Stack::AllocatorType StackAllocator(
488 profilingFlags()->stack_allocator_max);
489 Stack DFSStack(StackAllocator);
490
491 for (const auto Root : getRoots()) {
492 // Add a node in O for this root.
493 auto NewRoot = O.Nodes.AppendEmplace(
494 args: nullptr, args: NodeIdPairArray(*O.NodeIdPairAllocator), args&: Root->CallCount,
495 args&: Root->CumulativeLocalTime, args&: Root->FId);
496
497 // Because we cannot allocate more memory we should bail out right away.
498 if (UNLIKELY(NewRoot == nullptr))
499 return;
500
501 if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
502 return;
503
504 // TODO: Figure out what to do if we fail to allocate any more stack
505 // space. Maybe warn or report once?
506 if (DFSStack.AppendEmplace(args: Root, args&: NewRoot) == nullptr)
507 return;
508 while (!DFSStack.empty()) {
509 NodeAndParent NP = DFSStack.back();
510 DCHECK_NE(NP.Node, nullptr);
511 DCHECK_NE(NP.NewNode, nullptr);
512 DFSStack.trim(Elements: 1);
513 for (const auto Callee : NP.Node->Callees) {
514 auto NewNode = O.Nodes.AppendEmplace(
515 args&: NP.NewNode, args: NodeIdPairArray(*O.NodeIdPairAllocator),
516 args&: Callee.NodePtr->CallCount, args&: Callee.NodePtr->CumulativeLocalTime,
517 args: Callee.FId);
518 if (UNLIKELY(NewNode == nullptr))
519 return;
520 if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
521 nullptr))
522 return;
523 if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
524 nullptr))
525 return;
526 }
527 }
528 }
529 }
530
531 // The mergeInto operation will update the provided FunctionCallTrie by
532 // traversing the current trie's roots and updating (i.e. merging) the data in
533 // the nodes with the data in the target's nodes. If the node doesn't exist in
534 // the provided trie, we add a new one in the right position, and inherit the
535 // data from the original (current) trie, along with all its callees.
536 //
537 // This function is *not* thread-safe, and may require external
538 // synchronisation of both "this" and |O|.
539 void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
540 struct NodeAndTarget {
541 FunctionCallTrie::Node *OrigNode;
542 FunctionCallTrie::Node *TargetNode;
543 };
544 using Stack = Array<NodeAndTarget>;
545 typename Stack::AllocatorType StackAllocator(
546 profilingFlags()->stack_allocator_max);
547 Stack DFSStack(StackAllocator);
548
549 for (const auto Root : getRoots()) {
550 Node *TargetRoot = nullptr;
551 auto R = O.Roots.find_element(
552 P: [&](const Node *Node) { return Node->FId == Root->FId; });
553 if (R == nullptr) {
554 TargetRoot = O.Nodes.AppendEmplace(
555 args: nullptr, args: NodeIdPairArray(*O.NodeIdPairAllocator), args: 0u, args: 0u,
556 args&: Root->FId);
557 if (UNLIKELY(TargetRoot == nullptr))
558 return;
559
560 O.Roots.Append(E: TargetRoot);
561 } else {
562 TargetRoot = *R;
563 }
564
565 DFSStack.AppendEmplace(args: Root, args&: TargetRoot);
566 while (!DFSStack.empty()) {
567 NodeAndTarget NT = DFSStack.back();
568 DCHECK_NE(NT.OrigNode, nullptr);
569 DCHECK_NE(NT.TargetNode, nullptr);
570 DFSStack.trim(Elements: 1);
571 // TODO: Update the histogram as well when we have it ready.
572 NT.TargetNode->CallCount += NT.OrigNode->CallCount;
573 NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
574 for (const auto Callee : NT.OrigNode->Callees) {
575 auto TargetCallee = NT.TargetNode->Callees.find_element(
576 P: [&](const FunctionCallTrie::NodeIdPair &C) {
577 return C.FId == Callee.FId;
578 });
579 if (TargetCallee == nullptr) {
580 auto NewTargetNode = O.Nodes.AppendEmplace(
581 args&: NT.TargetNode, args: NodeIdPairArray(*O.NodeIdPairAllocator), args: 0u, args: 0u,
582 args: Callee.FId);
583
584 if (UNLIKELY(NewTargetNode == nullptr))
585 return;
586
587 TargetCallee =
588 NT.TargetNode->Callees.AppendEmplace(args&: NewTargetNode, args: Callee.FId);
589 }
590 DFSStack.AppendEmplace(args: Callee.NodePtr, args&: TargetCallee->NodePtr);
591 }
592 }
593 }
594 }
595};
596
597} // namespace __xray
598
599#endif // XRAY_FUNCTION_CALL_TRIE_H
600