//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <cstddef> // For std::byte.
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to that node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
/// * The cumulative amount of time spent in this particular node/stack.
/// * The number of times this stack has appeared.
/// * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the hash
/// every time we need to update a function's call information as we're handling
/// the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to a minimum.
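///
/// A minimal usage sketch (illustrative only; it assumes a single thread, that
/// the profiling flags have been initialized so InitAllocators() can size the
/// per-thread allocators, and that the TSC/CPU values below stand in for
/// whatever the caller actually reads from the hardware):
///
///   auto A = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie Trie(A);
///   Trie.enterFunction(/*FId=*/1, /*TSC=*/1000, /*CPU=*/0);  // enter f
///   Trie.enterFunction(/*FId=*/2, /*TSC=*/1010, /*CPU=*/0);  // f calls g
///   Trie.exitFunction(/*FId=*/2, /*TSC=*/1050, /*CPU=*/0);   // g returns
///   Trie.exitFunction(/*FId=*/1, /*TSC=*/1100, /*CPU=*/0);   // f returns
///
/// After this sequence the trie holds a root node for function ID 1 with a
/// single callee node for function ID 2, each with a call count of 1 and the
/// time accumulated between its entry and exit events.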
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of time
  // attributed to this particular node (see CumulativeLocalTime below), and
  // the ID of the XRay instrumented function that it is associated with.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience to
  // allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potential-failing allocation issue.
    alignas(NodeAllocatorType) std::byte
        NodeAllocatorStorage[sizeof(NodeAllocatorType)];
    alignas(RootAllocatorType) std::byte
        RootAllocatorStorage[sizeof(RootAllocatorType)];
    alignas(ShadowStackAllocatorType) std::byte
        ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)];
    alignas(NodeIdPairAllocatorType) std::byte
        NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)];

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };

    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }
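
  // A sketch of the two initialization paths (illustrative only; `BQ` stands
  // in for a BufferQueue instance the caller already owns, and error handling
  // is elided):
  //
  //   // Allocators bounded by a caller-chosen size:
  //   auto A = FunctionCallTrie::InitAllocatorsCustom(1 << 20);
  //
  //   // Allocators carved out of pre-acquired BufferQueue buffers:
  //   FunctionCallTrie::Allocators::Buffers Bufs;
  //   BQ->getBuffer(Bufs.NodeBuffer);
  //   BQ->getBuffer(Bufs.RootsBuffer);
  //   BQ->getBuffer(Bufs.ShadowStackBuffer);
  //   BQ->getBuffer(Bufs.NodeIdPairBuffer);
  //   auto B = FunctionCallTrie::InitAllocatorsFromBuffers(Bufs);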

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, then we access that node and place that
    // on the top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // This means we've never seen this stack before; create a new node here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting functions that have "overflowed" or don't fit into the
    // stack due to allocator constraints, we then decrement that count first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    uint64_t CumulativeTreeTime = 0;
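
    // A worked example of the accounting below (illustrative numbers): suppose
    // the shadow stack holds f -> g -> j and a single exit event for f unwinds
    // all three frames, with elapsed times since entry of 10 for j, 25 for g,
    // and 40 for f. Then j is credited 10, g is credited 25 - 10 = 15, and f
    // is credited 40 - 35 = 5, since CumulativeTreeTime carries the elapsed
    // time already charged to the deeper frames within this one unwind.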

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get the accurate measurement of cycles, we need to check whether
      // we've overflowed (TSC < Top.EntryTSC) and then account the difference
      // between the entry TSC and the max for the TSC counter (max of uint64_t)
      // then add the value of TSC. The resulting delta is always strictly less
      // than the maximum 64-bit unsigned value: for instance, a TSC of 0 and a
      // Top.EntryTSC of 1 yields (numeric_limits<uint64_t>::max() - 1) + 0,
      // i.e. numeric_limits<uint64_t>::max() - 1.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
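      //
      // For example (illustrative numbers): with Top.EntryTSC ==
      // numeric_limits<uint64_t>::max() - 5 and TSC == 10, the computation
      // below takes the wrapped branch and yields
      // (max() - Top.EntryTSC) + TSC == 5 + 10 == 15.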
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

  // The deepCopyInto operation updates the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It does this with a depth-first traversal from the
  // roots, recreating each visited node in the provided FunctionCallTrie.
  //
  // This operation does *not* destroy any existing state in |O|, so calling it
  // with a non-empty |O| would leave duplicate entries; it must therefore only
  // be called with an empty FunctionCallTrie |O|.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
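  //
  // A usage sketch (illustrative only; `ThreadTrie` stands in for a thread's
  // live trie, and the copy gets its own allocators so it can outlive the
  // thread's storage):
  //
  //   auto CopyAllocators = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Copy(CopyAllocators);
  //   ThreadTrie.deepCopyInto(Copy);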
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We push each root onto a stack, to use as the parent marker for the new
    // nodes we create as we traverse depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // If we could not allocate a new node, bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and merging (i.e. adding) the data in
  // this trie's nodes into the corresponding nodes in |O|. If a node doesn't
  // exist in |O|, we add a new one in the right position, inheriting the data
  // from the original (current) trie, along with all its callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
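  //
  // A usage sketch (illustrative only; `PerThreadTrie` and `Aggregate` stand
  // in for a thread-local trie and a process-wide trie combined at flush
  // time):
  //
  //   PerThreadTrie.mergeInto(Aggregate);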
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H