//===- CtxInstrProfiling.cpp - contextual instrumented PGO ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "CtxInstrProfiling.h"
#include "RootAutoDetector.h"
#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_atomic_clang.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_dense_map.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_mutex.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_thread_safety.h"
#include "sanitizer_common/sanitizer_vector.h"

#include <assert.h>

using namespace __ctx_profile;

namespace {
// Keep track of all the context roots we actually saw, so we can then traverse
// them when the user asks for the profile in __llvm_ctx_profile_fetch
__sanitizer::SpinMutex AllContextsMutex;
SANITIZER_GUARDED_BY(AllContextsMutex)
__sanitizer::Vector<ContextRoot *> AllContextRoots;

__sanitizer::atomic_uintptr_t AllFunctionsData = {};

// Keep all the functions for which we collect a flat profile in a linked list.
__sanitizer::SpinMutex FlatCtxArenaMutex;
SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
Arena *FlatCtxArenaHead = nullptr;
SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
Arena *FlatCtxArena = nullptr;
// Set to true when we enter a root, and false when we exit - regardless of
// whether this thread collects a contextual profile for that root.
__thread bool IsUnderContext = false;
__sanitizer::atomic_uint8_t ProfilingStarted = {};

__sanitizer::atomic_uintptr_t RootDetector = {};
RootAutoDetector *getRootDetector() {
  return reinterpret_cast<RootAutoDetector *>(
      __sanitizer::atomic_load_relaxed(&RootDetector));
}
// Utility to taint a pointer by setting the LSB. There is an assumption
// throughout that the addresses of contexts are even (really, they should be
// 8-byte aligned, but "even"-ness is the minimum assumption).
// "Scratch contexts" are buffers that we return in certain cases - they are
// large enough to allow for memory-safe counter access, but they don't link
// subcontexts below them (the runtime recognizes them and enforces that).
ContextNode *markAsScratch(const ContextNode *Ctx) {
  return reinterpret_cast<ContextNode *>(reinterpret_cast<uint64_t>(Ctx) | 1);
}
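// Note: the paired isScratch() check recognizes such pointers by testing this
// LSB.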

// Used when getting the data from TLS. We don't *really* need to reset, but
// it's a simpler system if we do.
template <typename T> inline T consume(T &V) {
  auto R = V;
  V = {0};
  return R;
}

// We allocate Arenas of at least kBuffSize bytes (1 MB). The scratch buffer is
// also that large.
constexpr size_t kPower = 20;
constexpr size_t kBuffSize = 1 << kPower;

// Highly unlikely we need more than kBuffSize for a context.
size_t getArenaAllocSize(size_t Needed) {
  if (Needed >= kBuffSize)
    return 2 * Needed;
  return kBuffSize;
}

// verify the structural integrity of the context
bool validate(const ContextRoot *Root) {
  // all contexts should be laid out in some arena page. Go over each arena
  // allocated for this Root, and jump over contained contexts based on
  // self-reported sizes.
  __sanitizer::DenseMap<uint64_t, bool> ContextStartAddrs;
  for (const auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next()) {
    const auto *Pos = Mem->start();
    while (Pos < Mem->pos()) {
      const auto *Ctx = reinterpret_cast<const ContextNode *>(Pos);
      if (!ContextStartAddrs.insert({reinterpret_cast<uint64_t>(Ctx), true})
               .second)
        return false;
      Pos += Ctx->size();
    }
  }

  // Now traverse the contexts again the same way, but validate all non-null
  // subcontext addresses appear in the set computed above.
  for (const auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next()) {
    const auto *Pos = Mem->start();
    while (Pos < Mem->pos()) {
      const auto *Ctx = reinterpret_cast<const ContextNode *>(Pos);
      for (uint32_t I = 0; I < Ctx->callsites_size(); ++I)
        for (auto *Sub = Ctx->subContexts()[I]; Sub; Sub = Sub->next())
          if (!ContextStartAddrs.find(reinterpret_cast<uint64_t>(Sub)))
            return false;

      Pos += Ctx->size();
    }
  }
  return true;
}

inline ContextNode *allocContextNode(char *Place, GUID Guid,
                                     uint32_t NumCounters,
                                     uint32_t NumCallsites,
                                     ContextNode *Next = nullptr) {
  assert(reinterpret_cast<uint64_t>(Place) % ExpectedAlignment == 0);
  return new (Place) ContextNode(Guid, NumCounters, NumCallsites, Next);
}

void resetContextNode(ContextNode &Node) {
  // FIXME(mtrofin): this is std::memset, which we can probably use if we
  // drop/reduce the dependency on sanitizer_common.
  for (uint32_t I = 0; I < Node.counters_size(); ++I)
    Node.counters()[I] = 0;
  for (uint32_t I = 0; I < Node.callsites_size(); ++I)
    for (auto *Next = Node.subContexts()[I]; Next; Next = Next->next())
      resetContextNode(*Next);
}

ContextNode *onContextEnter(ContextNode &Node) {
  ++Node.counters()[0];
  return &Node;
}

} // namespace

// the scratch buffer - what we give when we can't produce a real context (the
// scratch isn't "real" in that it's expected to be clobbered carelessly - we
// don't read it). The other important thing is that the callees from a scratch
// context also get a scratch context.
// Eventually this can be replaced with per-function buffers, a la the typical
// (flat) instrumented FDO buffers. The clobbering aspect won't apply there, but
// the part about determining the nature of the subcontexts does.
__thread char __Buffer[kBuffSize] = {0};

#define TheScratchContext                                                      \
  markAsScratch(reinterpret_cast<ContextNode *>(__Buffer))

// init the TLSes
__thread void *volatile __llvm_ctx_profile_expected_callee[2] = {nullptr,
                                                                  nullptr};
__thread ContextNode **volatile __llvm_ctx_profile_callsite[2] = {0, 0};
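// Two slots are provided so that a call graph entered from a signal handler
// can use index 1 without clobbering index 0 (see the discussion in
// __llvm_ctx_profile_get_context below).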

__thread ContextRoot *volatile __llvm_ctx_profile_current_context_root =
    nullptr;

Arena::Arena(uint32_t Size) : Size(Size) {
  __sanitizer::internal_memset(start(), 0, Size);
}

// FIXME(mtrofin): use malloc / mmap instead of sanitizer common APIs to reduce
// the dependency on the latter.
Arena *Arena::allocateNewArena(size_t Size, Arena *Prev) {
  assert(!Prev || Prev->Next == nullptr);
  Arena *NewArena = new (__sanitizer::InternalAlloc(
      Size + sizeof(Arena), /*cache=*/nullptr, /*alignment=*/ExpectedAlignment))
      Arena(Size);
  if (Prev)
    Prev->Next = NewArena;
  return NewArena;
}

void Arena::freeArenaList(Arena *&A) {
  assert(A);
  for (auto *I = A; I != nullptr;) {
    auto *Current = I;
    I = I->Next;
    __sanitizer::InternalFree(Current);
  }
  A = nullptr;
}

// If this is the first time we hit a callsite with this particular callee
// (Guid), we need to allocate.
ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
                             uint32_t NumCounters, uint32_t NumCallsites) {
  auto AllocSize = ContextNode::getAllocSize(NumCounters, NumCallsites);
  auto *Mem = __llvm_ctx_profile_current_context_root->CurrentMem;
  char *AllocPlace = Mem->tryBumpAllocate(AllocSize);
  if (!AllocPlace) {
    // If we failed to allocate on the current arena, allocate a new arena and
    // place it on __llvm_ctx_profile_current_context_root->CurrentMem so we
    // find it from now on for other cases when we need to getCallsiteSlow.
    // Note that allocateNewArena will link the allocated memory in the list of
    // Arenas.
    __llvm_ctx_profile_current_context_root->CurrentMem = Mem =
        Mem->allocateNewArena(getArenaAllocSize(AllocSize), Mem);
    AllocPlace = Mem->tryBumpAllocate(AllocSize);
  }
  auto *Ret = allocContextNode(AllocPlace, Guid, NumCounters, NumCallsites,
                               *InsertionPoint);
  *InsertionPoint = Ret;
  return Ret;
}

ContextNode *getFlatProfile(FunctionData &Data, void *Callee, GUID Guid,
                            uint32_t NumCounters) {
  if (ContextNode *Existing = Data.FlatCtx)
    return Existing;
  {
    // We could instead try to take the lock and, if that fails, return
    // TheScratchContext. But that could leave message pump loops more sparsely
    // profiled than everything else. Maybe that doesn't matter, and we can
    // optimize this later.
    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Data.Mutex);
    if (ContextNode *Existing = Data.FlatCtx)
      return Existing;

    auto NeededSize = ContextNode::getAllocSize(NumCounters, 0);
    char *AllocBuff = nullptr;
    {
      __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> FL(
          &FlatCtxArenaMutex);
      if (FlatCtxArena)
        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
      if (!AllocBuff) {
        FlatCtxArena = Arena::allocateNewArena(getArenaAllocSize(NeededSize),
                                               FlatCtxArena);
        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
      }
      if (!FlatCtxArenaHead)
        FlatCtxArenaHead = FlatCtxArena;
    }
    auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
    Data.FlatCtx = Ret;

    Data.EntryAddress = Callee;
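    // Publish the new FunctionData by prepending it to the global
    // AllFunctionsData list. On CAS failure, Data.Next is refreshed with the
    // current head, so we simply retry until the prepend succeeds.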
    Data.Next = reinterpret_cast<FunctionData *>(
        __sanitizer::atomic_load_relaxed(&AllFunctionsData));
    while (!__sanitizer::atomic_compare_exchange_strong(
        &AllFunctionsData, reinterpret_cast<uintptr_t *>(&Data.Next),
        reinterpret_cast<uintptr_t>(&Data),
        __sanitizer::memory_order_release)) {
    }
  }

  return Data.FlatCtx;
}

// This should be called once for a Root. Allocate the first arena, set up the
// first context.
void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
                  uint32_t NumCallsites) {
  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
      &AllContextsMutex);
  // Re-check - we got here without having taken the lock.
  if (Root->FirstMemBlock)
    return;
  const auto Needed = ContextNode::getAllocSize(NumCounters, NumCallsites);
  auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed));
  Root->FirstMemBlock = M;
  Root->CurrentMem = M;
  Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid,
                                     NumCounters, NumCallsites);
  AllContextRoots.PushBack(Root);
}

ContextRoot *FunctionData::getOrAllocateContextRoot() {
  auto *Root = CtxRoot;
  if (!canBeRoot(Root))
    return Root;
  if (Root)
    return Root;
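  // Slow path: take the lock and re-check, so that only one thread allocates
  // the ContextRoot for this function.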
  __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex);
  Root = CtxRoot;
  if (!Root) {
    Root = new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot();
    CtxRoot = Root;
  }

  assert(Root);
  return Root;
}

ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
                                      uint32_t Counters, uint32_t Callsites)
    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  IsUnderContext = true;
  __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
                                __sanitizer::memory_order_relaxed);
  if (!Root->FirstMemBlock) {
    setupContext(Root, Guid, Counters, Callsites);
  }
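  // Only one thread at a time collects the contextual profile for a given
  // root - the one that manages to take `Taken`.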
  if (Root->Taken.TryLock()) {
    __llvm_ctx_profile_current_context_root = Root;
    onContextEnter(*Root->FirstNode);
    return Root->FirstNode;
  }
  // If this thread couldn't take the lock, return scratch context.
  __llvm_ctx_profile_current_context_root = nullptr;
  return TheScratchContext;
}

ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid,
                                 uint32_t NumCounters, uint32_t NumCallsites,
                                 ContextRoot *CtxRoot) {

  // 1) if we are currently collecting a contextual profile, fetch a ContextNode
  // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
  // to (hopefully) offset the penalty of creating these contexts to before
  // profiling.
  //
  // 2) if we are under a root (regardless of whether this thread is collecting
  // a contextual profile for that root), do not collect a flat profile. We want
  // to keep flat profiles only for activations that can't happen under a root,
  // to avoid confusing profiles. We can, for example, combine flattened and
  // flat profiles meaningfully, as we wouldn't double-count anything.
  //
  // 3) to avoid lengthy startup, don't bother with flat profiles until the
  // profiling has started. We would reset them anyway when profiling starts.
  // HOWEVER. This does lose profiling for message pumps: those functions are
  // entered once and never exit. They should be assumed to be entered before
  // profiling starts - because profiling should start after the server is up
  // and running (which is equivalent to "message pumps are set up").
  if (!CtxRoot) {
    if (auto *RAD = getRootDetector())
      RAD->sample();
    else if (auto *CR = Data.CtxRoot) {
      if (canBeRoot(CR))
        return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
    }
    if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
      return TheScratchContext;
    else
      return markAsScratch(
          onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters)));
  }
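  // We are under a root, but this activation couldn't be attributed to a
  // specific callsite of an instrumented caller. Record it in the root's
  // `Unhandled` set (one node per GUID) and return it marked as scratch so
  // callees don't link subcontexts under it.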
  auto [Iter, Ins] = CtxRoot->Unhandled.insert({Guid, nullptr});
  if (Ins)
    Iter->second = getCallsiteSlow(Guid, &CtxRoot->FirstUnhandledCalleeNode,
                                   NumCounters, 0);
  return markAsScratch(onContextEnter(*Iter->second));
}

ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
                                            GUID Guid, uint32_t NumCounters,
                                            uint32_t NumCallsites) {
  auto *CtxRoot = __llvm_ctx_profile_current_context_root;
  // fast "out" if we're not even doing contextual collection.
  if (!CtxRoot)
    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
                               nullptr);

  // also fast "out" if the caller is scratch. We can see if it's scratch by
  // looking at the interior pointer into the subcontexts vector that the caller
  // provided, which, if the context is scratch, is also scratch (because all
  // the address calculations use even values - or, more precisely, values
  // aligned to 8 bytes).
  auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
  if (!CallsiteContext || isScratch(CallsiteContext))
    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
                               CtxRoot);

  // if the callee isn't the expected one, return scratch.
  // Signal handler(s) could have been invoked at any point in the execution.
  // Should that have happened, and had it (the handler) been built with
  // instrumentation, its __llvm_ctx_profile_get_context would have failed here.
  // Its sub call graph would have then populated
  // __llvm_ctx_profile_{expected_callee | callsite} at index 1.
  // The normal call graph may be impacted in that, if the signal handler
  // happened somewhere before we read the TLS here, we'd see the TLS reset and
  // we'd also fail here. That would just mean we would lose counter values for
  // the normal subgraph, this time around. That should be very unlikely, but if
  // it happens too frequently, we should be able to detect discrepancies in
  // entry counts (caller-callee). At the moment, the design goes on the
  // assumption that this is so infrequent, though, that it's not worth doing
  // more for that case.
  auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
  if (ExpectedCallee != Callee)
    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
                               CtxRoot);

  auto *Callsite = *CallsiteContext;
  // In the case of indirect calls, all the targets seen so far form a linked
  // list here. Find the one corresponding to this callee.
  while (Callsite && Callsite->guid() != Guid) {
    Callsite = Callsite->next();
  }
  auto *Ret = Callsite ? Callsite
                       : getCallsiteSlow(Guid, CallsiteContext, NumCounters,
                                         NumCallsites);
  if (Ret->callsites_size() != NumCallsites ||
      Ret->counters_size() != NumCounters)
    __sanitizer::Printf("[ctxprof] Returned ctx differs from what's asked: "
                        "Context: %p, Asked: %lu %u %u, Got: %lu %u %u \n",
                        reinterpret_cast<void *>(Ret), Guid, NumCallsites,
                        NumCounters, Ret->guid(), Ret->callsites_size(),
                        Ret->counters_size());
  onContextEnter(*Ret);
  return Ret;
}

ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid,
                                              uint32_t Counters,
                                              uint32_t Callsites) {
  auto *Root = FData->getOrAllocateContextRoot();
  assert(canBeRoot(Root));
  return tryStartContextGivenRoot(Root, Guid, Counters, Callsites);
}

void __llvm_ctx_profile_release_context(FunctionData *FData)
    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
  auto *CR = FData->CtxRoot;
  if (!CurrentRoot || CR != CurrentRoot)
    return;
  IsUnderContext = false;
  assert(CR && canBeRoot(CR));
  __llvm_ctx_profile_current_context_root = nullptr;
  CR->Taken.Unlock();
}

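// Reset any counters accumulated so far, optionally kick off the root
// auto-detector, then flag profiling as started.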
void __llvm_ctx_profile_start_collection(unsigned AutodetectDuration) {
  size_t NumMemUnits = 0;
  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
      &AllContextsMutex);
  for (uint32_t I = 0; I < AllContextRoots.Size(); ++I) {
    auto *Root = AllContextRoots[I];
    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> Lock(
        &Root->Taken);
    for (auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next())
      ++NumMemUnits;

    resetContextNode(*Root->FirstNode);
    if (Root->FirstUnhandledCalleeNode)
      resetContextNode(*Root->FirstUnhandledCalleeNode);
    __sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0);
  }
  if (AutodetectDuration) {
    // We leak RD intentionally. Knowing when to free it is tricky: there's a
    // race condition with functions observing `RootDetector` as non-null.
    // This can be addressed, but the alternatives have some added complexity
    // and it's not (yet) worth it.
    auto *RD = new (__sanitizer::InternalAlloc(sizeof(RootAutoDetector)))
        RootAutoDetector(AllFunctionsData, RootDetector, AutodetectDuration);
    RD->start();
  } else {
    __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
  }
  __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
}

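// Stop profiling, then hand the collected contextual and flat profiles to the
// Writer. Returns false if any context root fails structural validation.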
bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
  if (auto *RD = getRootDetector()) {
    __sanitizer::Printf("[ctxprof] Expected the root autodetector to have "
                        "finished well before attempting to fetch a context");
    RD->join();
  }

  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
      &AllContextsMutex);

  Writer.startContextSection();
  for (int I = 0, E = AllContextRoots.Size(); I < E; ++I) {
    auto *Root = AllContextRoots[I];
    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> TakenLock(
        &Root->Taken);
    if (!validate(Root)) {
      __sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid");
      return false;
    }
    Writer.writeContextual(
        *Root->FirstNode, Root->FirstUnhandledCalleeNode,
        __sanitizer::atomic_load_relaxed(&Root->TotalEntries));
  }
  Writer.endContextSection();
  Writer.startFlatSection();
  // New entries are prepended at the head of the list, so taking this snapshot
  // lets the list grow concurrently without causing a race condition with our
  // traversing it.
  const auto *Pos = reinterpret_cast<const FunctionData *>(
      __sanitizer::atomic_load_relaxed(&AllFunctionsData));
  for (; Pos; Pos = Pos->Next) {
    const auto *CR = Pos->CtxRoot;
    if (!CR && canBeRoot(CR)) {
      const auto *FP = Pos->FlatCtx;
      Writer.writeFlat(FP->guid(), FP->counters(), FP->counters_size());
    }
  }
  Writer.endFlatSection();
  return true;
}

void __llvm_ctx_profile_free() {
  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
  {
    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
        &AllContextsMutex);
    for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
      for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
        auto *C = A;
        A = A->next();
        __sanitizer::InternalFree(C);
      }
    AllContextRoots.Reset();
  }
  __sanitizer::atomic_store_relaxed(&AllFunctionsData, 0U);
  {
    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
        &FlatCtxArenaMutex);
    FlatCtxArena = nullptr;
    for (auto *A = FlatCtxArenaHead; A;) {
      auto *C = A;
      A = C->next();
      __sanitizer::InternalFree(C);
    }

    FlatCtxArenaHead = nullptr;
  }
}