CtxInstrProfiling.h source code [llvm_runtimes/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h]

/*===- CtxInstrProfiling.h- Contextual instrumentation-based PGO ---------===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
8
9	#ifndef CTX_PROFILE_CTXINSTRPROFILING_H_
10	#define CTX_PROFILE_CTXINSTRPROFILING_H_
11
12	#include "CtxInstrContextNode.h"
13	#include "sanitizer_common/sanitizer_mutex.h"
14	#include <sanitizer/common_interface_defs.h>
15
16	using namespace llvm::ctx_profile;
17
18	// Forward-declare for the one unittest checking Arena construction zeroes out
19	// its allocatable space.
20	class ArenaTest_ZeroInit_Test;
21	namespace __ctx_profile {
22
/// Alignment, in bytes, that the rest of this header assumes for pointers and
/// for the types checked by the static_asserts further below.
static constexpr size_t ExpectedAlignment = 8;
// We really depend on this, see further below. We currently support x86_64.
// When we want to support other archs, we need to trace the places Alignment is
// used and adjust accordingly.
static_assert(sizeof(void *) == ExpectedAlignment);
28
29	/// Arena (bump allocator) forming a linked list. Intentionally not thread safe.
30	/// Allocation and de-allocation happen using sanitizer APIs. We make that
31	/// explicit.
32	class Arena final {
33	public:
34	// When allocating a new Arena, optionally specify an existing one to append
35	// to, assumed to be the last in the Arena list. We only need to support
36	// appending to the arena list.
37	static Arena allocateNewArena(size_t Size, Arena Prev = nullptr);
38	static void freeArenaList(Arena *&A);
39
40	uint64_t size() const { return Size; }
41
42	// Allocate S bytes or return nullptr if we don't have that many available.
43	char *tryBumpAllocate(size_t S) {
44	if (Pos + S > Size)
45	return nullptr;
46	Pos += S;
47	return start() + (Pos - S);
48	}
49
50	Arena next() const* { return Next; }
51
52	// the beginning of allocatable memory.
53	const char start() const* { return const_cast<Arena >(this*)->start(); }
54	const char pos() const* { return start() + Pos; }
55
56	private:
57	friend class ::ArenaTest_ZeroInit_Test;
58	explicit Arena(uint32_t Size);
59	~Arena() = delete;
60
61	char start() { return* reinterpret_cast<char >(&this*[`1`]); }
62
63	Arena Next = nullptr*;
64	uint64_t Pos = `0`;
65	const uint64_t Size;
66	};
67
68	// The memory available for allocation follows the Arena header, and we expect
69	// it to be thus aligned.
70	static_assert(alignof(Arena) == ExpectedAlignment);
71
72	// Verify maintenance to ContextNode doesn't change this invariant, which makes
73	// sure the inlined vectors are appropriately aligned.
74	static_assert(alignof(ContextNode) == ExpectedAlignment);
75
76	/// ContextRoots are allocated by LLVM for entrypoints. LLVM is only concerned
77	/// with allocating and zero-initializing the global value (as in, GlobalValue)
78	/// for it.
79	struct ContextRoot {
80	ContextNode FirstNode = nullptr*;
81	Arena FirstMemBlock = nullptr*;
82	Arena CurrentMem = nullptr*;
83	// This is init-ed by the static zero initializer in LLVM.
84	// Taken is used to ensure only one thread traverses the contextual graph -
85	// either to read it or to write it. On server side, the same entrypoint will
86	// be entered by numerous threads, but over time, the profile aggregated by
87	// collecting sequentially on one thread at a time is expected to converge to
88	// the aggregate profile that may have been observable on all the threads.
89	// Note that this is node-by-node aggregation, i.e. summing counters of nodes
90	// at the same position in the graph, not flattening.
91	// Threads that cannot lock Taken (fail TryLock) are given a "scratch context"
92	// - a buffer they can clobber, safely from a memory access perspective.
93	//
94	// Note about "scratch"-ness: we currently ignore the data written in them
95	// (which is anyway clobbered). The design allows for that not be the case -
96	// because "scratch"-ness is first and foremost about not trying to build
97	// subcontexts, and is captured by tainting the pointer value (pointer to the
98	// memory treated as context), but right now, we drop that info.
99	//
100	// We could consider relaxing the requirement of more than one thread
101	// entering by holding a few context trees per entrypoint and then aggregating
102	// them (as explained above) at the end of the profile collection - it's a
103	// tradeoff between collection time and memory use: higher precision can be
104	// obtained with either less concurrent collections but more collection time,
105	// or with more concurrent collections (==more memory) and less collection
106	// time. Note that concurrent collection does happen for different
107	// entrypoints, regardless.
108	::__sanitizer::StaticSpinMutex Taken;
109
110	// If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
111	// instrumentation lowering side because it is responsible for allocating and
112	// zero-initializing ContextRoots.
113	static_assert(sizeof(Taken) == `1`);
114	};
115
/// This API is exposed for testing. See the APIs below about the contract with
/// LLVM.
/// Returns true iff the least significant bit of the address held in \p Ctx is
/// set, which is how a context pointer is marked ("tainted") as scratch.
inline bool isScratch(const void *Ctx) {
  return (reinterpret_cast<uint64_t>(Ctx) & 1);
}
121
122	} // namespace __ctx_profile
123
124	extern "C" {
125
126	// LLVM fills these in when lowering a llvm.instrprof.callsite intrinsic.
127	// position 0 is used when the current context isn't scratch, 1 when it is. They
128	// are volatile because of signal handlers - we mean to specifically control
129	// when the data is loaded.
130	//
131	/// TLS where LLVM stores the pointer of the called value, as part of lowering a
132	/// llvm.instrprof.callsite
133	extern __thread void *volatile __llvm_ctx_profile_expected_callee[`2`];
134	/// TLS where LLVM stores the pointer inside a caller's subcontexts vector that
135	/// corresponds to the callsite being lowered.
136	extern __thread ContextNode **volatile __llvm_ctx_profile_callsite[`2`];
137
138	// __llvm_ctx_profile_current_context_root is exposed for unit testing,
139	// othwerise it's only used internally by compiler-rt/ctx_profile.
140	extern __thread __ctx_profile::ContextRoot
141	*volatile __llvm_ctx_profile_current_context_root;
142
143	/// called by LLVM in the entry BB of a "entry point" function. The returned
144	/// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
145	ContextNode __llvm_ctx_profile_start_context(__ctx_profile::ContextRoot Root,
146	GUID Guid, uint32_t Counters,
147	uint32_t Callsites);
148
149	/// paired with __llvm_ctx_profile_start_context, and called at the exit of the
150	/// entry point function.
151	void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
152
153	/// called for any other function than entry points, in the entry BB of such
154	/// function. Same consideration about LSB of returned value as .._start_context
155	ContextNode __llvm_ctx_profile_get_context(void* *Callee, GUID Guid,
156	uint32_t NrCounters,
157	uint32_t NrCallsites);
158
159	/// Prepares for collection. Currently this resets counter values but preserves
160	/// internal context tree structure.
161	void __llvm_ctx_profile_start_collection();
162
163	/// Completely free allocated memory.
164	void __llvm_ctx_profile_free();
165
166	/// Used to obtain the profile. The Writer is called for each root ContextNode,
167	/// with the ContextRoot::Taken taken. The Writer is responsible for traversing
168	/// the structure underneath.
169	/// The Writer's first parameter plays the role of closure for Writer, and is
170	/// what the caller of __llvm_ctx_profile_fetch passes as the Data parameter.
171	/// The second parameter is the root of a context tree.
172	bool __llvm_ctx_profile_fetch(void *Data,
173	bool (Writer)(void* , const* ContextNode &));
174	}
175	#endif // CTX_PROFILE_CTXINSTRPROFILING_H_
176

Browse the source code of llvm_runtimes/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h