/*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#include "InstrProfilingUtil.h"

#define INSTR_PROF_VALUE_PROF_DATA
#define INSTR_PROF_COMMON_API_IMPL
#define INSTR_PROF_VALUE_PROF_MEMOP_API
#include "profile/InstrProfData.inc"

static int hasStaticCounters = 1;
static int OutOfNodesWarnings = 0;
static int hasNonDefaultValsPerSite = 0;
#define INSTR_PROF_MAX_VP_WARNS 10
#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
#define INSTR_PROF_VNODE_POOL_SIZE 1024

#ifndef _MSC_VER
/* A shared static pool in addition to the vnodes statically
 * allocated by the compiler. */
COMPILER_RT_VISIBILITY ValueProfNode
    lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
        COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
#endif

COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
    INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;

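/* The per-site cap on tracked values can be overridden at run time by setting
 * the LLVM_VP_MAX_NUM_VALS_PER_SITE environment variable before the
 * instrumented program starts (e.g. LLVM_VP_MAX_NUM_VALS_PER_SITE=8); the
 * value is clamped to INSTR_PROF_MAX_NUM_VAL_PER_SITE. */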
COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) {
  const char *Str = 0;
  Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
  if (Str && Str[0]) {
    VPMaxNumValsPerSite = atoi(Str);
    hasNonDefaultValsPerSite = 1;
  }
  if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
}

COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
  VPMaxNumValsPerSite = MaxVals;
  hasNonDefaultValsPerSite = 1;
}

/* This method is only used in value profiler mock testing. */
COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
                                   uint32_t ValueKind, uint16_t NumValueSites) {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-qual"
#elif defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
#endif
  *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
#ifdef __GNUC__
#pragma GCC diagnostic pop
#elif defined(__clang__)
#pragma clang diagnostic pop
#endif
}

/* This method is only used in value profiler mock testing. */
COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_iterate_data(const __llvm_profile_data *Data) {
  return Data + 1;
}

/* This method is only used in value profiler mock testing. */
COMPILER_RT_VISIBILITY void *
__llvm_get_function_addr(const __llvm_profile_data *Data) {
  return Data->FunctionPointer;
}

/* Allocate an array that holds the pointers to the linked lists of
 * value profile counter nodes. The number of elements in the array
 * is the total number of value profile sites instrumented. Returns
 * 0 if allocation fails.
 */

static int allocateValueProfileCounters(__llvm_profile_data *Data) {
  uint64_t NumVSites = 0;
  uint32_t VKI;

  /* This function will never be called when the value site array is allocated
     statically at compile time. */
  hasStaticCounters = 0;
  /* When dynamic allocation is enabled, allow tracking the maximum number of
   * values allowed. */
  if (!hasNonDefaultValsPerSite)
    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;

  for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
    NumVSites += Data->NumValueSites[VKI];

  // If NumVSites = 0, calloc is allowed to return a non-null pointer.
  assert(NumVSites > 0 && "NumVSites can't be zero");
  ValueProfNode **Mem =
      (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
  if (!Mem)
    return 0;
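  /* Atomically install the array. If another thread won the race, release our
   * allocation and report failure; the current update is dropped, but later
   * calls will see the winner's array through Data->Values. */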
  if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
    free(Mem);
    return 0;
  }
  return 1;
}

static ValueProfNode *allocateOneNode(void) {
  ValueProfNode *Node;

  if (!hasStaticCounters)
    return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));

  /* Early check to avoid value wrapping around. */
  if (CurrentVNode + 1 > EndVNode) {
    if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
      PROF_WARN("Unable to track new values: %s. "
                "Consider using option -mllvm -vp-counters-per-site=<n> to "
                "allocate more value profile counters at compile time.\n",
                "Running out of static counters");
    }
    return 0;
  }
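  /* Claim the next slot from the static pool with an atomic fetch-and-add so
   * that concurrent callers each receive a distinct node. */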
  Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
  /* Due to section padding, EndVNode may point into an incomplete VNode, so we
   * need to skip such a trailing partial node. */
  if (Node + 1 > EndVNode)
    return 0;

  return Node;
}

static COMPILER_RT_ALWAYS_INLINE void
instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
                          uint32_t CounterIndex, uint64_t CountValue) {
  __llvm_profile_data *PData = (__llvm_profile_data *)Data;
  if (!PData)
    return;
  if (!CountValue)
    return;
  if (!PData->Values) {
    if (!allocateValueProfileCounters(PData))
      return;
  }

  ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
  ValueProfNode *PrevVNode = NULL;
  ValueProfNode *MinCountVNode = NULL;
  ValueProfNode *CurVNode = ValueCounters[CounterIndex];
  uint64_t MinCount = UINT64_MAX;

  uint8_t VDataCount = 0;
  while (CurVNode) {
    if (TargetValue == CurVNode->Value) {
      CurVNode->Count += CountValue;
      return;
    }
    if (CurVNode->Count < MinCount) {
      MinCount = CurVNode->Count;
      MinCountVNode = CurVNode;
    }
    PrevVNode = CurVNode;
    CurVNode = CurVNode->Next;
    ++VDataCount;
  }

  if (VDataCount >= VPMaxNumValsPerSite) {
    /* Bump down the min count node's count. If it reaches 0,
     * evict it. This eviction/replacement policy makes hot
     * targets more sticky while cold targets less so. In other
     * words, it makes it less likely for the hot targets to be
     * prematurely evicted during the warmup/establishment period,
     * when their counts are still low. In the special case where
     * the number of values tracked is reduced to only one, this
     * policy guarantees that the dominating target with >50% of the
     * total count will survive in the end. Note that this scheme
     * allows the runtime to track the min count node in an adaptive
     * manner. It can correct previous mistakes and eventually
     * lock onto a cold target that is already in a stable state.
     *
     * In very rare cases, this replacement scheme may still lead
     * to target loss. For instance, out of \c N value slots, \c N-1
     * slots are occupied by lukewarm targets during the warmup
     * period and the remaining slot is contested by two or more
     * very hot targets. If those hot targets occur in an interleaved
     * way, none of them will survive (gain enough weight to throw out
     * other established entries) due to the ping-pong effect.
     * To handle this situation, the user can choose to increase the max
     * number of tracked values per value site. Alternatively, a more
     * expensive eviction mechanism can be implemented. It requires
     * the runtime to track the total number of evictions per site.
     * When the total number of evictions reaches a certain threshold,
     * the runtime can wipe out more than one lowest-count entry
     * to make room for hot targets.
     */
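    /* Illustrative trace (not from the original source): with
     * VPMaxNumValsPerSite == 1 and per-call CountValue == 1, the sequence
     * A A B A leaves the single slot holding A: the first two A's raise its
     * count to 2, B only decrements it to 1 (no eviction, since 2 > 1), and
     * the final A restores the count to 2, so the dominant target survives. */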
    if (MinCountVNode->Count <= CountValue) {
      CurVNode = MinCountVNode;
      CurVNode->Value = TargetValue;
      CurVNode->Count = CountValue;
    } else
      MinCountVNode->Count -= CountValue;

    return;
  }

  CurVNode = allocateOneNode();
  if (!CurVNode)
    return;
  CurVNode->Value = TargetValue;
  CurVNode->Count += CountValue;

  uint32_t Success = 0;
  if (!ValueCounters[CounterIndex])
    Success =
        COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
  else if (PrevVNode && !PrevVNode->Next)
    Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);

  if (!Success && !hasStaticCounters) {
    free(CurVNode);
    return;
  }
}

COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
                                 uint32_t CounterIndex) {
  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
}
COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
                                       uint32_t CounterIndex,
                                       uint64_t CountValue) {
  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
}

/*
 * The target values are partitioned into multiple ranges. The range spec is
 * defined in InstrProfData.inc.
 */
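/* For illustration only (the actual boundaries are defined by the range spec,
 * not here): a small memop size is typically mapped to itself, while a larger
 * size falls into a wider bucket and is collapsed to that bucket's
 * representative value, which is then profiled like any other target value. */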
COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
                                uint32_t CounterIndex) {
  // Map the target value to the representative value of its range.
  uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
  __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
}

/*
 * A wrapper struct that represents value profile runtime data.
 * Like the InstrProfRecord class used by profiling host tools,
 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
 * ValueProfRecordClosure so that the runtime data can be serialized using
 * the shared C implementation.
 */
typedef struct ValueProfRuntimeRecord {
  const __llvm_profile_data *Data;
  ValueProfNode **NodesKind[IPVK_Last + 1];
  uint8_t **SiteCountArray;
} ValueProfRuntimeRecord;

/* ValueProfRecordClosure Interface implementation. */

static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
  return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
}

static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
  uint32_t S = 0, I;
  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
    return 0;
  for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
    S += Record->SiteCountArray[VK][I];
  return S;
}

static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
                                         uint32_t S) {
  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  return Record->SiteCountArray[VK][S];
}

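/* Only the closure callbacks that this file actually exercises are provided;
 * the remaining slots are left NULL, since the runtime only computes the
 * serialized size here and reads records incrementally through
 * VPDataReaderType below rather than building a complete ValueProfData. */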
static ValueProfRuntimeRecord RTRecord;
static ValueProfRecordClosure RTRecordClosure = {
    &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */
    getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT,
    INSTR_PROF_NULLPTR, /* RemapValueData */
    INSTR_PROF_NULLPTR, /* GetValueForSite */
    INSTR_PROF_NULLPTR  /* AllocValueProfData */
};

static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
                                 uint8_t *SiteCountArray[]) {
  unsigned I, J, S = 0, NumValueKinds = 0;
  ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
  RTRecord.Data = Data;
  RTRecord.SiteCountArray = SiteCountArray;
  for (I = 0; I <= IPVK_Last; I++) {
    uint16_t N = Data->NumValueSites[I];
    if (!N)
      continue;

    NumValueKinds++;

    RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
    for (J = 0; J < N; J++) {
      /* Compute value count for each site. */
      uint32_t C = 0;
      ValueProfNode *Site =
          Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
      while (Site) {
        C++;
        Site = Site->Next;
      }
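      /* SiteCountArray stores per-site counts as uint8_t, so saturate the
       * count at UCHAR_MAX. */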
      if (C > UCHAR_MAX)
        C = UCHAR_MAX;
      RTRecord.SiteCountArray[I][J] = C;
    }
    S += N;
  }
  return NumValueKinds;
}

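/* Copy up to N value/count pairs for the given site into Dst, starting at
 * StartNode (or at the head of the site's node list when StartNode is null),
 * and return the node at which the next batch should resume. The loop does
 * not null-check VNode, so callers must not request more entries than remain
 * in the list. */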
static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
                                        InstrProfValueData *Dst,
                                        ValueProfNode *StartNode, uint32_t N) {
  unsigned I;
  ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
  for (I = 0; I < N; I++) {
    Dst[I].Value = VNode->Value;
    Dst[I].Count = VNode->Count;
    VNode = VNode->Next;
  }
  return VNode;
}

static uint32_t getValueProfDataSizeWrapper(void) {
  return getValueProfDataSize(&RTRecordClosure);
}

static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
  return getNumValueDataForSiteRT(&RTRecord, VK, S);
}

static VPDataReaderType TheVPDataReader = {
    initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
    getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
    getValueProfDataSizeWrapper,      getNextNValueData};

COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) {
  return &TheVPDataReader;
}