#include "llvm/ProfileData/MemProf.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"

namespace llvm {
namespace memprof {
MemProfSchema getFullSchema() {
  MemProfSchema List;
#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  return List;
}

MemProfSchema getHotColdSchema() {
  return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
          Meta::TotalLifetimeAccessDensity};
}

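// Layout sketch (inferred from the helpers below): each serialized allocation
// entry is a call stack identifier followed by the schema-driven MIB payload.
// V2 stores the full CallStackId, while V3 and V4 store a LinearCallStackId,
// which is no larger than CallStackId (see the static_assert in
// deserializeV3), hence the two separate size computations.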
static size_t serializedSizeV2(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The CallStackId
  Size += sizeof(CallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV3(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The linear call stack ID.
  Size += sizeof(LinearCallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
                                             IndexedVersion Version) const {
  switch (Version) {
  case Version2:
    return serializedSizeV2(*this, Schema);
  // Combine V3 and V4 as the size calculation is the same.
  case Version3:
  case Version4:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version2);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The CallStackId
  Result += Record.CallSites.size() * sizeof(CallStackId);
  return Result;
}

static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version3);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The linear call stack ID.
  // Note: V3 only stored the LinearCallStackId per call site.
  Result += Record.CallSites.size() * sizeof(LinearCallStackId);
  return Result;
}

static size_t serializedSizeV4(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version4);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  for (const auto &CS : Record.CallSites)
    Result += sizeof(LinearCallStackId) + sizeof(uint64_t) +
              CS.CalleeGuids.size() * sizeof(GlobalValue::GUID);
  return Result;
}

size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
                                            IndexedVersion Version) const {
  switch (Version) {
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  case Version4:
    return serializedSizeV4(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

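// On-disk record layout for V2, a sketch derived from the writes below:
//   uint64_t NumAllocSites
//   NumAllocSites x { CallStackId CSId; MIB payload per Schema }
//   uint64_t NumCallSites
//   NumCallSites x CallStackId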
static void serializeV2(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<CallStackId>(N.CSId);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &CS : Record.CallSites)
    LE.write<CallStackId>(CS.CSId);
}

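// V3 differs from V2 only in how call stacks are referenced (a sketch derived
// from the writes below): every CallStackId is replaced by the
// LinearCallStackId looked up in MemProfCallStackIndexes, which the caller
// must have populated for every CSId in the record.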
static void serializeV3(
    const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
    raw_ostream &OS,
    llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    assert(MemProfCallStackIndexes.contains(N.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &CS : Record.CallSites) {
    assert(MemProfCallStackIndexes.contains(CS.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
  }
}

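// V4 extends V3 by appending, after each call site's LinearCallStackId, a
// uint64_t count of callee GUIDs followed by the GUIDs themselves (a sketch
// derived from the writes below).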
static void serializeV4(
    const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
    raw_ostream &OS,
    llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    assert(MemProfCallStackIndexes.contains(N.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &CS : Record.CallSites) {
    assert(MemProfCallStackIndexes.contains(CS.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
    LE.write<uint64_t>(CS.CalleeGuids.size());
    for (const auto &Guid : CS.CalleeGuids)
      LE.write<GlobalValue::GUID>(Guid);
  }
}

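// Usage sketch (illustrative, not a call made in this file): a writer picks a
// schema and version and, for V3/V4, supplies the call stack index mapping.
//   Record.serialize(Schema, OS, Version4, &MemProfCallStackIndexes);
// Note that the V3 and V4 paths dereference MemProfCallStackIndexes
// unconditionally, so passing nullptr for those versions is not supported.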
void IndexedMemProfRecord::serialize(
    const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
    llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
    const {
  switch (Version) {
  case Version2:
    serializeV2(*this, Schema, OS);
    return;
  case Version3:
    serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
    return;
  case Version4:
    serializeV4(*this, Schema, OS, *MemProfCallStackIndexes);
    return;
  }
  llvm_unreachable("unsupported MemProf version");
}

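// The deserializers below mirror the corresponding serialize* routines: they
// read the little-endian fields in the same order and advance Ptr past each
// schema-sized MIB payload.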
static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId = endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSites.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    CallStackId CSId =
        endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Record.CallSites.emplace_back(CSId);
  }

  return Record;
}

static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  const size_t SerializedSize = PortableMemInfoBlock::serializedSize(Schema);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += SerializedSize;
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSites.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    // We are storing LinearCallStackId in CallSiteIds, which is a vector of
    // CallStackId. Assert that CallStackId is no smaller than
    // LinearCallStackId.
    static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
    LinearCallStackId CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Record.CallSites.emplace_back(CSId);
  }

  return Record;
}

static IndexedMemProfRecord deserializeV4(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  const size_t SerializedSize = PortableMemInfoBlock::serializedSize(Schema);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += SerializedSize;
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSites.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
    LinearCallStackId CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    const uint64_t NumGuids =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    SmallVector<GlobalValue::GUID, 1> Guids;
    Guids.reserve(NumGuids);
    for (uint64_t K = 0; K < NumGuids; ++K)
      Guids.push_back(
          endian::readNext<GlobalValue::GUID, llvm::endianness::little>(Ptr));
    Record.CallSites.emplace_back(CSId, std::move(Guids));
  }

  return Record;
}

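// Round-trip sketch (illustrative): a record written with
// serialize(Schema, OS, Version) can be reconstructed from the resulting bytes
// with deserialize(Schema, Ptr, Version), provided the same Schema and Version
// are used on both sides.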
IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
                                  const unsigned char *Ptr,
                                  IndexedVersion Version) {
  switch (Version) {
  case Version2:
    return deserializeV2(Schema, Ptr);
  case Version3:
    return deserializeV3(Schema, Ptr);
  case Version4:
    return deserializeV4(Schema, Ptr);
  }
  llvm_unreachable("unsupported MemProf version");
}

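// Usage sketch (illustrative; ResolveCallStack is a hypothetical helper that
// maps a CallStackId to its frames):
//   MemProfRecord Rec = IndexedRec.toMemProfRecord(
//       [&](CallStackId CSId) -> std::vector<Frame> {
//         return ResolveCallStack(CSId);
//       });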
MemProfRecord IndexedMemProfRecord::toMemProfRecord(
    llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const {
  MemProfRecord Record;

  Record.AllocSites.reserve(AllocSites.size());
  for (const IndexedAllocationInfo &IndexedAI : AllocSites) {
    AllocationInfo AI;
    AI.Info = IndexedAI.Info;
    AI.CallStack = Callback(IndexedAI.CSId);
    Record.AllocSites.push_back(std::move(AI));
  }

  Record.CallSites.reserve(CallSites.size());
  for (const IndexedCallSiteInfo &CS : CallSites) {
    std::vector<Frame> Frames = Callback(CS.CSId);
    Record.CallSites.emplace_back(std::move(Frames), CS.CalleeGuids);
  }

  return Record;
}

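// Example (a sketch based on the canonicalization described in the comment
// below; the names are hypothetical): a ".llvm." suffix added during
// compilation is expected not to change the GUID, so
//   getGUID("foo") == getGUID("foo.llvm.123456")
// whereas a ".__uniq." suffix is kept and yields a distinct GUID.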
GlobalValue::GUID getGUID(const StringRef FunctionName) {
  // Canonicalize the function name to drop suffixes such as ".llvm.". Note
  // that we do not drop any ".__uniq." suffixes, as getCanonicalFnName does
  // not drop those by default. This is by design to differentiate internal
  // linkage functions during matching. By dropping the other suffixes we can
  // then match functions in the profile use phase prior to their addition.
  // Note that this applies to both instrumented and sampled function names.
  StringRef CanonicalName =
      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);

  // We use the function GUID, which we expect to be a uint64_t. At this time,
  // it is the lower 64 bits of the MD5 of the canonical function name.
  return Function::getGUIDAssumingExternalLinkage(CanonicalName);
}

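// Schema wire format (as read below): a uint64_t count followed by that many
// uint64_t tags, each of which must name a valid Meta enumerator; otherwise
// the schema is rejected as malformed.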
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
  using namespace support;

  const unsigned char *Ptr = Buffer;
  const uint64_t NumSchemaIds =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
    return make_error<InstrProfError>(instrprof_error::malformed,
                                      "memprof schema invalid");
  }

  MemProfSchema Result;
  for (size_t I = 0; I < NumSchemaIds; I++) {
    const uint64_t Tag =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    if (Tag >= static_cast<uint64_t>(Meta::Size)) {
      return make_error<InstrProfError>(instrprof_error::malformed,
                                        "memprof schema invalid");
    }
    Result.push_back(static_cast<Meta>(Tag));
  }
  // Advance the buffer to one past the schema if we succeeded.
  Buffer = Ptr;
  return Result;
}
} // namespace memprof
} // namespace llvm