//===- IndexedMemProfData.cpp - MemProf format support ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MemProf data is serialized by writeMemProf and deserialized by
// IndexedMemProfReader, both provided in this file.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfRadixTree.h"
#include "llvm/ProfileData/MemProfSummary.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/OnDiskHashTable.h"

namespace llvm {

// Serialize Schema.
static void writeMemProfSchema(ProfOStream &OS,
                               const memprof::MemProfSchema &Schema) {
  OS.write(static_cast<uint64_t>(Schema.size()));
  for (const auto Id : Schema)
    OS.write(static_cast<uint64_t>(Id));
}
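
// The writes above produce the following on-disk layout:
//   uint64_t NumSchemaEntries
//   uint64_t SchemaEntry 0 ... uint64_t SchemaEntry NumSchemaEntries - 1
// readMemProfSchema on the reader side consumes exactly this sequence.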

// Serialize MemProfRecordData. Return RecordTableOffset.
static uint64_t writeMemProfRecords(
    ProfOStream &OS,
    llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
        &MemProfRecordData,
    memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
    llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
        *MemProfCallStackIndexes = nullptr) {
  memprof::RecordWriterTrait RecordWriter(Schema, Version,
                                          MemProfCallStackIndexes);
  OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
      RecordTableGenerator;
  for (auto &[GUID, Record] : MemProfRecordData) {
    // Insert the key (func hash) and value (memprof record).
    RecordTableGenerator.insert(GUID, Record, RecordWriter);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfRecordData.clear();

  // The call to Emit invokes RecordWriterTrait::EmitData which destructs
  // the memprof record copies owned by the RecordTableGenerator. This works
  // because the RecordTableGenerator is not used after this point.
  return RecordTableGenerator.Emit(OS.OS, RecordWriter);
}
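
// Note that OnDiskChainedHashTableGenerator::Emit returns the stream offset
// of the table's bucket array, not the start of its payload. Readers
// therefore pair this offset (Buckets) with the stream position captured just
// before the call to Emit (Payload), as IndexedMemProfReader::deserializeV2
// does below.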

// Serialize MemProfFrameData. Return FrameTableOffset.
static uint64_t writeMemProfFrames(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
  OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
      FrameTableGenerator;
  for (auto &[FrameId, Frame] : MemProfFrameData) {
    // Insert the key (frame id) and value (frame contents).
    FrameTableGenerator.insert(FrameId, Frame);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return FrameTableGenerator.Emit(OS.OS);
}

// Serialize MemProfFrameData as a linear array of Frames. Return the mapping
// from FrameIds to their indexes within the frame array.
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
writeMemProfFrameArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  // Mappings from FrameIds to array indexes.
  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;

  // Compute the order in which we serialize Frames. The order does not matter
  // in terms of correctness, but we still compute it for deserialization
  // performance. Specifically, if we serialize frequently used Frames one
  // after another, we have better cache utilization. For two Frames that
  // appear equally frequently, we break the tie by serializing first the one
  // that tends to appear earlier in call stacks; to that end, we compute the
  // sum of indexes within call stacks for each Frame. If there is still a
  // tie, we fall back to comparing the two FrameIds, purely for output
  // stability.
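  // For example, given hypothetical frames A (Count=5, PositionSum=3),
  // B (Count=5, PositionSum=7), and C (Count=2, PositionSum=1), the
  // serialized order is A, B, C: A beats B on the smaller PositionSum, and
  // both beat C on the larger Count.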
  std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
  FrameIdOrder.reserve(MemProfFrameData.size());
  for (const auto &[Id, Frame] : MemProfFrameData)
    FrameIdOrder.emplace_back(Id, &Frame);
  assert(MemProfFrameData.size() == FrameIdOrder.size());
  llvm::sort(FrameIdOrder,
             [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
                 const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
               const auto &SL = FrameHistogram[L.first];
               const auto &SR = FrameHistogram[R.first];
               // Popular FrameIds should come first.
               if (SL.Count != SR.Count)
                 return SL.Count > SR.Count;
               // If they are equally popular, then the one that tends to
               // appear earlier in call stacks should come first.
               if (SL.PositionSum != SR.PositionSum)
                 return SL.PositionSum < SR.PositionSum;
               // Compare their FrameIds for sort stability.
               return L.first < R.first;
             });

  // Serialize all frames while creating mappings from FrameIds to their
  // linear indexes in the serialized order.
  uint64_t Index = 0;
  MemProfFrameIndexes.reserve(FrameIdOrder.size());
  for (const auto &[Id, F] : FrameIdOrder) {
    F->serialize(OS.OS);
    MemProfFrameIndexes.insert({Id, Index});
    ++Index;
  }
  assert(MemProfFrameData.size() == Index);
  assert(MemProfFrameData.size() == MemProfFrameIndexes.size());

  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return MemProfFrameIndexes;
}
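
// Unlike the hash table emitted by writeMemProfFrames, the frame array
// supports direct indexing: Frames are fixed-size records laid out
// back-to-back, so a reader can locate the Frame for a given LinearFrameId
// with a simple base-plus-offset computation (FrameBase plus the linear ID
// times the serialized Frame size) instead of a hash table lookup.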

static uint64_t writeMemProfCallStacks(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
      CallStackTableGenerator;
  for (auto &[CSId, CallStack] : MemProfCallStackData)
    CallStackTableGenerator.insert(CSId, CallStack);
  // Release the memory of this MapVector as it is no longer needed.
  MemProfCallStackData.clear();

  return CallStackTableGenerator.Emit(OS.OS);
}

static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
        &MemProfFrameIndexes,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
    unsigned &NumElements) {
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes;

  memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
  Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
                FrameHistogram);
  for (auto I : Builder.getRadixArray())
    OS.write32(I);
  NumElements = Builder.getRadixArray().size();
  MemProfCallStackIndexes = Builder.takeCallStackPos();

  // Release the memory of this MapVector as it is no longer needed.
  MemProfCallStackData.clear();

  return MemProfCallStackIndexes;
}
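
// Each radix tree element is written as 32 bits, so a LinearCallStackId is an
// index into an array of uint32_t; MemProfCallStackIndexes records where each
// call stack begins within that array. The V3/V4 record table below then
// refers to call stacks by these linear IDs rather than by 64-bit
// CallStackIds.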

// Write out MemProf Version2 as follows:
// uint64_t Version
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW in V2)
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
static Error writeMemProfV2(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version2);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof call stack table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t CallStackPayloadOffset = OS.tell();
  uint64_t CallStackTableOffset =
      writeMemProfCallStacks(OS, MemProfData.CallStacks);

  uint64_t Header[] = {
      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
      CallStackPayloadOffset, CallStackTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

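// Write out MemProf Version3 and Version4 as follows (a sketch in the same
// style as the V2 comment above, inferred from the writes below; fields
// marked NEW in V4 are only present in V4):
// uint64_t Version
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
// uint64_t RecordPayloadOffset = Offset for the record payload
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t DataAccessProfOffset (NEW in V4)
// Serialized MemProfSummary (NEW in V4)
// uint64_t Num schema entries
// uint64_t Schema entry 0
// ....
// uint64_t Schema entry N - 1
// Frame array (in the order computed by writeMemProfFrameArray)
// Call stack radix tree array
// OnDiskChainedHashTable MemProfRecordData
// Data access profile data (NEW in V4)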
static Error writeMemProfRadixTreeBased(
    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
    memprof::IndexedVersion Version, bool MemProfFullSchema,
    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData =
        nullptr,
    std::unique_ptr<memprof::MemProfSummary> MemProfSum = nullptr) {
  assert((Version == memprof::Version3 || Version == memprof::Version4) &&
         "Unsupported version for radix tree format");

  OS.write(Version); // Write the specific version (V3 or V4).
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof record payload offset.
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  if (Version >= memprof::Version4) {
    OS.write(0ULL); // Reserve space for the data access profile offset.

    MemProfSum->write(OS);
  }

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
      memprof::computeFrameHistogram(MemProfData.CallStacks);
  assert(MemProfData.Frames.size() == FrameHistogram.size());

  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
      writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);

  uint64_t CallStackPayloadOffset = OS.tell();
  // The number of elements in the call stack array.
  unsigned NumElements = 0;
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes =
          writeMemProfCallStackArray(OS, MemProfData.CallStacks,
                                     MemProfFrameIndexes, FrameHistogram,
                                     NumElements);

  uint64_t RecordPayloadOffset = OS.tell();
  uint64_t RecordTableOffset = writeMemProfRecords(
      OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);

  uint64_t DataAccessProfOffset = 0;
  if (DataAccessProfileData != nullptr) {
    assert(Version >= memprof::Version4 &&
           "Data access profiles are added starting from v4");
    DataAccessProfOffset = OS.tell();
    if (Error E = DataAccessProfileData->serialize(OS))
      return E;
  }

  // Verify that the computation for the number of elements in the call stack
  // array works.
  assert(CallStackPayloadOffset +
             NumElements * sizeof(memprof::LinearFrameId) ==
         RecordPayloadOffset);

  SmallVector<uint64_t, 4> Header = {
      CallStackPayloadOffset,
      RecordPayloadOffset,
      RecordTableOffset,
  };
  if (Version >= memprof::Version4)
    Header.push_back(DataAccessProfOffset);

  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}
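
// Both writeMemProfV2 and writeMemProfRadixTreeBased use the same
// reserve-then-patch scheme: the header slots are zero-filled up front
// because the offsets they hold are only known once the corresponding
// payloads have been written, and are then backfilled with OS.patch.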

// Write out MemProf Version3.
static Error writeMemProfV3(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
                                    MemProfFullSchema);
}

// Write out MemProf Version4.
static Error writeMemProfV4(
    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
    bool MemProfFullSchema,
    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
    std::unique_ptr<memprof::MemProfSummary> MemProfSum) {
  return writeMemProfRadixTreeBased(
      OS, MemProfData, memprof::Version4, MemProfFullSchema,
      std::move(DataAccessProfileData), std::move(MemProfSum));
}

// Write out the MemProf data in the requested version.
Error writeMemProf(
    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
    std::unique_ptr<memprof::MemProfSummary> MemProfSum) {
  switch (MemProfVersionRequested) {
  case memprof::Version2:
    return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
  case memprof::Version3:
    return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
  case memprof::Version4:
    return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
                          std::move(DataAccessProfileData),
                          std::move(MemProfSum));
  }

  return make_error<InstrProfError>(
      instrprof_error::unsupported_version,
      formatv("MemProf version {} not supported; "
              "requires version between {} and {}, inclusive",
              MemProfVersionRequested, memprof::MinimumSupportedVersion,
              memprof::MaximumSupportedVersion));
}
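
// A minimal caller sketch (hypothetical; in-tree, InstrProfWriter invokes
// writeMemProf when emitting an indexed profile):
//   if (Error E = writeMemProf(OS, MemProfData, memprof::Version3,
//                              /*MemProfFullSchema=*/false,
//                              /*DataAccessProfileData=*/nullptr,
//                              /*MemProfSum=*/nullptr))
//     return E;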

Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
                                          const unsigned char *Ptr) {
  // The value returned from RecordTableGenerator.Emit.
  const uint64_t RecordTableOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  // The offset in the stream right before invoking
  // FrameTableGenerator.Emit.
  const uint64_t FramePayloadOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  // The value returned from FrameTableGenerator.Emit.
  const uint64_t FrameTableOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  // The offset in the stream right before invoking
  // CallStackTableGenerator.Emit.
  uint64_t CallStackPayloadOffset = 0;
  // The value returned from CallStackTableGenerator.Emit.
  uint64_t CallStackTableOffset = 0;
  if (Version >= memprof::Version2) {
    CallStackPayloadOffset =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    CallStackTableOffset =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  }

  // Read the schema.
  auto SchemaOr = memprof::readMemProfSchema(Ptr);
  if (!SchemaOr)
    return SchemaOr.takeError();
  Schema = SchemaOr.get();

  // Now initialize the table reader with a pointer into data buffer.
  MemProfRecordTable.reset(MemProfRecordHashTable::Create(
      /*Buckets=*/Start + RecordTableOffset,
      /*Payload=*/Ptr,
      /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));

  // Initialize the frame table reader with the payload and bucket offsets.
  MemProfFrameTable.reset(MemProfFrameHashTable::Create(
      /*Buckets=*/Start + FrameTableOffset,
      /*Payload=*/Start + FramePayloadOffset,
      /*Base=*/Start));

  if (Version >= memprof::Version2)
    MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
        /*Buckets=*/Start + CallStackTableOffset,
        /*Payload=*/Start + CallStackPayloadOffset,
        /*Base=*/Start));

  return Error::success();
}
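
// deserialize below dispatches here only for Version2, so the
// Version >= memprof::Version2 guards above are always taken today; they
// likely remain from earlier format versions that shared this code path.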

Error IndexedMemProfReader::deserializeRadixTreeBased(
    const unsigned char *Start, const unsigned char *Ptr,
    memprof::IndexedVersion Version) {
  assert((Version == memprof::Version3 || Version == memprof::Version4) &&
         "Unsupported version for radix tree format");
  // The offset in the stream at which the call stack payload (the radix tree
  // array) begins.
  const uint64_t CallStackPayloadOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  // The offset in the stream right before invoking RecordTableGenerator.Emit.
  const uint64_t RecordPayloadOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  // The value returned from RecordTableGenerator.Emit.
  const uint64_t RecordTableOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  uint64_t DataAccessProfOffset = 0;
  if (Version >= memprof::Version4) {
    DataAccessProfOffset =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    MemProfSum = memprof::MemProfSummary::deserialize(Ptr);
  }

  // Read the schema.
  auto SchemaOr = memprof::readMemProfSchema(Ptr);
  if (!SchemaOr)
    return SchemaOr.takeError();
  Schema = SchemaOr.get();

  FrameBase = Ptr;
  CallStackBase = Start + CallStackPayloadOffset;

  // Compute the number of elements in the radix tree array. Since we use this
  // to reserve enough bits in a BitVector, it's totally OK if we overestimate
  // this number a little bit because of padding just before the next section.
  RadixTreeSize = (RecordPayloadOffset - CallStackPayloadOffset) /
                  sizeof(memprof::LinearFrameId);

  // Now initialize the table reader with a pointer into data buffer.
  MemProfRecordTable.reset(MemProfRecordHashTable::Create(
      /*Buckets=*/Start + RecordTableOffset,
      /*Payload=*/Start + RecordPayloadOffset,
      /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));

  assert((!DataAccessProfOffset || DataAccessProfOffset > RecordTableOffset) &&
         "Data access profile is either empty or after the record table");
  if (DataAccessProfOffset > RecordTableOffset) {
    DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
    const unsigned char *DAPPtr = Start + DataAccessProfOffset;
    if (Error E = DataAccessProfileData->deserialize(DAPPtr))
      return E;
  }

  return Error::success();
}
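
// After deserializeRadixTreeBased, FrameBase points at the frame array that
// writeMemProfFrameArray emitted right after the schema, and CallStackBase
// points at the radix tree array; record lookups resolve LinearFrameIds and
// LinearCallStackIds against these two bases.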

Error IndexedMemProfReader::deserialize(const unsigned char *Start,
                                        uint64_t MemProfOffset) {
  const unsigned char *Ptr = Start + MemProfOffset;

  // Read the MemProf version number.
  const uint64_t FirstWord =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  // Check if the version is supported.
  if (FirstWord >= memprof::MinimumSupportedVersion &&
      FirstWord <= memprof::MaximumSupportedVersion) {
    // Everything is good. We can proceed to deserialize the rest.
    Version = static_cast<memprof::IndexedVersion>(FirstWord);
  } else {
    return make_error<InstrProfError>(
        instrprof_error::unsupported_version,
        formatv("MemProf version {} not supported; "
                "requires version between {} and {}, inclusive",
                FirstWord, memprof::MinimumSupportedVersion,
                memprof::MaximumSupportedVersion));
  }

  switch (Version) {
  case memprof::Version2:
    if (Error E = deserializeV2(Start, Ptr))
      return E;
    break;
  case memprof::Version3:
  case memprof::Version4:
    // V3 and V4 share the same high-level structure (radix tree, linear IDs).
    if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
      return E;
    break;
  }

  return Error::success();
}
} // namespace llvm