1 | //===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains support for writing profiling data for clang's |
10 | // instrumentation based PGO and coverage. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/ProfileData/InstrProfWriter.h" |
15 | #include "llvm/ADT/STLExtras.h" |
16 | #include "llvm/ADT/SetVector.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/IR/ProfileSummary.h" |
19 | #include "llvm/ProfileData/InstrProf.h" |
20 | #include "llvm/ProfileData/MemProf.h" |
21 | #include "llvm/ProfileData/ProfileCommon.h" |
22 | #include "llvm/Support/Compression.h" |
23 | #include "llvm/Support/Endian.h" |
24 | #include "llvm/Support/EndianStream.h" |
25 | #include "llvm/Support/Error.h" |
26 | #include "llvm/Support/FormatVariadic.h" |
27 | #include "llvm/Support/MemoryBuffer.h" |
28 | #include "llvm/Support/OnDiskHashTable.h" |
29 | #include "llvm/Support/raw_ostream.h" |
30 | #include <cstdint> |
31 | #include <memory> |
32 | #include <string> |
33 | #include <tuple> |
34 | #include <utility> |
35 | #include <vector> |
36 | |
37 | using namespace llvm; |
38 | |
// A struct to define how the data stream should be patched. For indexed
// profiling, only the uint64_t data type is needed.
41 | struct PatchItem { |
42 | uint64_t Pos; // Where to patch. |
43 | ArrayRef<uint64_t> D; // An array of source data. |
44 | }; |
45 | |
46 | namespace llvm { |
47 | |
// A wrapper class that abstracts the writer stream and supports back
// patching of previously written bytes.
50 | class ProfOStream { |
51 | public: |
52 | ProfOStream(raw_fd_ostream &FD) |
53 | : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {} |
54 | ProfOStream(raw_string_ostream &STR) |
55 | : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {} |
56 | |
57 | [[nodiscard]] uint64_t tell() const { return OS.tell(); } |
  void write(uint64_t V) { LE.write<uint64_t>(V); }
  void write32(uint32_t V) { LE.write<uint32_t>(V); }
  void writeByte(uint8_t V) { LE.write<uint8_t>(V); }
61 | |
62 | // \c patch can only be called when all data is written and flushed. |
63 | // For raw_string_ostream, the patch is done on the target string |
64 | // directly and it won't be reflected in the stream's internal buffer. |
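  //
  // A typical (illustrative) back-patching sequence used by the writers in
  // this file looks like:
  //
  //   uint64_t PlaceholderPos = OS.tell();
  //   OS.write(0);                        // reserve space
  //   ...                                 // emit payload, learn RealOffset
  //   uint64_t Vals[] = {RealOffset};
  //   OS.patch({{PlaceholderPos, Vals}});
  //
  // PlaceholderPos, RealOffset, and Vals are hypothetical names used only
  // for this sketch.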
  void patch(ArrayRef<PatchItem> P) {
    using namespace support;

    if (IsFDOStream) {
      raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
      const uint64_t LastPos = FDOStream.tell();
      for (const auto &K : P) {
        FDOStream.seek(K.Pos);
        for (uint64_t Elem : K.D)
          write(Elem);
      }
      // Reset the stream to the last position after patching so that users
      // don't accidentally overwrite data. This makes it consistent with
      // the string stream below which replaces the data directly.
      FDOStream.seek(LastPos);
    } else {
      raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
      std::string &Data = SOStream.str(); // with flush
      for (const auto &K : P) {
        for (int I = 0, E = K.D.size(); I != E; I++) {
          uint64_t Bytes =
              endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
          Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
                       (const char *)&Bytes, sizeof(uint64_t));
        }
      }
    }
  }
93 | |
  // If \c OS is an instance of \c raw_fd_ostream, this field will be
  // true. Otherwise, \c OS will be a raw_string_ostream.
96 | bool IsFDOStream; |
97 | raw_ostream &OS; |
98 | support::endian::Writer LE; |
99 | }; |
100 | |
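// Trait used with OnDiskChainedHashTableGenerator to serialize the
// per-function profile records, keyed by function name. As records are
// emitted it also feeds the regular and context-sensitive summary builders.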
101 | class InstrProfRecordWriterTrait { |
102 | public: |
103 | using key_type = StringRef; |
104 | using key_type_ref = StringRef; |
105 | |
106 | using data_type = const InstrProfWriter::ProfilingData *const; |
107 | using data_type_ref = const InstrProfWriter::ProfilingData *const; |
108 | |
109 | using hash_value_type = uint64_t; |
110 | using offset_type = uint64_t; |
111 | |
112 | llvm::endianness ValueProfDataEndianness = llvm::endianness::little; |
113 | InstrProfSummaryBuilder *SummaryBuilder; |
114 | InstrProfSummaryBuilder *CSSummaryBuilder; |
115 | |
116 | InstrProfRecordWriterTrait() = default; |
117 | |
118 | static hash_value_type ComputeHash(key_type_ref K) { |
119 | return IndexedInstrProf::ComputeHash(K); |
120 | } |
121 | |
122 | static std::pair<offset_type, offset_type> |
123 | EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { |
124 | using namespace support; |
125 | |
126 | endian::Writer LE(Out, llvm::endianness::little); |
127 | |
128 | offset_type N = K.size(); |
129 | LE.write<offset_type>(Val: N); |
130 | |
131 | offset_type M = 0; |
132 | for (const auto &ProfileData : *V) { |
133 | const InstrProfRecord &ProfRecord = ProfileData.second; |
134 | M += sizeof(uint64_t); // The function hash |
135 | M += sizeof(uint64_t); // The size of the Counts vector |
136 | M += ProfRecord.Counts.size() * sizeof(uint64_t); |
137 | M += sizeof(uint64_t); // The size of the Bitmap vector |
138 | M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t); |
139 | |
140 | // Value data |
141 | M += ValueProfData::getSize(Record: ProfileData.second); |
142 | } |
143 | LE.write<offset_type>(Val: M); |
144 | |
145 | return std::make_pair(x&: N, y&: M); |
146 | } |
147 | |
148 | void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) { |
149 | Out.write(Ptr: K.data(), Size: N); |
150 | } |
151 | |
  void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);
    for (const auto &ProfileData : *V) {
      const InstrProfRecord &ProfRecord = ProfileData.second;
      if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
        CSSummaryBuilder->addRecord(ProfRecord);
      else
        SummaryBuilder->addRecord(ProfRecord);

      LE.write<uint64_t>(ProfileData.first); // Function hash
      LE.write<uint64_t>(ProfRecord.Counts.size());
      for (uint64_t I : ProfRecord.Counts)
        LE.write<uint64_t>(I);

      LE.write<uint64_t>(ProfRecord.BitmapBytes.size());
      for (uint64_t I : ProfRecord.BitmapBytes)
        LE.write<uint64_t>(I);

      // Write value data
      std::unique_ptr<ValueProfData> VDataPtr =
          ValueProfData::serializeFrom(ProfileData.second);
      uint32_t S = VDataPtr->getSize();
      VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
      Out.write((const char *)VDataPtr.get(), S);
    }
  }
180 | }; |
181 | |
182 | } // end namespace llvm |
183 | |
184 | InstrProfWriter::InstrProfWriter( |
185 | bool Sparse, uint64_t TemporalProfTraceReservoirSize, |
186 | uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion, |
187 | memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema) |
188 | : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength), |
189 | TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize), |
190 | InfoObj(new InstrProfRecordWriterTrait()), |
191 | WritePrevVersion(WritePrevVersion), |
192 | MemProfVersionRequested(MemProfVersionRequested), |
193 | MemProfFullSchema(MemProfFullSchema) {} |
194 | |
195 | InstrProfWriter::~InstrProfWriter() { delete InfoObj; } |
196 | |
197 | // Internal interface for testing purpose only. |
198 | void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) { |
199 | InfoObj->ValueProfDataEndianness = Endianness; |
200 | } |
201 | |
202 | void InstrProfWriter::setOutputSparse(bool Sparse) { |
203 | this->Sparse = Sparse; |
204 | } |
205 | |
206 | void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight, |
207 | function_ref<void(Error)> Warn) { |
208 | auto Name = I.Name; |
209 | auto Hash = I.Hash; |
  addRecord(Name, Hash, std::move(I), Weight, Warn);
211 | } |
212 | |
213 | void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other, |
214 | OverlapStats &Overlap, |
215 | OverlapStats &FuncLevelOverlap, |
216 | const OverlapFuncFilters &FuncFilter) { |
217 | auto Name = Other.Name; |
218 | auto Hash = Other.Hash; |
219 | Other.accumulateCounts(Sum&: FuncLevelOverlap.Test); |
220 | if (!FunctionData.contains(Key: Name)) { |
221 | Overlap.addOneUnique(UniqueFunc: FuncLevelOverlap.Test); |
222 | return; |
223 | } |
224 | if (FuncLevelOverlap.Test.CountSum < 1.0f) { |
225 | Overlap.Overlap.NumEntries += 1; |
226 | return; |
227 | } |
228 | auto &ProfileDataMap = FunctionData[Name]; |
229 | bool NewFunc; |
230 | ProfilingData::iterator Where; |
231 | std::tie(args&: Where, args&: NewFunc) = |
232 | ProfileDataMap.insert(KV: std::make_pair(x&: Hash, y: InstrProfRecord())); |
233 | if (NewFunc) { |
234 | Overlap.addOneMismatch(MismatchFunc: FuncLevelOverlap.Test); |
235 | return; |
236 | } |
237 | InstrProfRecord &Dest = Where->second; |
238 | |
239 | uint64_t ValueCutoff = FuncFilter.ValueCutoff; |
240 | if (!FuncFilter.NameFilter.empty() && Name.contains(Other: FuncFilter.NameFilter)) |
241 | ValueCutoff = 0; |
242 | |
243 | Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff); |
244 | } |
245 | |
246 | void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash, |
247 | InstrProfRecord &&I, uint64_t Weight, |
248 | function_ref<void(Error)> Warn) { |
249 | auto &ProfileDataMap = FunctionData[Name]; |
250 | |
251 | bool NewFunc; |
252 | ProfilingData::iterator Where; |
253 | std::tie(args&: Where, args&: NewFunc) = |
254 | ProfileDataMap.insert(KV: std::make_pair(x&: Hash, y: InstrProfRecord())); |
255 | InstrProfRecord &Dest = Where->second; |
256 | |
257 | auto MapWarn = [&](instrprof_error E) { |
258 | Warn(make_error<InstrProfError>(Args&: E)); |
259 | }; |
260 | |
261 | if (NewFunc) { |
262 | // We've never seen a function with this name and hash, add it. |
263 | Dest = std::move(I); |
264 | if (Weight > 1) |
265 | Dest.scale(N: Weight, D: 1, Warn: MapWarn); |
266 | } else { |
267 | // We're updating a function we've seen before. |
268 | Dest.merge(Other&: I, Weight, Warn: MapWarn); |
269 | } |
270 | |
271 | Dest.sortValueData(); |
272 | } |
273 | |
274 | void InstrProfWriter::addMemProfRecord( |
275 | const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) { |
276 | auto [Iter, Inserted] = MemProfData.Records.insert(KV: {Id, Record}); |
277 | // If we inserted a new record then we are done. |
278 | if (Inserted) { |
279 | return; |
280 | } |
281 | memprof::IndexedMemProfRecord &Existing = Iter->second; |
282 | Existing.merge(Other: Record); |
283 | } |
284 | |
285 | bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id, |
286 | const memprof::Frame &Frame, |
287 | function_ref<void(Error)> Warn) { |
288 | auto [Iter, Inserted] = MemProfData.Frames.insert(KV: {Id, Frame}); |
289 | // If a mapping already exists for the current frame id and it does not |
290 | // match the new mapping provided then reset the existing contents and bail |
291 | // out. We don't support the merging of memprof data whose Frame -> Id |
292 | // mapping across profiles is inconsistent. |
293 | if (!Inserted && Iter->second != Frame) { |
294 | Warn(make_error<InstrProfError>(Args: instrprof_error::malformed, |
295 | Args: "frame to id mapping mismatch" )); |
296 | return false; |
297 | } |
298 | return true; |
299 | } |
300 | |
301 | bool InstrProfWriter::addMemProfCallStack( |
302 | const memprof::CallStackId CSId, |
303 | const llvm::SmallVector<memprof::FrameId> &CallStack, |
304 | function_ref<void(Error)> Warn) { |
305 | auto [Iter, Inserted] = MemProfData.CallStacks.insert(KV: {CSId, CallStack}); |
306 | // If a mapping already exists for the current call stack id and it does not |
307 | // match the new mapping provided then reset the existing contents and bail |
308 | // out. We don't support the merging of memprof data whose CallStack -> Id |
309 | // mapping across profiles is inconsistent. |
310 | if (!Inserted && Iter->second != CallStack) { |
311 | Warn(make_error<InstrProfError>(Args: instrprof_error::malformed, |
312 | Args: "call stack to id mapping mismatch" )); |
313 | return false; |
314 | } |
315 | return true; |
316 | } |
317 | |
318 | void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) { |
319 | llvm::append_range(C&: BinaryIds, R&: BIs); |
320 | } |
321 | |
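// Add a single temporal profile trace to the reservoir. Traces are appended
// until the reservoir is full; after that, each new trace randomly replaces
// an existing one so the reservoir remains a representative sample of the
// stream.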
322 | void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) { |
323 | assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength); |
324 | assert(!Trace.FunctionNameRefs.empty()); |
325 | if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) { |
326 | // Simply append the trace if we have not yet hit our reservoir size limit. |
327 | TemporalProfTraces.push_back(Elt: std::move(Trace)); |
328 | } else { |
329 | // Otherwise, replace a random trace in the stream. |
330 | std::uniform_int_distribution<uint64_t> Distribution( |
331 | 0, TemporalProfTraceStreamSize); |
332 | uint64_t RandomIndex = Distribution(RNG); |
333 | if (RandomIndex < TemporalProfTraces.size()) |
334 | TemporalProfTraces[RandomIndex] = std::move(Trace); |
335 | } |
336 | ++TemporalProfTraceStreamSize; |
337 | } |
338 | |
339 | void InstrProfWriter::addTemporalProfileTraces( |
340 | SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) { |
341 | for (auto &Trace : SrcTraces) |
342 | if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength) |
343 | Trace.FunctionNameRefs.resize(new_size: MaxTemporalProfTraceLength); |
344 | llvm::erase_if(C&: SrcTraces, P: [](auto &T) { return T.FunctionNameRefs.empty(); }); |
345 | // Assume that the source has the same reservoir size as the destination to |
346 | // avoid needing to record it in the indexed profile format. |
347 | bool IsDestSampled = |
348 | (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize); |
349 | bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize); |
350 | if (!IsDestSampled && IsSrcSampled) { |
    // If one of the streams is sampled, ensure that it belongs to Dest.
352 | std::swap(LHS&: TemporalProfTraces, RHS&: SrcTraces); |
353 | std::swap(a&: TemporalProfTraceStreamSize, b&: SrcStreamSize); |
354 | std::swap(a&: IsDestSampled, b&: IsSrcSampled); |
355 | } |
356 | if (!IsSrcSampled) { |
357 | // If the source stream is not sampled, we add each source trace normally. |
358 | for (auto &Trace : SrcTraces) |
359 | addTemporalProfileTrace(Trace: std::move(Trace)); |
360 | return; |
361 | } |
362 | // Otherwise, we find the traces that would have been removed if we added |
363 | // the whole source stream. |
364 | SmallSetVector<uint64_t, 8> IndicesToReplace; |
365 | for (uint64_t I = 0; I < SrcStreamSize; I++) { |
366 | std::uniform_int_distribution<uint64_t> Distribution( |
367 | 0, TemporalProfTraceStreamSize); |
368 | uint64_t RandomIndex = Distribution(RNG); |
369 | if (RandomIndex < TemporalProfTraces.size()) |
370 | IndicesToReplace.insert(X: RandomIndex); |
371 | ++TemporalProfTraceStreamSize; |
372 | } |
373 | // Then we insert a random sample of the source traces. |
374 | llvm::shuffle(first: SrcTraces.begin(), last: SrcTraces.end(), g&: RNG); |
375 | for (const auto &[Index, Trace] : llvm::zip(t&: IndicesToReplace, u&: SrcTraces)) |
376 | TemporalProfTraces[Index] = std::move(Trace); |
377 | } |
378 | |
379 | void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, |
380 | function_ref<void(Error)> Warn) { |
381 | for (auto &I : IPW.FunctionData) |
382 | for (auto &Func : I.getValue()) |
383 | addRecord(Name: I.getKey(), Hash: Func.first, I: std::move(Func.second), Weight: 1, Warn); |
384 | |
385 | BinaryIds.reserve(n: BinaryIds.size() + IPW.BinaryIds.size()); |
386 | for (auto &I : IPW.BinaryIds) |
387 | addBinaryIds(BIs: I); |
388 | |
389 | addTemporalProfileTraces(SrcTraces&: IPW.TemporalProfTraces, |
390 | SrcStreamSize: IPW.TemporalProfTraceStreamSize); |
391 | |
392 | MemProfData.Frames.reserve(NumEntries: IPW.MemProfData.Frames.size()); |
393 | for (auto &[FrameId, Frame] : IPW.MemProfData.Frames) { |
394 | // If we weren't able to add the frame mappings then it doesn't make sense |
395 | // to try to merge the records from this profile. |
396 | if (!addMemProfFrame(Id: FrameId, Frame, Warn)) |
397 | return; |
398 | } |
399 | |
400 | MemProfData.CallStacks.reserve(NumEntries: IPW.MemProfData.CallStacks.size()); |
401 | for (auto &[CSId, CallStack] : IPW.MemProfData.CallStacks) { |
402 | if (!addMemProfCallStack(CSId, CallStack, Warn)) |
403 | return; |
404 | } |
405 | |
406 | MemProfData.Records.reserve(NumEntries: IPW.MemProfData.Records.size()); |
407 | for (auto &[GUID, Record] : IPW.MemProfData.Records) { |
408 | addMemProfRecord(Id: GUID, Record); |
409 | } |
410 | } |
411 | |
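// In sparse mode, only encode profiling data for functions that have at
// least one non-zero counter or bitmap byte.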
412 | bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { |
413 | if (!Sparse) |
414 | return true; |
415 | for (const auto &Func : PD) { |
416 | const InstrProfRecord &IPR = Func.second; |
417 | if (llvm::any_of(Range: IPR.Counts, P: [](uint64_t Count) { return Count > 0; })) |
418 | return true; |
419 | if (llvm::any_of(Range: IPR.BitmapBytes, P: [](uint8_t Byte) { return Byte > 0; })) |
420 | return true; |
421 | } |
422 | return false; |
423 | } |
424 | |
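// Copy the computed profile summary counts and detailed cutoff entries into
// the on-disk summary data structure.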
425 | static void setSummary(IndexedInstrProf::Summary *TheSummary, |
426 | ProfileSummary &PS) { |
427 | using namespace IndexedInstrProf; |
428 | |
429 | const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary(); |
430 | TheSummary->NumSummaryFields = Summary::NumKinds; |
431 | TheSummary->NumCutoffEntries = Res.size(); |
432 | TheSummary->set(K: Summary::MaxFunctionCount, V: PS.getMaxFunctionCount()); |
433 | TheSummary->set(K: Summary::MaxBlockCount, V: PS.getMaxCount()); |
434 | TheSummary->set(K: Summary::MaxInternalBlockCount, V: PS.getMaxInternalCount()); |
435 | TheSummary->set(K: Summary::TotalBlockCount, V: PS.getTotalCount()); |
436 | TheSummary->set(K: Summary::TotalNumBlocks, V: PS.getNumCounts()); |
437 | TheSummary->set(K: Summary::TotalNumFunctions, V: PS.getNumFunctions()); |
438 | for (unsigned I = 0; I < Res.size(); I++) |
439 | TheSummary->setEntry(I, E: Res[I]); |
440 | } |
441 | |
442 | // Serialize Schema. |
443 | static void writeMemProfSchema(ProfOStream &OS, |
444 | const memprof::MemProfSchema &Schema) { |
445 | OS.write(V: static_cast<uint64_t>(Schema.size())); |
446 | for (const auto Id : Schema) |
447 | OS.write(V: static_cast<uint64_t>(Id)); |
448 | } |
449 | |
450 | // Serialize MemProfRecordData. Return RecordTableOffset. |
451 | static uint64_t writeMemProfRecords( |
452 | ProfOStream &OS, |
453 | llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord> |
454 | &MemProfRecordData, |
455 | memprof::MemProfSchema *Schema, memprof::IndexedVersion Version, |
456 | llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> |
457 | *MemProfCallStackIndexes = nullptr) { |
458 | memprof::RecordWriterTrait RecordWriter(Schema, Version, |
459 | MemProfCallStackIndexes); |
460 | OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait> |
461 | RecordTableGenerator; |
462 | for (auto &[GUID, Record] : MemProfRecordData) { |
463 | // Insert the key (func hash) and value (memprof record). |
464 | RecordTableGenerator.insert(Key: GUID, Data&: Record, InfoObj&: RecordWriter); |
465 | } |
466 | // Release the memory of this MapVector as it is no longer needed. |
467 | MemProfRecordData.clear(); |
468 | |
469 | // The call to Emit invokes RecordWriterTrait::EmitData which destructs |
470 | // the memprof record copies owned by the RecordTableGenerator. This works |
471 | // because the RecordTableGenerator is not used after this point. |
472 | return RecordTableGenerator.Emit(Out&: OS.OS, InfoObj&: RecordWriter); |
473 | } |
474 | |
475 | // Serialize MemProfFrameData. Return FrameTableOffset. |
476 | static uint64_t writeMemProfFrames( |
477 | ProfOStream &OS, |
478 | llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) { |
479 | OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait> |
480 | FrameTableGenerator; |
481 | for (auto &[FrameId, Frame] : MemProfFrameData) { |
482 | // Insert the key (frame id) and value (frame contents). |
483 | FrameTableGenerator.insert(Key: FrameId, Data&: Frame); |
484 | } |
485 | // Release the memory of this MapVector as it is no longer needed. |
486 | MemProfFrameData.clear(); |
487 | |
488 | return FrameTableGenerator.Emit(Out&: OS.OS); |
489 | } |
490 | |
491 | // Serialize MemProfFrameData. Return the mapping from FrameIds to their |
492 | // indexes within the frame array. |
493 | static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> |
494 | writeMemProfFrameArray( |
495 | ProfOStream &OS, |
496 | llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData, |
497 | llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) { |
498 | // Mappings from FrameIds to array indexes. |
499 | llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes; |
500 | |
501 | // Compute the order in which we serialize Frames. The order does not matter |
502 | // in terms of correctness, but we still compute it for deserialization |
503 | // performance. Specifically, if we serialize frequently used Frames one |
504 | // after another, we have better cache utilization. For two Frames that |
505 | // appear equally frequently, we break a tie by serializing the one that tends |
506 | // to appear earlier in call stacks. We implement the tie-breaking mechanism |
507 | // by computing the sum of indexes within call stacks for each Frame. If we |
// still have a tie, we fall back to comparing the two FrameIds, purely for
// the stability of the output.
510 | std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder; |
511 | FrameIdOrder.reserve(n: MemProfFrameData.size()); |
512 | for (const auto &[Id, Frame] : MemProfFrameData) |
513 | FrameIdOrder.emplace_back(args: Id, args: &Frame); |
514 | assert(MemProfFrameData.size() == FrameIdOrder.size()); |
515 | llvm::sort(C&: FrameIdOrder, |
516 | Comp: [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L, |
517 | const std::pair<memprof::FrameId, const memprof::Frame *> &R) { |
518 | const auto &SL = FrameHistogram[L.first]; |
519 | const auto &SR = FrameHistogram[R.first]; |
520 | // Popular FrameIds should come first. |
521 | if (SL.Count != SR.Count) |
522 | return SL.Count > SR.Count; |
523 | // If they are equally popular, then the one that tends to appear |
524 | // earlier in call stacks should come first. |
525 | if (SL.PositionSum != SR.PositionSum) |
526 | return SL.PositionSum < SR.PositionSum; |
527 | // Compare their FrameIds for sort stability. |
528 | return L.first < R.first; |
529 | }); |
530 | |
531 | // Serialize all frames while creating mappings from linear IDs to FrameIds. |
532 | uint64_t Index = 0; |
533 | MemProfFrameIndexes.reserve(NumEntries: FrameIdOrder.size()); |
534 | for (const auto &[Id, F] : FrameIdOrder) { |
535 | F->serialize(OS&: OS.OS); |
536 | MemProfFrameIndexes.insert(KV: {Id, Index}); |
537 | ++Index; |
538 | } |
539 | assert(MemProfFrameData.size() == Index); |
540 | assert(MemProfFrameData.size() == MemProfFrameIndexes.size()); |
541 | |
542 | // Release the memory of this MapVector as it is no longer needed. |
543 | MemProfFrameData.clear(); |
544 | |
545 | return MemProfFrameIndexes; |
546 | } |
547 | |
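// Serialize MemProfCallStackData. Return CallStackTableOffset.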
548 | static uint64_t writeMemProfCallStacks( |
549 | ProfOStream &OS, |
550 | llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> |
551 | &MemProfCallStackData) { |
552 | OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait> |
553 | CallStackTableGenerator; |
554 | for (auto &[CSId, CallStack] : MemProfCallStackData) |
555 | CallStackTableGenerator.insert(Key: CSId, Data&: CallStack); |
556 | // Release the memory of this vector as it is no longer needed. |
557 | MemProfCallStackData.clear(); |
558 | |
559 | return CallStackTableGenerator.Emit(Out&: OS.OS); |
560 | } |
561 | |
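// Serialize MemProfCallStackData as a radix tree. Return the mapping from
// CallStackIds to their positions within the radix tree array.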
562 | static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> |
563 | writeMemProfCallStackArray( |
564 | ProfOStream &OS, |
565 | llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> |
566 | &MemProfCallStackData, |
567 | llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> |
568 | &MemProfFrameIndexes, |
569 | llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) { |
570 | llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> |
571 | MemProfCallStackIndexes; |
572 | |
573 | memprof::CallStackRadixTreeBuilder Builder; |
574 | Builder.build(MemProfCallStackData: std::move(MemProfCallStackData), MemProfFrameIndexes, |
575 | FrameHistogram); |
576 | for (auto I : Builder.getRadixArray()) |
577 | OS.write32(V: I); |
578 | MemProfCallStackIndexes = Builder.takeCallStackPos(); |
579 | |
580 | // Release the memory of this vector as it is no longer needed. |
581 | MemProfCallStackData.clear(); |
582 | |
583 | return MemProfCallStackIndexes; |
584 | } |
585 | |
586 | // Write out MemProf Version0 as follows: |
587 | // uint64_t RecordTableOffset = RecordTableGenerator.Emit |
588 | // uint64_t FramePayloadOffset = Offset for the frame payload |
589 | // uint64_t FrameTableOffset = FrameTableGenerator.Emit |
590 | // uint64_t Num schema entries |
591 | // uint64_t Schema entry 0 |
592 | // uint64_t Schema entry 1 |
593 | // .... |
594 | // uint64_t Schema entry N - 1 |
595 | // OnDiskChainedHashTable MemProfRecordData |
596 | // OnDiskChainedHashTable MemProfFrameData |
597 | static Error writeMemProfV0(ProfOStream &OS, |
598 | memprof::IndexedMemProfData &MemProfData) { |
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.

  auto Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version0);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
  OS.patch({{HeaderUpdatePos, Header}});
615 | |
616 | return Error::success(); |
617 | } |
618 | |
619 | // Write out MemProf Version1 as follows: |
620 | // uint64_t Version (NEW in V1) |
621 | // uint64_t RecordTableOffset = RecordTableGenerator.Emit |
622 | // uint64_t FramePayloadOffset = Offset for the frame payload |
623 | // uint64_t FrameTableOffset = FrameTableGenerator.Emit |
624 | // uint64_t Num schema entries |
625 | // uint64_t Schema entry 0 |
626 | // uint64_t Schema entry 1 |
627 | // .... |
628 | // uint64_t Schema entry N - 1 |
629 | // OnDiskChainedHashTable MemProfRecordData |
630 | // OnDiskChainedHashTable MemProfFrameData |
631 | static Error writeMemProfV1(ProfOStream &OS, |
632 | memprof::IndexedMemProfData &MemProfData) { |
  OS.write(memprof::Version1);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.

  auto Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version1);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
  OS.patch({{HeaderUpdatePos, Header}});
650 | |
651 | return Error::success(); |
652 | } |
653 | |
654 | // Write out MemProf Version2 as follows: |
655 | // uint64_t Version |
656 | // uint64_t RecordTableOffset = RecordTableGenerator.Emit |
657 | // uint64_t FramePayloadOffset = Offset for the frame payload |
658 | // uint64_t FrameTableOffset = FrameTableGenerator.Emit |
659 | // uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2) |
660 | // uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2) |
661 | // uint64_t Num schema entries |
662 | // uint64_t Schema entry 0 |
663 | // uint64_t Schema entry 1 |
664 | // .... |
665 | // uint64_t Schema entry N - 1 |
666 | // OnDiskChainedHashTable MemProfRecordData |
667 | // OnDiskChainedHashTable MemProfFrameData |
668 | // OnDiskChainedHashTable MemProfCallStackData (NEW in V2) |
669 | static Error writeMemProfV2(ProfOStream &OS, |
670 | memprof::IndexedMemProfData &MemProfData, |
671 | bool MemProfFullSchema) { |
  OS.write(memprof::Version2);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof call stack table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t CallStackPayloadOffset = OS.tell();
  uint64_t CallStackTableOffset =
      writeMemProfCallStacks(OS, MemProfData.CallStacks);

  uint64_t Header[] = {
      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
      CallStackPayloadOffset, CallStackTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});
700 | |
701 | return Error::success(); |
702 | } |
703 | |
704 | // Write out MemProf Version3 as follows: |
705 | // uint64_t Version |
706 | // uint64_t CallStackPayloadOffset = Offset for the call stack payload |
707 | // uint64_t RecordPayloadOffset = Offset for the record payload |
708 | // uint64_t RecordTableOffset = RecordTableGenerator.Emit |
709 | // uint64_t Num schema entries |
710 | // uint64_t Schema entry 0 |
711 | // uint64_t Schema entry 1 |
712 | // .... |
713 | // uint64_t Schema entry N - 1 |
714 | // Frames serialized one after another |
715 | // Call stacks encoded as a radix tree |
716 | // OnDiskChainedHashTable MemProfRecordData |
717 | static Error writeMemProfV3(ProfOStream &OS, |
718 | memprof::IndexedMemProfData &MemProfData, |
719 | bool MemProfFullSchema) { |
  OS.write(memprof::Version3);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof record payload offset.
  OS.write(0ULL); // Reserve space for the memprof record table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
      memprof::computeFrameHistogram(MemProfData.CallStacks);
  assert(MemProfData.Frames.size() == FrameHistogram.size());

  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
      writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);

  uint64_t CallStackPayloadOffset = OS.tell();
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes = writeMemProfCallStackArray(
          OS, MemProfData.CallStacks, MemProfFrameIndexes, FrameHistogram);

  uint64_t RecordPayloadOffset = OS.tell();
  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
                          &MemProfCallStackIndexes);

  uint64_t Header[] = {
      CallStackPayloadOffset,
      RecordPayloadOffset,
      RecordTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});
754 | |
755 | return Error::success(); |
756 | } |
757 | |
758 | // Write out the MemProf data in a requested version. |
759 | static Error writeMemProf(ProfOStream &OS, |
760 | memprof::IndexedMemProfData &MemProfData, |
761 | memprof::IndexedVersion MemProfVersionRequested, |
762 | bool MemProfFullSchema) { |
763 | switch (MemProfVersionRequested) { |
764 | case memprof::Version0: |
765 | return writeMemProfV0(OS, MemProfData); |
766 | case memprof::Version1: |
767 | return writeMemProfV1(OS, MemProfData); |
768 | case memprof::Version2: |
769 | return writeMemProfV2(OS, MemProfData, MemProfFullSchema); |
770 | case memprof::Version3: |
771 | return writeMemProfV3(OS, MemProfData, MemProfFullSchema); |
772 | } |
773 | |
  return make_error<InstrProfError>(
      instrprof_error::unsupported_version,
      formatv("MemProf version {} not supported; "
              "requires version between {} and {}, inclusive",
              MemProfVersionRequested, memprof::MinimumSupportedVersion,
              memprof::MaximumSupportedVersion));
780 | } |
781 | |
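// Write the first four fields of the indexed profile header eagerly and
// reserve space for the section offsets that are only known once their
// sections have been written. Returns the offset at which those reserved
// fields start, so they can be back patched later.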
uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
                                      const bool WritePrevVersion,
                                      ProfOStream &OS) {
  // Only write out the first four fields.
  for (int I = 0; I < 4; I++)
    OS.write(reinterpret_cast<const uint64_t *>(&Header)[I]);

  // Remember the offset of the remaining fields to allow back patching later.
  auto BackPatchStartOffset = OS.tell();

  // Reserve the space for back patching later.
  OS.write(0); // HashOffset
  OS.write(0); // MemProfOffset
  OS.write(0); // BinaryIdOffset
  OS.write(0); // TemporalProfTracesOffset
  if (!WritePrevVersion)
    OS.write(0); // VTableNamesOffset
799 | |
800 | return BackPatchStartOffset; |
801 | } |
802 | |
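// Write out the vtable names, compressed with zlib when available, preceded
// by the length of the (possibly compressed) string and padded to a multiple
// of eight bytes.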
803 | Error InstrProfWriter::writeVTableNames(ProfOStream &OS) { |
804 | std::vector<std::string> VTableNameStrs; |
805 | for (StringRef VTableName : VTableNames.keys()) |
806 | VTableNameStrs.push_back(x: VTableName.str()); |
807 | |
808 | std::string CompressedVTableNames; |
809 | if (!VTableNameStrs.empty()) |
810 | if (Error E = collectGlobalObjectNameStrings( |
811 | NameStrs: VTableNameStrs, doCompression: compression::zlib::isAvailable(), |
812 | Result&: CompressedVTableNames)) |
813 | return E; |
814 | |
815 | const uint64_t CompressedStringLen = CompressedVTableNames.length(); |
816 | |
817 | // Record the length of compressed string. |
818 | OS.write(V: CompressedStringLen); |
819 | |
820 | // Write the chars in compressed strings. |
821 | for (auto &c : CompressedVTableNames) |
822 | OS.writeByte(V: static_cast<uint8_t>(c)); |
823 | |
824 | // Pad up to a multiple of 8. |
825 | // InstrProfReader could read bytes according to 'CompressedStringLen'. |
826 | const uint64_t PaddedLength = alignTo(Value: CompressedStringLen, Align: 8); |
827 | |
828 | for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) |
829 | OS.writeByte(V: 0); |
830 | |
831 | return Error::success(); |
832 | } |
833 | |
834 | Error InstrProfWriter::writeImpl(ProfOStream &OS) { |
835 | using namespace IndexedInstrProf; |
836 | using namespace support; |
837 | |
838 | OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator; |
839 | |
840 | InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs); |
841 | InfoObj->SummaryBuilder = &ISB; |
842 | InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs); |
843 | InfoObj->CSSummaryBuilder = &CSISB; |
844 | |
845 | // Populate the hash table generator. |
846 | SmallVector<std::pair<StringRef, const ProfilingData *>> OrderedData; |
847 | for (const auto &I : FunctionData) |
848 | if (shouldEncodeData(PD: I.getValue())) |
849 | OrderedData.emplace_back(Args: (I.getKey()), Args: &I.getValue()); |
850 | llvm::sort(C&: OrderedData, Comp: less_first()); |
851 | for (const auto &I : OrderedData) |
852 | Generator.insert(Key: I.first, Data: I.second); |
853 | |
854 | // Write the header. |
  IndexedInstrProf::Header Header;
856 | Header.Version = WritePrevVersion |
857 | ? IndexedInstrProf::ProfVersion::Version11 |
858 | : IndexedInstrProf::ProfVersion::CurrentVersion; |
859 | // The WritePrevVersion handling will either need to be removed or updated |
860 | // if the version is advanced beyond 12. |
861 | static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == |
862 | IndexedInstrProf::ProfVersion::Version12); |
863 | if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) |
864 | Header.Version |= VARIANT_MASK_IR_PROF; |
865 | if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) |
866 | Header.Version |= VARIANT_MASK_CSIR_PROF; |
867 | if (static_cast<bool>(ProfileKind & |
868 | InstrProfKind::FunctionEntryInstrumentation)) |
869 | Header.Version |= VARIANT_MASK_INSTR_ENTRY; |
870 | if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage)) |
871 | Header.Version |= VARIANT_MASK_BYTE_COVERAGE; |
872 | if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly)) |
873 | Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY; |
874 | if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) |
875 | Header.Version |= VARIANT_MASK_MEMPROF; |
876 | if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) |
877 | Header.Version |= VARIANT_MASK_TEMPORAL_PROF; |
878 | |
879 | const uint64_t BackPatchStartOffset = |
880 | writeHeader(Header, WritePrevVersion, OS); |
881 | |
882 | // Reserve space to write profile summary data. |
883 | uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); |
884 | uint32_t SummarySize = Summary::getSize(NumSumFields: Summary::NumKinds, NumCutoffEntries: NumEntries); |
885 | // Remember the summary offset. |
886 | uint64_t SummaryOffset = OS.tell(); |
887 | for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) |
888 | OS.write(V: 0); |
889 | uint64_t CSSummaryOffset = 0; |
890 | uint64_t CSSummarySize = 0; |
891 | if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) { |
892 | CSSummaryOffset = OS.tell(); |
893 | CSSummarySize = SummarySize / sizeof(uint64_t); |
894 | for (unsigned I = 0; I < CSSummarySize; I++) |
895 | OS.write(V: 0); |
896 | } |
897 | |
898 | // Write the hash table. |
899 | uint64_t HashTableStart = Generator.Emit(Out&: OS.OS, InfoObj&: *InfoObj); |
900 | |
901 | // Write the MemProf profile data if we have it. |
902 | uint64_t MemProfSectionStart = 0; |
903 | if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) { |
904 | MemProfSectionStart = OS.tell(); |
905 | if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested, |
906 | MemProfFullSchema)) |
907 | return E; |
908 | } |
909 | |
910 | // BinaryIdSection has two parts: |
911 | // 1. uint64_t BinaryIdsSectionSize |
912 | // 2. list of binary ids that consist of: |
913 | // a. uint64_t BinaryIdLength |
914 | // b. uint8_t BinaryIdData |
915 | // c. uint8_t Padding (if necessary) |
916 | uint64_t BinaryIdSectionStart = OS.tell(); |
  // Calculate the size of the binary ids section.
918 | uint64_t BinaryIdsSectionSize = 0; |
919 | |
920 | // Remove duplicate binary ids. |
921 | llvm::sort(C&: BinaryIds); |
922 | BinaryIds.erase(first: llvm::unique(R&: BinaryIds), last: BinaryIds.end()); |
923 | |
924 | for (const auto &BI : BinaryIds) { |
925 | // Increment by binary id length data type size. |
926 | BinaryIdsSectionSize += sizeof(uint64_t); |
927 | // Increment by binary id data length, aligned to 8 bytes. |
928 | BinaryIdsSectionSize += alignToPowerOf2(Value: BI.size(), Align: sizeof(uint64_t)); |
929 | } |
930 | // Write binary ids section size. |
931 | OS.write(V: BinaryIdsSectionSize); |
932 | |
933 | for (const auto &BI : BinaryIds) { |
934 | uint64_t BILen = BI.size(); |
935 | // Write binary id length. |
936 | OS.write(V: BILen); |
937 | // Write binary id data. |
938 | for (unsigned K = 0; K < BILen; K++) |
939 | OS.writeByte(V: BI[K]); |
940 | // Write padding if necessary. |
941 | uint64_t PaddingSize = alignToPowerOf2(Value: BILen, Align: sizeof(uint64_t)) - BILen; |
942 | for (unsigned K = 0; K < PaddingSize; K++) |
943 | OS.writeByte(V: 0); |
944 | } |
945 | |
946 | uint64_t VTableNamesSectionStart = OS.tell(); |
947 | |
948 | if (!WritePrevVersion) |
949 | if (Error E = writeVTableNames(OS)) |
950 | return E; |
951 | |
952 | uint64_t TemporalProfTracesSectionStart = 0; |
953 | if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) { |
954 | TemporalProfTracesSectionStart = OS.tell(); |
955 | OS.write(V: TemporalProfTraces.size()); |
956 | OS.write(V: TemporalProfTraceStreamSize); |
957 | for (auto &Trace : TemporalProfTraces) { |
958 | OS.write(V: Trace.Weight); |
959 | OS.write(V: Trace.FunctionNameRefs.size()); |
960 | for (auto &NameRef : Trace.FunctionNameRefs) |
961 | OS.write(V: NameRef); |
962 | } |
963 | } |
964 | |
965 | // Allocate space for data to be serialized out. |
966 | std::unique_ptr<IndexedInstrProf::Summary> TheSummary = |
967 | IndexedInstrProf::allocSummary(TotalSize: SummarySize); |
968 | // Compute the Summary and copy the data to the data |
969 | // structure to be serialized out (to disk or buffer). |
970 | std::unique_ptr<ProfileSummary> PS = ISB.getSummary(); |
971 | setSummary(TheSummary: TheSummary.get(), PS&: *PS); |
972 | InfoObj->SummaryBuilder = nullptr; |
973 | |
974 | // For Context Sensitive summary. |
975 | std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr; |
976 | if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) { |
977 | TheCSSummary = IndexedInstrProf::allocSummary(TotalSize: SummarySize); |
978 | std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary(); |
979 | setSummary(TheSummary: TheCSSummary.get(), PS&: *CSPS); |
980 | } |
981 | InfoObj->CSSummaryBuilder = nullptr; |
982 | |
  SmallVector<uint64_t, 8> HeaderOffsets = {HashTableStart, MemProfSectionStart,
                                            BinaryIdSectionStart,
                                            TemporalProfTracesSectionStart};
  if (!WritePrevVersion)
    HeaderOffsets.push_back(VTableNamesSectionStart);

  PatchItem PatchItems[] = {
      // Patch the Header fields
      {BackPatchStartOffset, HeaderOffsets},
      // Patch the summary data.
      {SummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheSummary.get()),
                          SummarySize / sizeof(uint64_t))},
      {CSSummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheCSSummary.get()),
                          CSSummarySize)}};
999 | |
1000 | OS.patch(P: PatchItems); |
1001 | |
1002 | for (const auto &I : FunctionData) |
1003 | for (const auto &F : I.getValue()) |
1004 | if (Error E = validateRecord(Func: F.second)) |
1005 | return E; |
1006 | |
1007 | return Error::success(); |
1008 | } |
1009 | |
1010 | Error InstrProfWriter::write(raw_fd_ostream &OS) { |
1011 | // Write the hash table. |
1012 | ProfOStream POS(OS); |
1013 | return writeImpl(OS&: POS); |
1014 | } |
1015 | |
1016 | Error InstrProfWriter::write(raw_string_ostream &OS) { |
1017 | ProfOStream POS(OS); |
1018 | return writeImpl(OS&: POS); |
1019 | } |
1020 | |
1021 | std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() { |
1022 | std::string Data; |
1023 | raw_string_ostream OS(Data); |
1024 | // Write the hash table. |
1025 | if (Error E = write(OS)) |
1026 | return nullptr; |
1027 | // Return this in an aligned memory buffer. |
1028 | return MemoryBuffer::getMemBufferCopy(InputData: Data); |
1029 | } |
1030 | |
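// Textual names of the value profile kinds, expanded from InstrProfData.inc
// and indexed by the kind's enumerator value; used by writeRecordInText.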
1031 | static const char *ValueProfKindStr[] = { |
1032 | #define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator, |
1033 | #include "llvm/ProfileData/InstrProfData.inc" |
1034 | }; |
1035 | |
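// Check that no value site contains duplicate values. Indirect call and
// vtable target sites are exempt from this check.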
1036 | Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) { |
1037 | for (uint32_t VK = 0; VK <= IPVK_Last; VK++) { |
1038 | if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget) |
1039 | continue; |
1040 | uint32_t NS = Func.getNumValueSites(ValueKind: VK); |
1041 | for (uint32_t S = 0; S < NS; S++) { |
1042 | DenseSet<uint64_t> SeenValues; |
1043 | for (const auto &V : Func.getValueArrayForSite(ValueKind: VK, Site: S)) |
1044 | if (!SeenValues.insert(V: V.Value).second) |
1045 | return make_error<InstrProfError>(Args: instrprof_error::invalid_prof); |
1046 | } |
1047 | } |
1048 | |
1049 | return Error::success(); |
1050 | } |
1051 | |
1052 | void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, |
1053 | const InstrProfRecord &Func, |
1054 | InstrProfSymtab &Symtab, |
1055 | raw_fd_ostream &OS) { |
1056 | OS << Name << "\n" ; |
1057 | OS << "# Func Hash:\n" << Hash << "\n" ; |
1058 | OS << "# Num Counters:\n" << Func.Counts.size() << "\n" ; |
1059 | OS << "# Counter Values:\n" ; |
1060 | for (uint64_t Count : Func.Counts) |
1061 | OS << Count << "\n" ; |
1062 | |
1063 | if (Func.BitmapBytes.size() > 0) { |
1064 | OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n" ; |
1065 | OS << "# Bitmap Byte Values:\n" ; |
1066 | for (uint8_t Byte : Func.BitmapBytes) { |
1067 | OS << "0x" ; |
1068 | OS.write_hex(N: Byte); |
1069 | OS << "\n" ; |
1070 | } |
1071 | OS << "\n" ; |
1072 | } |
1073 | |
1074 | uint32_t NumValueKinds = Func.getNumValueKinds(); |
1075 | if (!NumValueKinds) { |
1076 | OS << "\n" ; |
1077 | return; |
1078 | } |
1079 | |
1080 | OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n" ; |
1081 | for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) { |
1082 | uint32_t NS = Func.getNumValueSites(ValueKind: VK); |
1083 | if (!NS) |
1084 | continue; |
1085 | OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n" ; |
1086 | OS << "# NumValueSites:\n" << NS << "\n" ; |
1087 | for (uint32_t S = 0; S < NS; S++) { |
1088 | auto VD = Func.getValueArrayForSite(ValueKind: VK, Site: S); |
1089 | OS << VD.size() << "\n" ; |
1090 | for (const auto &V : VD) { |
1091 | if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget) |
1092 | OS << Symtab.getFuncOrVarNameIfDefined(MD5Hash: V.Value) << ":" << V.Count |
1093 | << "\n" ; |
1094 | else |
1095 | OS << V.Value << ":" << V.Count << "\n" ; |
1096 | } |
1097 | } |
1098 | } |
1099 | |
1100 | OS << "\n" ; |
1101 | } |
1102 | |
1103 | Error InstrProfWriter::writeText(raw_fd_ostream &OS) { |
1104 | // Check CS first since it implies an IR level profile. |
1105 | if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) |
1106 | OS << "# CSIR level Instrumentation Flag\n:csir\n" ; |
1107 | else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) |
1108 | OS << "# IR level Instrumentation Flag\n:ir\n" ; |
1109 | |
1110 | if (static_cast<bool>(ProfileKind & |
1111 | InstrProfKind::FunctionEntryInstrumentation)) |
1112 | OS << "# Always instrument the function entry block\n:entry_first\n" ; |
1113 | if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage)) |
1114 | OS << "# Instrument block coverage\n:single_byte_coverage\n" ; |
1115 | InstrProfSymtab Symtab; |
1116 | |
1117 | using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>; |
1118 | using RecordType = std::pair<StringRef, FuncPair>; |
1119 | SmallVector<RecordType, 4> OrderedFuncData; |
1120 | |
1121 | for (const auto &I : FunctionData) { |
1122 | if (shouldEncodeData(PD: I.getValue())) { |
1123 | if (Error E = Symtab.addFuncName(FuncName: I.getKey())) |
1124 | return E; |
1125 | for (const auto &Func : I.getValue()) |
1126 | OrderedFuncData.push_back(Elt: std::make_pair(x: I.getKey(), y: Func)); |
1127 | } |
1128 | } |
1129 | |
1130 | for (const auto &VTableName : VTableNames) |
1131 | if (Error E = Symtab.addVTableName(VTableName: VTableName.getKey())) |
1132 | return E; |
1133 | |
1134 | if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) |
1135 | writeTextTemporalProfTraceData(OS, Symtab); |
1136 | |
1137 | llvm::sort(C&: OrderedFuncData, Comp: [](const RecordType &A, const RecordType &B) { |
1138 | return std::tie(args: A.first, args: A.second.first) < |
1139 | std::tie(args: B.first, args: B.second.first); |
1140 | }); |
1141 | |
1142 | for (const auto &record : OrderedFuncData) { |
1143 | const StringRef &Name = record.first; |
1144 | const FuncPair &Func = record.second; |
1145 | writeRecordInText(Name, Hash: Func.first, Func: Func.second, Symtab, OS); |
1146 | } |
1147 | |
1148 | for (const auto &record : OrderedFuncData) { |
1149 | const FuncPair &Func = record.second; |
1150 | if (Error E = validateRecord(Func: Func.second)) |
1151 | return E; |
1152 | } |
1153 | |
1154 | return Error::success(); |
1155 | } |
1156 | |
1157 | void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS, |
1158 | InstrProfSymtab &Symtab) { |
1159 | OS << ":temporal_prof_traces\n" ; |
1160 | OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n" ; |
1161 | OS << "# Temporal Profile Trace Stream Size:\n" |
1162 | << TemporalProfTraceStreamSize << "\n" ; |
1163 | for (auto &Trace : TemporalProfTraces) { |
1164 | OS << "# Weight:\n" << Trace.Weight << "\n" ; |
1165 | for (auto &NameRef : Trace.FunctionNameRefs) |
1166 | OS << Symtab.getFuncOrVarName(MD5Hash: NameRef) << "," ; |
1167 | OS << "\n" ; |
1168 | } |
1169 | OS << "\n" ; |
1170 | } |
1171 | |