1//===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the class that reads LLVM sample profiles. It
10// supports three file formats: text, binary and gcov.
11//
12// The textual representation is useful for debugging and testing purposes. The
13// binary representation is more compact, resulting in smaller file sizes.
14//
15// The gcov encoding is the one generated by GCC's AutoFDO profile creation
16// tool (https://github.com/google/autofdo)
17//
18// All three encodings can be used interchangeably as an input sample profile.
19//
20//===----------------------------------------------------------------------===//
21
22#include "llvm/ProfileData/SampleProfReader.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/IR/Module.h"
27#include "llvm/IR/ProfileSummary.h"
28#include "llvm/ProfileData/ProfileCommon.h"
29#include "llvm/ProfileData/SampleProf.h"
30#include "llvm/Support/CommandLine.h"
31#include "llvm/Support/Compression.h"
32#include "llvm/Support/ErrorOr.h"
33#include "llvm/Support/JSON.h"
34#include "llvm/Support/LEB128.h"
35#include "llvm/Support/LineIterator.h"
36#include "llvm/Support/MD5.h"
37#include "llvm/Support/MemoryBuffer.h"
38#include "llvm/Support/VirtualFileSystem.h"
39#include "llvm/Support/raw_ostream.h"
40#include <algorithm>
41#include <cstddef>
42#include <cstdint>
43#include <limits>
44#include <memory>
45#include <system_error>
46#include <vector>
47
48using namespace llvm;
49using namespace sampleprof;
50
51#define DEBUG_TYPE "samplepgo-reader"
52
53// This internal option specifies if the profile uses FS discriminators.
54// It only applies to text, and binary format profiles.
55// For ext-binary format profiles, the flag is set in the summary.
56static cl::opt<bool> ProfileIsFSDisciminator(
57 "profile-isfs", cl::Hidden, cl::init(Val: false),
58 cl::desc("Profile uses flow sensitive discriminators"));
59
60/// Dump the function profile for \p FName.
61///
62/// \param FContext Name + context of the function to print.
63/// \param OS Stream to emit the output to.
64void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
65 raw_ostream &OS) {
66 OS << "Function: " << FS.getContext().toString() << ": " << FS;
67}
68
69/// Dump all the function profiles found on stream \p OS.
70void SampleProfileReader::dump(raw_ostream &OS) {
71 std::vector<NameFunctionSamples> V;
72 sortFuncProfiles(ProfileMap: Profiles, SortedProfiles&: V);
73 for (const auto &I : V)
74 dumpFunctionProfile(FS: *I.second, OS);
75}
76
77static void dumpFunctionProfileJson(const FunctionSamples &S,
78 json::OStream &JOS, bool TopLevel = false) {
79 auto DumpBody = [&](const BodySampleMap &BodySamples) {
80 for (const auto &I : BodySamples) {
81 const LineLocation &Loc = I.first;
82 const SampleRecord &Sample = I.second;
83 JOS.object(Contents: [&] {
84 JOS.attribute(Key: "line", Contents: Loc.LineOffset);
85 if (Loc.Discriminator)
86 JOS.attribute(Key: "discriminator", Contents: Loc.Discriminator);
87 JOS.attribute(Key: "samples", Contents: Sample.getSamples());
88
89 auto CallTargets = Sample.getSortedCallTargets();
90 if (!CallTargets.empty()) {
91 JOS.attributeArray(Key: "calls", Contents: [&] {
92 for (const auto &J : CallTargets) {
93 JOS.object(Contents: [&] {
94 JOS.attribute(Key: "function", Contents: J.first.str());
95 JOS.attribute(Key: "samples", Contents: J.second);
96 });
97 }
98 });
99 }
100 });
101 }
102 };
103
104 auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) {
105 for (const auto &I : CallsiteSamples)
106 for (const auto &FS : I.second) {
107 const LineLocation &Loc = I.first;
108 const FunctionSamples &CalleeSamples = FS.second;
109 JOS.object(Contents: [&] {
110 JOS.attribute(Key: "line", Contents: Loc.LineOffset);
111 if (Loc.Discriminator)
112 JOS.attribute(Key: "discriminator", Contents: Loc.Discriminator);
113 JOS.attributeArray(
114 Key: "samples", Contents: [&] { dumpFunctionProfileJson(S: CalleeSamples, JOS); });
115 });
116 }
117 };
118
119 JOS.object(Contents: [&] {
120 JOS.attribute(Key: "name", Contents: S.getFunction().str());
121 JOS.attribute(Key: "total", Contents: S.getTotalSamples());
122 if (TopLevel)
123 JOS.attribute(Key: "head", Contents: S.getHeadSamples());
124
125 const auto &BodySamples = S.getBodySamples();
126 if (!BodySamples.empty())
127 JOS.attributeArray(Key: "body", Contents: [&] { DumpBody(BodySamples); });
128
129 const auto &CallsiteSamples = S.getCallsiteSamples();
130 if (!CallsiteSamples.empty())
131 JOS.attributeArray(Key: "callsites",
132 Contents: [&] { DumpCallsiteSamples(CallsiteSamples); });
133 });
134}
135
136/// Dump all the function profiles found on stream \p OS in the JSON format.
137void SampleProfileReader::dumpJson(raw_ostream &OS) {
138 std::vector<NameFunctionSamples> V;
139 sortFuncProfiles(ProfileMap: Profiles, SortedProfiles&: V);
140 json::OStream JOS(OS, 2);
141 JOS.arrayBegin();
142 for (const auto &F : V)
143 dumpFunctionProfileJson(S: *F.second, JOS, TopLevel: true);
144 JOS.arrayEnd();
145
146 // Emit a newline character at the end as json::OStream doesn't emit one.
147 OS << "\n";
148}
149
150/// Parse \p Input as function head.
151///
152/// Parse one line of \p Input, and update function name in \p FName,
153/// function's total sample count in \p NumSamples, function's entry
154/// count in \p NumHeadSamples.
155///
156/// \returns true if parsing is successful.
157static bool ParseHead(const StringRef &Input, StringRef &FName,
158 uint64_t &NumSamples, uint64_t &NumHeadSamples) {
159 if (Input[0] == ' ')
160 return false;
161 size_t n2 = Input.rfind(C: ':');
162 size_t n1 = Input.rfind(C: ':', From: n2 - 1);
163 FName = Input.substr(Start: 0, N: n1);
164 if (Input.substr(Start: n1 + 1, N: n2 - n1 - 1).getAsInteger(Radix: 10, Result&: NumSamples))
165 return false;
166 if (Input.substr(Start: n2 + 1).getAsInteger(Radix: 10, Result&: NumHeadSamples))
167 return false;
168 return true;
169}
170
/// Returns true if line offset \p L fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLineOffset = 0xffff;
  return L <= MaxLineOffset;
}
173
174/// Parse \p Input that contains metadata.
175/// Possible metadata:
176/// - CFG Checksum information:
177/// !CFGChecksum: 12345
178/// - CFG Checksum information:
179/// !Attributes: 1
180/// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
181static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
182 uint32_t &Attributes) {
183 if (Input.starts_with(Prefix: "!CFGChecksum:")) {
184 StringRef CFGInfo = Input.substr(Start: strlen(s: "!CFGChecksum:")).trim();
185 return !CFGInfo.getAsInteger(Radix: 10, Result&: FunctionHash);
186 }
187
188 if (Input.starts_with(Prefix: "!Attributes:")) {
189 StringRef Attrib = Input.substr(Start: strlen(s: "!Attributes:")).trim();
190 return !Attrib.getAsInteger(Radix: 10, Result&: Attributes);
191 }
192
193 return false;
194}
195
/// Kind of an indented (non-header) line in a text profile, as classified by
/// ParseLine().
enum class LineType {
  /// Location followed by `callee:NUM`: an inlined callsite profile.
  CallSiteProfile,
  /// Location followed by a sample count and optional call targets.
  BodyProfile,
  /// Line whose first non-space character is '!'.
  Metadata,
};
201
202/// Parse \p Input as line sample.
203///
204/// \param Input input line.
205/// \param LineTy Type of this line.
206/// \param Depth the depth of the inline stack.
207/// \param NumSamples total samples of the line/inlined callsite.
208/// \param LineOffset line offset to the start of the function.
209/// \param Discriminator discriminator of the line.
210/// \param TargetCountMap map from indirect call target to count.
211/// \param FunctionHash the function's CFG hash, used by pseudo probe.
212///
213/// returns true if parsing is successful.
214static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
215 uint64_t &NumSamples, uint32_t &LineOffset,
216 uint32_t &Discriminator, StringRef &CalleeName,
217 DenseMap<StringRef, uint64_t> &TargetCountMap,
218 uint64_t &FunctionHash, uint32_t &Attributes,
219 bool &IsFlat) {
220 for (Depth = 0; Input[Depth] == ' '; Depth++)
221 ;
222 if (Depth == 0)
223 return false;
224
225 if (Input[Depth] == '!') {
226 LineTy = LineType::Metadata;
227 // This metadata is only for manual inspection only. We already created a
228 // FunctionSamples and put it in the profile map, so there is no point
229 // to skip profiles even they have no use for ThinLTO.
230 if (Input == StringRef(" !Flat")) {
231 IsFlat = true;
232 return true;
233 }
234 return parseMetadata(Input: Input.substr(Start: Depth), FunctionHash, Attributes);
235 }
236
237 size_t n1 = Input.find(C: ':');
238 StringRef Loc = Input.substr(Start: Depth, N: n1 - Depth);
239 size_t n2 = Loc.find(C: '.');
240 if (n2 == StringRef::npos) {
241 if (Loc.getAsInteger(Radix: 10, Result&: LineOffset) || !isOffsetLegal(L: LineOffset))
242 return false;
243 Discriminator = 0;
244 } else {
245 if (Loc.substr(Start: 0, N: n2).getAsInteger(Radix: 10, Result&: LineOffset))
246 return false;
247 if (Loc.substr(Start: n2 + 1).getAsInteger(Radix: 10, Result&: Discriminator))
248 return false;
249 }
250
251 StringRef Rest = Input.substr(Start: n1 + 2);
252 if (isDigit(C: Rest[0])) {
253 LineTy = LineType::BodyProfile;
254 size_t n3 = Rest.find(C: ' ');
255 if (n3 == StringRef::npos) {
256 if (Rest.getAsInteger(Radix: 10, Result&: NumSamples))
257 return false;
258 } else {
259 if (Rest.substr(Start: 0, N: n3).getAsInteger(Radix: 10, Result&: NumSamples))
260 return false;
261 }
262 // Find call targets and their sample counts.
263 // Note: In some cases, there are symbols in the profile which are not
264 // mangled. To accommodate such cases, use colon + integer pairs as the
265 // anchor points.
266 // An example:
267 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
268 // ":1000" and ":437" are used as anchor points so the string above will
269 // be interpreted as
270 // target: _M_construct<char *>
271 // count: 1000
272 // target: string_view<std::allocator<char> >
273 // count: 437
274 while (n3 != StringRef::npos) {
275 n3 += Rest.substr(Start: n3).find_first_not_of(C: ' ');
276 Rest = Rest.substr(Start: n3);
277 n3 = Rest.find_first_of(C: ':');
278 if (n3 == StringRef::npos || n3 == 0)
279 return false;
280
281 StringRef Target;
282 uint64_t count, n4;
283 while (true) {
284 // Get the segment after the current colon.
285 StringRef AfterColon = Rest.substr(Start: n3 + 1);
286 // Get the target symbol before the current colon.
287 Target = Rest.substr(Start: 0, N: n3);
288 // Check if the word after the current colon is an integer.
289 n4 = AfterColon.find_first_of(C: ' ');
290 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
291 StringRef WordAfterColon = Rest.substr(Start: n3 + 1, N: n4 - n3 - 1);
292 if (!WordAfterColon.getAsInteger(Radix: 10, Result&: count))
293 break;
294
295 // Try to find the next colon.
296 uint64_t n5 = AfterColon.find_first_of(C: ':');
297 if (n5 == StringRef::npos)
298 return false;
299 n3 += n5 + 1;
300 }
301
302 // An anchor point is found. Save the {target, count} pair
303 TargetCountMap[Target] = count;
304 if (n4 == Rest.size())
305 break;
306 // Change n3 to the next blank space after colon + integer pair.
307 n3 = n4;
308 }
309 } else {
310 LineTy = LineType::CallSiteProfile;
311 size_t n3 = Rest.find_last_of(C: ':');
312 CalleeName = Rest.substr(Start: 0, N: n3);
313 if (Rest.substr(Start: n3 + 1).getAsInteger(Radix: 10, Result&: NumSamples))
314 return false;
315 }
316 return true;
317}
318
319/// Load samples from a text file.
320///
321/// See the documentation at the top of the file for an explanation of
322/// the expected format.
323///
324/// \returns true if the file was loaded successfully, false otherwise.
325std::error_code SampleProfileReaderText::readImpl() {
326 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
327 sampleprof_error Result = sampleprof_error::success;
328
329 InlineCallStack InlineStack;
330 uint32_t TopLevelProbeProfileCount = 0;
331
332 // DepthMetadata tracks whether we have processed metadata for the current
333 // top-level or nested function profile.
334 uint32_t DepthMetadata = 0;
335
336 std::vector<SampleContext *> FlatSamples;
337
338 ProfileIsFS = ProfileIsFSDisciminator;
339 FunctionSamples::ProfileIsFS = ProfileIsFS;
340 for (; !LineIt.is_at_eof(); ++LineIt) {
341 size_t pos = LineIt->find_first_not_of(C: ' ');
342 if (pos == LineIt->npos || (*LineIt)[pos] == '#')
343 continue;
344 // Read the header of each function.
345 //
346 // Note that for function identifiers we are actually expecting
347 // mangled names, but we may not always get them. This happens when
348 // the compiler decides not to emit the function (e.g., it was inlined
349 // and removed). In this case, the binary will not have the linkage
350 // name for the function, so the profiler will emit the function's
351 // unmangled name, which may contain characters like ':' and '>' in its
352 // name (member functions, templates, etc).
353 //
354 // The only requirement we place on the identifier, then, is that it
355 // should not begin with a number.
356 if ((*LineIt)[0] != ' ') {
357 uint64_t NumSamples, NumHeadSamples;
358 StringRef FName;
359 if (!ParseHead(Input: *LineIt, FName, NumSamples, NumHeadSamples)) {
360 reportError(LineNumber: LineIt.line_number(),
361 Msg: "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
362 return sampleprof_error::malformed;
363 }
364 DepthMetadata = 0;
365 SampleContext FContext(FName, CSNameTable);
366 if (FContext.hasContext())
367 ++CSProfileCount;
368 FunctionSamples &FProfile = Profiles.create(Ctx: FContext);
369 mergeSampleProfErrors(Accumulator&: Result, Result: FProfile.addTotalSamples(Num: NumSamples));
370 mergeSampleProfErrors(Accumulator&: Result, Result: FProfile.addHeadSamples(Num: NumHeadSamples));
371 InlineStack.clear();
372 InlineStack.push_back(Elt: &FProfile);
373 } else {
374 uint64_t NumSamples;
375 StringRef FName;
376 DenseMap<StringRef, uint64_t> TargetCountMap;
377 uint32_t Depth, LineOffset, Discriminator;
378 LineType LineTy;
379 uint64_t FunctionHash = 0;
380 uint32_t Attributes = 0;
381 bool IsFlat = false;
382 if (!ParseLine(Input: *LineIt, LineTy, Depth, NumSamples, LineOffset,
383 Discriminator, CalleeName&: FName, TargetCountMap, FunctionHash,
384 Attributes, IsFlat)) {
385 reportError(LineNumber: LineIt.line_number(),
386 Msg: "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
387 *LineIt);
388 return sampleprof_error::malformed;
389 }
390 if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
391 // Metadata must be put at the end of a function profile.
392 reportError(LineNumber: LineIt.line_number(),
393 Msg: "Found non-metadata after metadata: " + *LineIt);
394 return sampleprof_error::malformed;
395 }
396
397 // Here we handle FS discriminators.
398 Discriminator &= getDiscriminatorMask();
399
400 while (InlineStack.size() > Depth) {
401 InlineStack.pop_back();
402 }
403 switch (LineTy) {
404 case LineType::CallSiteProfile: {
405 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
406 Loc: LineLocation(LineOffset, Discriminator))[FunctionId(FName)];
407 FSamples.setFunction(FunctionId(FName));
408 mergeSampleProfErrors(Accumulator&: Result, Result: FSamples.addTotalSamples(Num: NumSamples));
409 InlineStack.push_back(Elt: &FSamples);
410 DepthMetadata = 0;
411 break;
412 }
413 case LineType::BodyProfile: {
414 FunctionSamples &FProfile = *InlineStack.back();
415 for (const auto &name_count : TargetCountMap) {
416 mergeSampleProfErrors(Accumulator&: Result, Result: FProfile.addCalledTargetSamples(
417 LineOffset, Discriminator,
418 Func: FunctionId(name_count.first),
419 Num: name_count.second));
420 }
421 mergeSampleProfErrors(
422 Accumulator&: Result,
423 Result: FProfile.addBodySamples(LineOffset, Discriminator, Num: NumSamples));
424 break;
425 }
426 case LineType::Metadata: {
427 FunctionSamples &FProfile = *InlineStack.back();
428 if (FunctionHash) {
429 FProfile.setFunctionHash(FunctionHash);
430 if (Depth == 1)
431 ++TopLevelProbeProfileCount;
432 }
433 FProfile.getContext().setAllAttributes(Attributes);
434 if (Attributes & (uint32_t)ContextShouldBeInlined)
435 ProfileIsPreInlined = true;
436 DepthMetadata = Depth;
437 if (IsFlat) {
438 if (Depth == 1)
439 FlatSamples.push_back(x: &FProfile.getContext());
440 else
441 Ctx.diagnose(DI: DiagnosticInfoSampleProfile(
442 Buffer->getBufferIdentifier(), LineIt.line_number(),
443 "!Flat may only be used at top level function.", DS_Warning));
444 }
445 break;
446 }
447 }
448 }
449 }
450
451 // Honor the option to skip flat functions. Since they are already added to
452 // the profile map, remove them all here.
453 if (SkipFlatProf)
454 for (SampleContext *FlatSample : FlatSamples)
455 Profiles.erase(Ctx: *FlatSample);
456
457 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
458 "Cannot have both context-sensitive and regular profile");
459 ProfileIsCS = (CSProfileCount > 0);
460 assert((TopLevelProbeProfileCount == 0 ||
461 TopLevelProbeProfileCount == Profiles.size()) &&
462 "Cannot have both probe-based profiles and regular profiles");
463 ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
464 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
465 FunctionSamples::ProfileIsCS = ProfileIsCS;
466 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
467
468 if (Result == sampleprof_error::success)
469 computeSummary();
470
471 return Result;
472}
473
474bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
475 bool result = false;
476
477 // Check that the first non-comment line is a valid function header.
478 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
479 if (!LineIt.is_at_eof()) {
480 if ((*LineIt)[0] != ' ') {
481 uint64_t NumSamples, NumHeadSamples;
482 StringRef FName;
483 result = ParseHead(Input: *LineIt, FName, NumSamples, NumHeadSamples);
484 }
485 }
486
487 return result;
488}
489
490template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
491 unsigned NumBytesRead = 0;
492 uint64_t Val = decodeULEB128(p: Data, n: &NumBytesRead);
493
494 if (Val > std::numeric_limits<T>::max()) {
495 std::error_code EC = sampleprof_error::malformed;
496 reportError(LineNumber: 0, Msg: EC.message());
497 return EC;
498 } else if (Data + NumBytesRead > End) {
499 std::error_code EC = sampleprof_error::truncated;
500 reportError(LineNumber: 0, Msg: EC.message());
501 return EC;
502 }
503
504 Data += NumBytesRead;
505 return static_cast<T>(Val);
506}
507
508ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
509 StringRef Str(reinterpret_cast<const char *>(Data));
510 if (Data + Str.size() + 1 > End) {
511 std::error_code EC = sampleprof_error::truncated;
512 reportError(LineNumber: 0, Msg: EC.message());
513 return EC;
514 }
515
516 Data += Str.size() + 1;
517 return Str;
518}
519
520template <typename T>
521ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
522 if (Data + sizeof(T) > End) {
523 std::error_code EC = sampleprof_error::truncated;
524 reportError(LineNumber: 0, Msg: EC.message());
525 return EC;
526 }
527
528 using namespace support;
529 T Val = endian::readNext<T, llvm::endianness::little>(Data);
530 return Val;
531}
532
533template <typename T>
534inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
535 auto Idx = readNumber<size_t>();
536 if (std::error_code EC = Idx.getError())
537 return EC;
538 if (*Idx >= Table.size())
539 return sampleprof_error::truncated_name_table;
540 return *Idx;
541}
542
543ErrorOr<FunctionId>
544SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
545 auto Idx = readStringIndex(Table&: NameTable);
546 if (std::error_code EC = Idx.getError())
547 return EC;
548 if (RetIdx)
549 *RetIdx = *Idx;
550 return NameTable[*Idx];
551}
552
553ErrorOr<SampleContextFrames>
554SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) {
555 auto ContextIdx = readNumber<size_t>();
556 if (std::error_code EC = ContextIdx.getError())
557 return EC;
558 if (*ContextIdx >= CSNameTable.size())
559 return sampleprof_error::truncated_name_table;
560 if (RetIdx)
561 *RetIdx = *ContextIdx;
562 return CSNameTable[*ContextIdx];
563}
564
565ErrorOr<std::pair<SampleContext, uint64_t>>
566SampleProfileReaderBinary::readSampleContextFromTable() {
567 SampleContext Context;
568 size_t Idx;
569 if (ProfileIsCS) {
570 auto FContext(readContextFromTable(RetIdx: &Idx));
571 if (std::error_code EC = FContext.getError())
572 return EC;
573 Context = SampleContext(*FContext);
574 } else {
575 auto FName(readStringFromTable(RetIdx: &Idx));
576 if (std::error_code EC = FName.getError())
577 return EC;
578 Context = SampleContext(*FName);
579 }
580 // Since MD5SampleContextStart may point to the profile's file data, need to
581 // make sure it is reading the same value on big endian CPU.
582 uint64_t Hash = support::endian::read64le(P: MD5SampleContextStart + Idx);
583 // Lazy computing of hash value, write back to the table to cache it. Only
584 // compute the context's hash value if it is being referenced for the first
585 // time.
586 if (Hash == 0) {
587 assert(MD5SampleContextStart == MD5SampleContextTable.data());
588 Hash = Context.getHashCode();
589 support::endian::write64le(P: &MD5SampleContextTable[Idx], V: Hash);
590 }
591 return std::make_pair(x&: Context, y&: Hash);
592}
593
594std::error_code
595SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
596 auto NumSamples = readNumber<uint64_t>();
597 if (std::error_code EC = NumSamples.getError())
598 return EC;
599 FProfile.addTotalSamples(Num: *NumSamples);
600
601 // Read the samples in the body.
602 auto NumRecords = readNumber<uint32_t>();
603 if (std::error_code EC = NumRecords.getError())
604 return EC;
605
606 for (uint32_t I = 0; I < *NumRecords; ++I) {
607 auto LineOffset = readNumber<uint64_t>();
608 if (std::error_code EC = LineOffset.getError())
609 return EC;
610
611 if (!isOffsetLegal(L: *LineOffset)) {
612 return std::error_code();
613 }
614
615 auto Discriminator = readNumber<uint64_t>();
616 if (std::error_code EC = Discriminator.getError())
617 return EC;
618
619 auto NumSamples = readNumber<uint64_t>();
620 if (std::error_code EC = NumSamples.getError())
621 return EC;
622
623 auto NumCalls = readNumber<uint32_t>();
624 if (std::error_code EC = NumCalls.getError())
625 return EC;
626
627 // Here we handle FS discriminators:
628 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
629
630 for (uint32_t J = 0; J < *NumCalls; ++J) {
631 auto CalledFunction(readStringFromTable());
632 if (std::error_code EC = CalledFunction.getError())
633 return EC;
634
635 auto CalledFunctionSamples = readNumber<uint64_t>();
636 if (std::error_code EC = CalledFunctionSamples.getError())
637 return EC;
638
639 FProfile.addCalledTargetSamples(LineOffset: *LineOffset, Discriminator: DiscriminatorVal,
640 Func: *CalledFunction, Num: *CalledFunctionSamples);
641 }
642
643 FProfile.addBodySamples(LineOffset: *LineOffset, Discriminator: DiscriminatorVal, Num: *NumSamples);
644 }
645
646 // Read all the samples for inlined function calls.
647 auto NumCallsites = readNumber<uint32_t>();
648 if (std::error_code EC = NumCallsites.getError())
649 return EC;
650
651 for (uint32_t J = 0; J < *NumCallsites; ++J) {
652 auto LineOffset = readNumber<uint64_t>();
653 if (std::error_code EC = LineOffset.getError())
654 return EC;
655
656 auto Discriminator = readNumber<uint64_t>();
657 if (std::error_code EC = Discriminator.getError())
658 return EC;
659
660 auto FName(readStringFromTable());
661 if (std::error_code EC = FName.getError())
662 return EC;
663
664 // Here we handle FS discriminators:
665 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
666
667 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
668 Loc: LineLocation(*LineOffset, DiscriminatorVal))[*FName];
669 CalleeProfile.setFunction(*FName);
670 if (std::error_code EC = readProfile(FProfile&: CalleeProfile))
671 return EC;
672 }
673
674 return sampleprof_error::success;
675}
676
677std::error_code
678SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start,
679 SampleProfileMap &Profiles) {
680 Data = Start;
681 auto NumHeadSamples = readNumber<uint64_t>();
682 if (std::error_code EC = NumHeadSamples.getError())
683 return EC;
684
685 auto FContextHash(readSampleContextFromTable());
686 if (std::error_code EC = FContextHash.getError())
687 return EC;
688
689 auto &[FContext, Hash] = *FContextHash;
690 // Use the cached hash value for insertion instead of recalculating it.
691 auto Res = Profiles.try_emplace(Hash, Key: FContext, Args: FunctionSamples());
692 FunctionSamples &FProfile = Res.first->second;
693 FProfile.setContext(FContext);
694 FProfile.addHeadSamples(Num: *NumHeadSamples);
695
696 if (FContext.hasContext())
697 CSProfileCount++;
698
699 if (std::error_code EC = readProfile(FProfile))
700 return EC;
701 return sampleprof_error::success;
702}
703
704std::error_code
705SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
706 return readFuncProfile(Start, Profiles);
707}
708
709std::error_code SampleProfileReaderBinary::readImpl() {
710 ProfileIsFS = ProfileIsFSDisciminator;
711 FunctionSamples::ProfileIsFS = ProfileIsFS;
712 while (Data < End) {
713 if (std::error_code EC = readFuncProfile(Start: Data))
714 return EC;
715 }
716
717 return sampleprof_error::success;
718}
719
720std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
721 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
722 Data = Start;
723 End = Start + Size;
724 switch (Entry.Type) {
725 case SecProfSummary:
726 if (std::error_code EC = readSummary())
727 return EC;
728 if (hasSecFlag(Entry, Flag: SecProfSummaryFlags::SecFlagPartial))
729 Summary->setPartialProfile(true);
730 if (hasSecFlag(Entry, Flag: SecProfSummaryFlags::SecFlagFullContext))
731 FunctionSamples::ProfileIsCS = ProfileIsCS = true;
732 if (hasSecFlag(Entry, Flag: SecProfSummaryFlags::SecFlagIsPreInlined))
733 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
734 if (hasSecFlag(Entry, Flag: SecProfSummaryFlags::SecFlagFSDiscriminator))
735 FunctionSamples::ProfileIsFS = ProfileIsFS = true;
736 break;
737 case SecNameTable: {
738 bool FixedLengthMD5 =
739 hasSecFlag(Entry, Flag: SecNameTableFlags::SecFlagFixedLengthMD5);
740 bool UseMD5 = hasSecFlag(Entry, Flag: SecNameTableFlags::SecFlagMD5Name);
741 // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire
742 // profile uses MD5 for function name matching in IPO passes.
743 ProfileIsMD5 = ProfileIsMD5 || UseMD5;
744 FunctionSamples::HasUniqSuffix =
745 hasSecFlag(Entry, Flag: SecNameTableFlags::SecFlagUniqSuffix);
746 if (std::error_code EC = readNameTableSec(IsMD5: UseMD5, FixedLengthMD5))
747 return EC;
748 break;
749 }
750 case SecCSNameTable: {
751 if (std::error_code EC = readCSNameTableSec())
752 return EC;
753 break;
754 }
755 case SecLBRProfile:
756 ProfileSecRange = std::make_pair(x&: Data, y&: End);
757 if (std::error_code EC = readFuncProfiles())
758 return EC;
759 break;
760 case SecFuncOffsetTable:
761 // If module is absent, we are using LLVM tools, and need to read all
762 // profiles, so skip reading the function offset table.
763 if (!M) {
764 Data = End;
765 } else {
766 assert((!ProfileIsCS ||
767 hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) &&
768 "func offset table should always be sorted in CS profile");
769 if (std::error_code EC = readFuncOffsetTable())
770 return EC;
771 }
772 break;
773 case SecFuncMetadata: {
774 ProfileIsProbeBased =
775 hasSecFlag(Entry, Flag: SecFuncMetadataFlags::SecFlagIsProbeBased);
776 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
777 ProfileHasAttribute =
778 hasSecFlag(Entry, Flag: SecFuncMetadataFlags::SecFlagHasAttribute);
779 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute))
780 return EC;
781 break;
782 }
783 case SecProfileSymbolList:
784 if (std::error_code EC = readProfileSymbolList())
785 return EC;
786 break;
787 default:
788 if (std::error_code EC = readCustomSection(Entry))
789 return EC;
790 break;
791 }
792 return sampleprof_error::success;
793}
794
795bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
796 // If profile is CS, the function offset section is expected to consist of
797 // sequences of contexts in pre-order layout
798 // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that when a matched
799 // context in the module is found, the profiles of all its callees are
800 // recursively loaded. A list is needed since the order of profiles matters.
801 if (ProfileIsCS)
802 return true;
803
804 // If the profile is MD5, use the map container to lookup functions in
805 // the module. A remapper has no use on MD5 names.
806 if (useMD5())
807 return false;
808
809 // Profile is not MD5 and if a remapper is present, the remapped name of
810 // every function needed to be matched against the module, so use the list
811 // container since each entry is accessed.
812 if (Remapper)
813 return true;
814
815 // Otherwise use the map container for faster lookup.
816 // TODO: If the cardinality of the function offset section is much smaller
817 // than the number of functions in the module, using the list container can
818 // be always faster, but we need to figure out the constant factor to
819 // determine the cutoff.
820 return false;
821}
822
823std::error_code
824SampleProfileReaderExtBinaryBase::read(const DenseSet<StringRef> &FuncsToUse,
825 SampleProfileMap &Profiles) {
826 if (FuncsToUse.empty())
827 return sampleprof_error::success;
828
829 Data = ProfileSecRange.first;
830 End = ProfileSecRange.second;
831 if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
832 return EC;
833 End = Data;
834 DenseSet<FunctionSamples *> ProfilesToReadMetadata;
835 for (auto FName : FuncsToUse) {
836 auto I = Profiles.find(Ctx: FName);
837 if (I != Profiles.end())
838 ProfilesToReadMetadata.insert(V: &I->second);
839 }
840
841 if (std::error_code EC =
842 readFuncMetadata(ProfileHasAttribute, Profiles&: ProfilesToReadMetadata))
843 return EC;
844 return sampleprof_error::success;
845}
846
847bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
848 if (!M)
849 return false;
850 FuncsToUse.clear();
851 for (auto &F : *M)
852 FuncsToUse.insert(V: FunctionSamples::getCanonicalFnName(F));
853 return true;
854}
855
856std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
857 // If there are more than one function offset section, the profile associated
858 // with the previous section has to be done reading before next one is read.
859 FuncOffsetTable.clear();
860 FuncOffsetList.clear();
861
862 auto Size = readNumber<uint64_t>();
863 if (std::error_code EC = Size.getError())
864 return EC;
865
866 bool UseFuncOffsetList = useFuncOffsetList();
867 if (UseFuncOffsetList)
868 FuncOffsetList.reserve(n: *Size);
869 else
870 FuncOffsetTable.reserve(NumEntries: *Size);
871
872 for (uint64_t I = 0; I < *Size; ++I) {
873 auto FContextHash(readSampleContextFromTable());
874 if (std::error_code EC = FContextHash.getError())
875 return EC;
876
877 auto &[FContext, Hash] = *FContextHash;
878 auto Offset = readNumber<uint64_t>();
879 if (std::error_code EC = Offset.getError())
880 return EC;
881
882 if (UseFuncOffsetList)
883 FuncOffsetList.emplace_back(args&: FContext, args&: *Offset);
884 else
885 // Because Porfiles replace existing value with new value if collision
886 // happens, we also use the latest offset so that they are consistent.
887 FuncOffsetTable[Hash] = *Offset;
888 }
889
890 return sampleprof_error::success;
891}
892
893std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles(
894 const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
895 const uint8_t *Start = Data;
896
897 if (Remapper) {
898 for (auto Name : FuncsToUse) {
899 Remapper->insert(FunctionName: Name);
900 }
901 }
902
903 if (ProfileIsCS) {
904 assert(useFuncOffsetList());
905 DenseSet<uint64_t> FuncGuidsToUse;
906 if (useMD5()) {
907 for (auto Name : FuncsToUse)
908 FuncGuidsToUse.insert(V: Function::getGUIDAssumingExternalLinkage(GlobalName: Name));
909 }
910
911 // For each function in current module, load all context profiles for
912 // the function as well as their callee contexts which can help profile
913 // guided importing for ThinLTO. This can be achieved by walking
914 // through an ordered context container, where contexts are laid out
915 // as if they were walked in preorder of a context trie. While
916 // traversing the trie, a link to the highest common ancestor node is
917 // kept so that all of its decendants will be loaded.
918 const SampleContext *CommonContext = nullptr;
919 for (const auto &NameOffset : FuncOffsetList) {
920 const auto &FContext = NameOffset.first;
921 FunctionId FName = FContext.getFunction();
922 StringRef FNameString;
923 if (!useMD5())
924 FNameString = FName.stringRef();
925
926 // For function in the current module, keep its farthest ancestor
927 // context. This can be used to load itself and its child and
928 // sibling contexts.
929 if ((useMD5() && FuncGuidsToUse.count(V: FName.getHashCode())) ||
930 (!useMD5() && (FuncsToUse.count(V: FNameString) ||
931 (Remapper && Remapper->exist(FunctionName: FNameString))))) {
932 if (!CommonContext || !CommonContext->isPrefixOf(That: FContext))
933 CommonContext = &FContext;
934 }
935
936 if (CommonContext == &FContext ||
937 (CommonContext && CommonContext->isPrefixOf(That: FContext))) {
938 // Load profile for the current context which originated from
939 // the common ancestor.
940 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
941 if (std::error_code EC = readFuncProfile(Start: FuncProfileAddr))
942 return EC;
943 }
944 }
945 } else if (useMD5()) {
946 assert(!useFuncOffsetList());
947 for (auto Name : FuncsToUse) {
948 auto GUID = MD5Hash(Str: Name);
949 auto iter = FuncOffsetTable.find(Val: GUID);
950 if (iter == FuncOffsetTable.end())
951 continue;
952 const uint8_t *FuncProfileAddr = Start + iter->second;
953 if (std::error_code EC = readFuncProfile(Start: FuncProfileAddr, Profiles))
954 return EC;
955 }
956 } else if (Remapper) {
957 assert(useFuncOffsetList());
958 for (auto NameOffset : FuncOffsetList) {
959 SampleContext FContext(NameOffset.first);
960 auto FuncName = FContext.getFunction();
961 StringRef FuncNameStr = FuncName.stringRef();
962 if (!FuncsToUse.count(V: FuncNameStr) && !Remapper->exist(FunctionName: FuncNameStr))
963 continue;
964 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
965 if (std::error_code EC = readFuncProfile(Start: FuncProfileAddr, Profiles))
966 return EC;
967 }
968 } else {
969 assert(!useFuncOffsetList());
970 for (auto Name : FuncsToUse) {
971
972 auto iter = FuncOffsetTable.find(Val: MD5Hash(Str: Name));
973 if (iter == FuncOffsetTable.end())
974 continue;
975 const uint8_t *FuncProfileAddr = Start + iter->second;
976 if (std::error_code EC = readFuncProfile(Start: FuncProfileAddr, Profiles))
977 return EC;
978 }
979 }
980
981 return sampleprof_error::success;
982}
983
984std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
985 // Collect functions used by current module if the Reader has been
986 // given a module.
987 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
988 // which will query FunctionSamples::HasUniqSuffix, so it has to be
989 // called after FunctionSamples::HasUniqSuffix is set, i.e. after
990 // NameTable section is read.
991 bool LoadFuncsToBeUsed = collectFuncsFromModule();
992
993 // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all
994 // profiles.
995 if (!LoadFuncsToBeUsed) {
996 while (Data < End) {
997 if (std::error_code EC = readFuncProfile(Start: Data))
998 return EC;
999 }
1000 assert(Data == End && "More data is read than expected");
1001 } else {
1002 // Load function profiles on demand.
1003 if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
1004 return EC;
1005 Data = End;
1006 }
1007 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
1008 "Cannot have both context-sensitive and regular profile");
1009 assert((!CSProfileCount || ProfileIsCS) &&
1010 "Section flag should be consistent with actual profile");
1011 return sampleprof_error::success;
1012}
1013
1014std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
1015 if (!ProfSymList)
1016 ProfSymList = std::make_unique<ProfileSymbolList>();
1017
1018 if (std::error_code EC = ProfSymList->read(Data, ListSize: End - Data))
1019 return EC;
1020
1021 Data = End;
1022 return sampleprof_error::success;
1023}
1024
1025std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
1026 const uint8_t *SecStart, const uint64_t SecSize,
1027 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
1028 Data = SecStart;
1029 End = SecStart + SecSize;
1030 auto DecompressSize = readNumber<uint64_t>();
1031 if (std::error_code EC = DecompressSize.getError())
1032 return EC;
1033 DecompressBufSize = *DecompressSize;
1034
1035 auto CompressSize = readNumber<uint64_t>();
1036 if (std::error_code EC = CompressSize.getError())
1037 return EC;
1038
1039 if (!llvm::compression::zlib::isAvailable())
1040 return sampleprof_error::zlib_unavailable;
1041
1042 uint8_t *Buffer = Allocator.Allocate<uint8_t>(Num: DecompressBufSize);
1043 size_t UCSize = DecompressBufSize;
1044 llvm::Error E = compression::zlib::decompress(Input: ArrayRef(Data, *CompressSize),
1045 Output: Buffer, UncompressedSize&: UCSize);
1046 if (E)
1047 return sampleprof_error::uncompress_failed;
1048 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
1049 return sampleprof_error::success;
1050}
1051
1052std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
1053 const uint8_t *BufStart =
1054 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1055
1056 for (auto &Entry : SecHdrTable) {
1057 // Skip empty section.
1058 if (!Entry.Size)
1059 continue;
1060
1061 // Skip sections without inlined functions when SkipFlatProf is true.
1062 if (SkipFlatProf && hasSecFlag(Entry, Flag: SecCommonFlags::SecFlagFlat))
1063 continue;
1064
1065 const uint8_t *SecStart = BufStart + Entry.Offset;
1066 uint64_t SecSize = Entry.Size;
1067
1068 // If the section is compressed, decompress it into a buffer
1069 // DecompressBuf before reading the actual data. The pointee of
1070 // 'Data' will be changed to buffer hold by DecompressBuf
1071 // temporarily when reading the actual data.
1072 bool isCompressed = hasSecFlag(Entry, Flag: SecCommonFlags::SecFlagCompress);
1073 if (isCompressed) {
1074 const uint8_t *DecompressBuf;
1075 uint64_t DecompressBufSize;
1076 if (std::error_code EC = decompressSection(
1077 SecStart, SecSize, DecompressBuf, DecompressBufSize))
1078 return EC;
1079 SecStart = DecompressBuf;
1080 SecSize = DecompressBufSize;
1081 }
1082
1083 if (std::error_code EC = readOneSection(Start: SecStart, Size: SecSize, Entry))
1084 return EC;
1085 if (Data != SecStart + SecSize)
1086 return sampleprof_error::malformed;
1087
1088 // Change the pointee of 'Data' from DecompressBuf to original Buffer.
1089 if (isCompressed) {
1090 Data = BufStart + Entry.Offset;
1091 End = BufStart + Buffer->getBufferSize();
1092 }
1093 }
1094
1095 return sampleprof_error::success;
1096}
1097
1098std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
1099 if (Magic == SPMagic())
1100 return sampleprof_error::success;
1101 return sampleprof_error::bad_magic;
1102}
1103
1104std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
1105 if (Magic == SPMagic(Format: SPF_Ext_Binary))
1106 return sampleprof_error::success;
1107 return sampleprof_error::bad_magic;
1108}
1109
1110std::error_code SampleProfileReaderBinary::readNameTable() {
1111 auto Size = readNumber<size_t>();
1112 if (std::error_code EC = Size.getError())
1113 return EC;
1114
1115 // Normally if useMD5 is true, the name table should have MD5 values, not
1116 // strings, however in the case that ExtBinary profile has multiple name
1117 // tables mixing string and MD5, all of them have to be normalized to use MD5,
1118 // because optimization passes can only handle either type.
1119 bool UseMD5 = useMD5();
1120
1121 NameTable.clear();
1122 NameTable.reserve(n: *Size);
1123 if (!ProfileIsCS) {
1124 MD5SampleContextTable.clear();
1125 if (UseMD5)
1126 MD5SampleContextTable.reserve(n: *Size);
1127 else
1128 // If we are using strings, delay MD5 computation since only a portion of
1129 // names are used by top level functions. Use 0 to indicate MD5 value is
1130 // to be calculated as no known string has a MD5 value of 0.
1131 MD5SampleContextTable.resize(new_size: *Size);
1132 }
1133 for (size_t I = 0; I < *Size; ++I) {
1134 auto Name(readString());
1135 if (std::error_code EC = Name.getError())
1136 return EC;
1137 if (UseMD5) {
1138 FunctionId FID(*Name);
1139 if (!ProfileIsCS)
1140 MD5SampleContextTable.emplace_back(args: FID.getHashCode());
1141 NameTable.emplace_back(args&: FID);
1142 } else
1143 NameTable.push_back(x: FunctionId(*Name));
1144 }
1145 if (!ProfileIsCS)
1146 MD5SampleContextStart = MD5SampleContextTable.data();
1147 return sampleprof_error::success;
1148}
1149
1150std::error_code
1151SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
1152 bool FixedLengthMD5) {
1153 if (FixedLengthMD5) {
1154 if (!IsMD5)
1155 errs() << "If FixedLengthMD5 is true, UseMD5 has to be true";
1156 auto Size = readNumber<size_t>();
1157 if (std::error_code EC = Size.getError())
1158 return EC;
1159
1160 assert(Data + (*Size) * sizeof(uint64_t) == End &&
1161 "Fixed length MD5 name table does not contain specified number of "
1162 "entries");
1163 if (Data + (*Size) * sizeof(uint64_t) > End)
1164 return sampleprof_error::truncated;
1165
1166 NameTable.clear();
1167 NameTable.reserve(n: *Size);
1168 for (size_t I = 0; I < *Size; ++I) {
1169 using namespace support;
1170 uint64_t FID = endian::read<uint64_t, endianness::little, unaligned>(
1171 memory: Data + I * sizeof(uint64_t));
1172 NameTable.emplace_back(args: FunctionId(FID));
1173 }
1174 if (!ProfileIsCS)
1175 MD5SampleContextStart = reinterpret_cast<const uint64_t *>(Data);
1176 Data = Data + (*Size) * sizeof(uint64_t);
1177 return sampleprof_error::success;
1178 }
1179
1180 if (IsMD5) {
1181 assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here");
1182 auto Size = readNumber<size_t>();
1183 if (std::error_code EC = Size.getError())
1184 return EC;
1185
1186 NameTable.clear();
1187 NameTable.reserve(n: *Size);
1188 if (!ProfileIsCS)
1189 MD5SampleContextTable.resize(new_size: *Size);
1190 for (size_t I = 0; I < *Size; ++I) {
1191 auto FID = readNumber<uint64_t>();
1192 if (std::error_code EC = FID.getError())
1193 return EC;
1194 if (!ProfileIsCS)
1195 support::endian::write64le(P: &MD5SampleContextTable[I], V: *FID);
1196 NameTable.emplace_back(args: FunctionId(*FID));
1197 }
1198 if (!ProfileIsCS)
1199 MD5SampleContextStart = MD5SampleContextTable.data();
1200 return sampleprof_error::success;
1201 }
1202
1203 return SampleProfileReaderBinary::readNameTable();
1204}
1205
1206// Read in the CS name table section, which basically contains a list of context
1207// vectors. Each element of a context vector, aka a frame, refers to the
1208// underlying raw function names that are stored in the name table, as well as
1209// a callsite identifier that only makes sense for non-leaf frames.
1210std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1211 auto Size = readNumber<size_t>();
1212 if (std::error_code EC = Size.getError())
1213 return EC;
1214
1215 CSNameTable.clear();
1216 CSNameTable.reserve(n: *Size);
1217 if (ProfileIsCS) {
1218 // Delay MD5 computation of CS context until they are needed. Use 0 to
1219 // indicate MD5 value is to be calculated as no known string has a MD5
1220 // value of 0.
1221 MD5SampleContextTable.clear();
1222 MD5SampleContextTable.resize(new_size: *Size);
1223 MD5SampleContextStart = MD5SampleContextTable.data();
1224 }
1225 for (size_t I = 0; I < *Size; ++I) {
1226 CSNameTable.emplace_back(args: SampleContextFrameVector());
1227 auto ContextSize = readNumber<uint32_t>();
1228 if (std::error_code EC = ContextSize.getError())
1229 return EC;
1230 for (uint32_t J = 0; J < *ContextSize; ++J) {
1231 auto FName(readStringFromTable());
1232 if (std::error_code EC = FName.getError())
1233 return EC;
1234 auto LineOffset = readNumber<uint64_t>();
1235 if (std::error_code EC = LineOffset.getError())
1236 return EC;
1237
1238 if (!isOffsetLegal(L: *LineOffset))
1239 return std::error_code();
1240
1241 auto Discriminator = readNumber<uint64_t>();
1242 if (std::error_code EC = Discriminator.getError())
1243 return EC;
1244
1245 CSNameTable.back().emplace_back(
1246 Args&: FName.get(), Args: LineLocation(LineOffset.get(), Discriminator.get()));
1247 }
1248 }
1249
1250 return sampleprof_error::success;
1251}
1252
1253std::error_code
1254SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
1255 FunctionSamples *FProfile) {
1256 if (Data < End) {
1257 if (ProfileIsProbeBased) {
1258 auto Checksum = readNumber<uint64_t>();
1259 if (std::error_code EC = Checksum.getError())
1260 return EC;
1261 if (FProfile)
1262 FProfile->setFunctionHash(*Checksum);
1263 }
1264
1265 if (ProfileHasAttribute) {
1266 auto Attributes = readNumber<uint32_t>();
1267 if (std::error_code EC = Attributes.getError())
1268 return EC;
1269 if (FProfile)
1270 FProfile->getContext().setAllAttributes(*Attributes);
1271 }
1272
1273 if (!ProfileIsCS) {
1274 // Read all the attributes for inlined function calls.
1275 auto NumCallsites = readNumber<uint32_t>();
1276 if (std::error_code EC = NumCallsites.getError())
1277 return EC;
1278
1279 for (uint32_t J = 0; J < *NumCallsites; ++J) {
1280 auto LineOffset = readNumber<uint64_t>();
1281 if (std::error_code EC = LineOffset.getError())
1282 return EC;
1283
1284 auto Discriminator = readNumber<uint64_t>();
1285 if (std::error_code EC = Discriminator.getError())
1286 return EC;
1287
1288 auto FContextHash(readSampleContextFromTable());
1289 if (std::error_code EC = FContextHash.getError())
1290 return EC;
1291
1292 auto &[FContext, Hash] = *FContextHash;
1293 FunctionSamples *CalleeProfile = nullptr;
1294 if (FProfile) {
1295 CalleeProfile = const_cast<FunctionSamples *>(
1296 &FProfile->functionSamplesAt(Loc: LineLocation(
1297 *LineOffset,
1298 *Discriminator))[FContext.getFunction()]);
1299 }
1300 if (std::error_code EC =
1301 readFuncMetadata(ProfileHasAttribute, FProfile: CalleeProfile))
1302 return EC;
1303 }
1304 }
1305 }
1306
1307 return sampleprof_error::success;
1308}
1309
1310std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata(
1311 bool ProfileHasAttribute, DenseSet<FunctionSamples *> &Profiles) {
1312 if (FuncMetadataIndex.empty())
1313 return sampleprof_error::success;
1314
1315 for (auto *FProfile : Profiles) {
1316 auto R = FuncMetadataIndex.find(x: FProfile->getContext().getHashCode());
1317 if (R == FuncMetadataIndex.end())
1318 continue;
1319
1320 Data = R->second.first;
1321 End = R->second.second;
1322 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
1323 return EC;
1324 assert(Data == End && "More data is read than expected");
1325 }
1326 return sampleprof_error::success;
1327}
1328
1329std::error_code
1330SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
1331 while (Data < End) {
1332 auto FContextHash(readSampleContextFromTable());
1333 if (std::error_code EC = FContextHash.getError())
1334 return EC;
1335 auto &[FContext, Hash] = *FContextHash;
1336 FunctionSamples *FProfile = nullptr;
1337 auto It = Profiles.find(Ctx: FContext);
1338 if (It != Profiles.end())
1339 FProfile = &It->second;
1340
1341 const uint8_t *Start = Data;
1342 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
1343 return EC;
1344
1345 FuncMetadataIndex[FContext.getHashCode()] = {Start, Data};
1346 }
1347
1348 assert(Data == End && "More data is read than expected");
1349 return sampleprof_error::success;
1350}
1351
1352std::error_code
1353SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) {
1354 SecHdrTableEntry Entry;
1355 auto Type = readUnencodedNumber<uint64_t>();
1356 if (std::error_code EC = Type.getError())
1357 return EC;
1358 Entry.Type = static_cast<SecType>(*Type);
1359
1360 auto Flags = readUnencodedNumber<uint64_t>();
1361 if (std::error_code EC = Flags.getError())
1362 return EC;
1363 Entry.Flags = *Flags;
1364
1365 auto Offset = readUnencodedNumber<uint64_t>();
1366 if (std::error_code EC = Offset.getError())
1367 return EC;
1368 Entry.Offset = *Offset;
1369
1370 auto Size = readUnencodedNumber<uint64_t>();
1371 if (std::error_code EC = Size.getError())
1372 return EC;
1373 Entry.Size = *Size;
1374
1375 Entry.LayoutIndex = Idx;
1376 SecHdrTable.push_back(x: std::move(Entry));
1377 return sampleprof_error::success;
1378}
1379
1380std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1381 auto EntryNum = readUnencodedNumber<uint64_t>();
1382 if (std::error_code EC = EntryNum.getError())
1383 return EC;
1384
1385 for (uint64_t i = 0; i < (*EntryNum); i++)
1386 if (std::error_code EC = readSecHdrTableEntry(Idx: i))
1387 return EC;
1388
1389 return sampleprof_error::success;
1390}
1391
1392std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1393 const uint8_t *BufStart =
1394 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1395 Data = BufStart;
1396 End = BufStart + Buffer->getBufferSize();
1397
1398 if (std::error_code EC = readMagicIdent())
1399 return EC;
1400
1401 if (std::error_code EC = readSecHdrTable())
1402 return EC;
1403
1404 return sampleprof_error::success;
1405}
1406
1407uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1408 uint64_t Size = 0;
1409 for (auto &Entry : SecHdrTable) {
1410 if (Entry.Type == Type)
1411 Size += Entry.Size;
1412 }
1413 return Size;
1414}
1415
1416uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1417 // Sections in SecHdrTable is not necessarily in the same order as
1418 // sections in the profile because section like FuncOffsetTable needs
1419 // to be written after section LBRProfile but needs to be read before
1420 // section LBRProfile, so we cannot simply use the last entry in
1421 // SecHdrTable to calculate the file size.
1422 uint64_t FileSize = 0;
1423 for (auto &Entry : SecHdrTable) {
1424 FileSize = std::max(a: Entry.Offset + Entry.Size, b: FileSize);
1425 }
1426 return FileSize;
1427}
1428
1429static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1430 std::string Flags;
1431 if (hasSecFlag(Entry, Flag: SecCommonFlags::SecFlagCompress))
1432 Flags.append(s: "{compressed,");
1433 else
1434 Flags.append(s: "{");
1435
1436 if (hasSecFlag(Entry, Flag: SecCommonFlags::SecFlagFlat))
1437 Flags.append(s: "flat,");
1438
1439 switch (Entry.Type) {
1440 case SecNameTable:
1441 if (hasSecFlag(Entry, Flag: SecNameTableFlags::SecFlagFixedLengthMD5))
1442 Flags.append(s: "fixlenmd5,");
1443 else if (hasSecFlag(Entry, Flag: SecNameTableFlags::SecFlagMD5Name))
1444 Flags.append(s: "md5,");
1445 if (hasSecFlag(Entry, Flag: SecNameTableFlags::SecFlagUniqSuffix))
1446 Flags.append(s: "uniq,");
1447 break;
1448 case SecProfSummary:
1449 if (hasSecFlag(Entry, Flag: SecProfSummaryFlags::SecFlagPartial))
1450 Flags.append(s: "partial,");
1451 if (hasSecFlag(Entry, Flag: SecProfSummaryFlags::SecFlagFullContext))
1452 Flags.append(s: "context,");
1453 if (hasSecFlag(Entry, Flag: SecProfSummaryFlags::SecFlagIsPreInlined))
1454 Flags.append(s: "preInlined,");
1455 if (hasSecFlag(Entry, Flag: SecProfSummaryFlags::SecFlagFSDiscriminator))
1456 Flags.append(s: "fs-discriminator,");
1457 break;
1458 case SecFuncOffsetTable:
1459 if (hasSecFlag(Entry, Flag: SecFuncOffsetFlags::SecFlagOrdered))
1460 Flags.append(s: "ordered,");
1461 break;
1462 case SecFuncMetadata:
1463 if (hasSecFlag(Entry, Flag: SecFuncMetadataFlags::SecFlagIsProbeBased))
1464 Flags.append(s: "probe,");
1465 if (hasSecFlag(Entry, Flag: SecFuncMetadataFlags::SecFlagHasAttribute))
1466 Flags.append(s: "attr,");
1467 break;
1468 default:
1469 break;
1470 }
1471 char &last = Flags.back();
1472 if (last == ',')
1473 last = '}';
1474 else
1475 Flags.append(s: "}");
1476 return Flags;
1477}
1478
1479bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1480 uint64_t TotalSecsSize = 0;
1481 for (auto &Entry : SecHdrTable) {
1482 OS << getSecName(Type: Entry.Type) << " - Offset: " << Entry.Offset
1483 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1484 << "\n";
1485 ;
1486 TotalSecsSize += Entry.Size;
1487 }
1488 uint64_t HeaderSize = SecHdrTable.front().Offset;
1489 assert(HeaderSize + TotalSecsSize == getFileSize() &&
1490 "Size of 'header + sections' doesn't match the total size of profile");
1491
1492 OS << "Header Size: " << HeaderSize << "\n";
1493 OS << "Total Sections Size: " << TotalSecsSize << "\n";
1494 OS << "File Size: " << getFileSize() << "\n";
1495 return true;
1496}
1497
1498std::error_code SampleProfileReaderBinary::readMagicIdent() {
1499 // Read and check the magic identifier.
1500 auto Magic = readNumber<uint64_t>();
1501 if (std::error_code EC = Magic.getError())
1502 return EC;
1503 else if (std::error_code EC = verifySPMagic(Magic: *Magic))
1504 return EC;
1505
1506 // Read the version number.
1507 auto Version = readNumber<uint64_t>();
1508 if (std::error_code EC = Version.getError())
1509 return EC;
1510 else if (*Version != SPVersion())
1511 return sampleprof_error::unsupported_version;
1512
1513 return sampleprof_error::success;
1514}
1515
1516std::error_code SampleProfileReaderBinary::readHeader() {
1517 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1518 End = Data + Buffer->getBufferSize();
1519
1520 if (std::error_code EC = readMagicIdent())
1521 return EC;
1522
1523 if (std::error_code EC = readSummary())
1524 return EC;
1525
1526 if (std::error_code EC = readNameTable())
1527 return EC;
1528 return sampleprof_error::success;
1529}
1530
1531std::error_code SampleProfileReaderBinary::readSummaryEntry(
1532 std::vector<ProfileSummaryEntry> &Entries) {
1533 auto Cutoff = readNumber<uint64_t>();
1534 if (std::error_code EC = Cutoff.getError())
1535 return EC;
1536
1537 auto MinBlockCount = readNumber<uint64_t>();
1538 if (std::error_code EC = MinBlockCount.getError())
1539 return EC;
1540
1541 auto NumBlocks = readNumber<uint64_t>();
1542 if (std::error_code EC = NumBlocks.getError())
1543 return EC;
1544
1545 Entries.emplace_back(args&: *Cutoff, args&: *MinBlockCount, args&: *NumBlocks);
1546 return sampleprof_error::success;
1547}
1548
1549std::error_code SampleProfileReaderBinary::readSummary() {
1550 auto TotalCount = readNumber<uint64_t>();
1551 if (std::error_code EC = TotalCount.getError())
1552 return EC;
1553
1554 auto MaxBlockCount = readNumber<uint64_t>();
1555 if (std::error_code EC = MaxBlockCount.getError())
1556 return EC;
1557
1558 auto MaxFunctionCount = readNumber<uint64_t>();
1559 if (std::error_code EC = MaxFunctionCount.getError())
1560 return EC;
1561
1562 auto NumBlocks = readNumber<uint64_t>();
1563 if (std::error_code EC = NumBlocks.getError())
1564 return EC;
1565
1566 auto NumFunctions = readNumber<uint64_t>();
1567 if (std::error_code EC = NumFunctions.getError())
1568 return EC;
1569
1570 auto NumSummaryEntries = readNumber<uint64_t>();
1571 if (std::error_code EC = NumSummaryEntries.getError())
1572 return EC;
1573
1574 std::vector<ProfileSummaryEntry> Entries;
1575 for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1576 std::error_code EC = readSummaryEntry(Entries);
1577 if (EC != sampleprof_error::success)
1578 return EC;
1579 }
1580 Summary = std::make_unique<ProfileSummary>(
1581 args: ProfileSummary::PSK_Sample, args&: Entries, args&: *TotalCount, args&: *MaxBlockCount, args: 0,
1582 args&: *MaxFunctionCount, args&: *NumBlocks, args&: *NumFunctions);
1583
1584 return sampleprof_error::success;
1585}
1586
1587bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1588 const uint8_t *Data =
1589 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1590 uint64_t Magic = decodeULEB128(p: Data);
1591 return Magic == SPMagic();
1592}
1593
1594bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1595 const uint8_t *Data =
1596 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1597 uint64_t Magic = decodeULEB128(p: Data);
1598 return Magic == SPMagic(Format: SPF_Ext_Binary);
1599}
1600
1601std::error_code SampleProfileReaderGCC::skipNextWord() {
1602 uint32_t dummy;
1603 if (!GcovBuffer.readInt(Val&: dummy))
1604 return sampleprof_error::truncated;
1605 return sampleprof_error::success;
1606}
1607
1608template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1609 if (sizeof(T) <= sizeof(uint32_t)) {
1610 uint32_t Val;
1611 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1612 return static_cast<T>(Val);
1613 } else if (sizeof(T) <= sizeof(uint64_t)) {
1614 uint64_t Val;
1615 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1616 return static_cast<T>(Val);
1617 }
1618
1619 std::error_code EC = sampleprof_error::malformed;
1620 reportError(LineNumber: 0, Msg: EC.message());
1621 return EC;
1622}
1623
1624ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1625 StringRef Str;
1626 if (!GcovBuffer.readString(str&: Str))
1627 return sampleprof_error::truncated;
1628 return Str;
1629}
1630
1631std::error_code SampleProfileReaderGCC::readHeader() {
1632 // Read the magic identifier.
1633 if (!GcovBuffer.readGCDAFormat())
1634 return sampleprof_error::unrecognized_format;
1635
1636 // Read the version number. Note - the GCC reader does not validate this
1637 // version, but the profile creator generates v704.
1638 GCOV::GCOVVersion version;
1639 if (!GcovBuffer.readGCOVVersion(version))
1640 return sampleprof_error::unrecognized_format;
1641
1642 if (version != GCOV::V407)
1643 return sampleprof_error::unsupported_version;
1644
1645 // Skip the empty integer.
1646 if (std::error_code EC = skipNextWord())
1647 return EC;
1648
1649 return sampleprof_error::success;
1650}
1651
1652std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1653 uint32_t Tag;
1654 if (!GcovBuffer.readInt(Val&: Tag))
1655 return sampleprof_error::truncated;
1656
1657 if (Tag != Expected)
1658 return sampleprof_error::malformed;
1659
1660 if (std::error_code EC = skipNextWord())
1661 return EC;
1662
1663 return sampleprof_error::success;
1664}
1665
1666std::error_code SampleProfileReaderGCC::readNameTable() {
1667 if (std::error_code EC = readSectionTag(Expected: GCOVTagAFDOFileNames))
1668 return EC;
1669
1670 uint32_t Size;
1671 if (!GcovBuffer.readInt(Val&: Size))
1672 return sampleprof_error::truncated;
1673
1674 for (uint32_t I = 0; I < Size; ++I) {
1675 StringRef Str;
1676 if (!GcovBuffer.readString(str&: Str))
1677 return sampleprof_error::truncated;
1678 Names.push_back(x: std::string(Str));
1679 }
1680
1681 return sampleprof_error::success;
1682}
1683
1684std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1685 if (std::error_code EC = readSectionTag(Expected: GCOVTagAFDOFunction))
1686 return EC;
1687
1688 uint32_t NumFunctions;
1689 if (!GcovBuffer.readInt(Val&: NumFunctions))
1690 return sampleprof_error::truncated;
1691
1692 InlineCallStack Stack;
1693 for (uint32_t I = 0; I < NumFunctions; ++I)
1694 if (std::error_code EC = readOneFunctionProfile(InlineStack: Stack, Update: true, Offset: 0))
1695 return EC;
1696
1697 computeSummary();
1698 return sampleprof_error::success;
1699}
1700
1701std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1702 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1703 uint64_t HeadCount = 0;
1704 if (InlineStack.size() == 0)
1705 if (!GcovBuffer.readInt64(Val&: HeadCount))
1706 return sampleprof_error::truncated;
1707
1708 uint32_t NameIdx;
1709 if (!GcovBuffer.readInt(Val&: NameIdx))
1710 return sampleprof_error::truncated;
1711
1712 StringRef Name(Names[NameIdx]);
1713
1714 uint32_t NumPosCounts;
1715 if (!GcovBuffer.readInt(Val&: NumPosCounts))
1716 return sampleprof_error::truncated;
1717
1718 uint32_t NumCallsites;
1719 if (!GcovBuffer.readInt(Val&: NumCallsites))
1720 return sampleprof_error::truncated;
1721
1722 FunctionSamples *FProfile = nullptr;
1723 if (InlineStack.size() == 0) {
1724 // If this is a top function that we have already processed, do not
1725 // update its profile again. This happens in the presence of
1726 // function aliases. Since these aliases share the same function
1727 // body, there will be identical replicated profiles for the
1728 // original function. In this case, we simply not bother updating
1729 // the profile of the original function.
1730 FProfile = &Profiles[FunctionId(Name)];
1731 FProfile->addHeadSamples(Num: HeadCount);
1732 if (FProfile->getTotalSamples() > 0)
1733 Update = false;
1734 } else {
1735 // Otherwise, we are reading an inlined instance. The top of the
1736 // inline stack contains the profile of the caller. Insert this
1737 // callee in the caller's CallsiteMap.
1738 FunctionSamples *CallerProfile = InlineStack.front();
1739 uint32_t LineOffset = Offset >> 16;
1740 uint32_t Discriminator = Offset & 0xffff;
1741 FProfile = &CallerProfile->functionSamplesAt(
1742 Loc: LineLocation(LineOffset, Discriminator))[FunctionId(Name)];
1743 }
1744 FProfile->setFunction(FunctionId(Name));
1745
1746 for (uint32_t I = 0; I < NumPosCounts; ++I) {
1747 uint32_t Offset;
1748 if (!GcovBuffer.readInt(Val&: Offset))
1749 return sampleprof_error::truncated;
1750
1751 uint32_t NumTargets;
1752 if (!GcovBuffer.readInt(Val&: NumTargets))
1753 return sampleprof_error::truncated;
1754
1755 uint64_t Count;
1756 if (!GcovBuffer.readInt64(Val&: Count))
1757 return sampleprof_error::truncated;
1758
1759 // The line location is encoded in the offset as:
1760 // high 16 bits: line offset to the start of the function.
1761 // low 16 bits: discriminator.
1762 uint32_t LineOffset = Offset >> 16;
1763 uint32_t Discriminator = Offset & 0xffff;
1764
1765 InlineCallStack NewStack;
1766 NewStack.push_back(Elt: FProfile);
1767 llvm::append_range(C&: NewStack, R: InlineStack);
1768 if (Update) {
1769 // Walk up the inline stack, adding the samples on this line to
1770 // the total sample count of the callers in the chain.
1771 for (auto *CallerProfile : NewStack)
1772 CallerProfile->addTotalSamples(Num: Count);
1773
1774 // Update the body samples for the current profile.
1775 FProfile->addBodySamples(LineOffset, Discriminator, Num: Count);
1776 }
1777
1778 // Process the list of functions called at an indirect call site.
1779 // These are all the targets that a function pointer (or virtual
1780 // function) resolved at runtime.
1781 for (uint32_t J = 0; J < NumTargets; J++) {
1782 uint32_t HistVal;
1783 if (!GcovBuffer.readInt(Val&: HistVal))
1784 return sampleprof_error::truncated;
1785
1786 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1787 return sampleprof_error::malformed;
1788
1789 uint64_t TargetIdx;
1790 if (!GcovBuffer.readInt64(Val&: TargetIdx))
1791 return sampleprof_error::truncated;
1792 StringRef TargetName(Names[TargetIdx]);
1793
1794 uint64_t TargetCount;
1795 if (!GcovBuffer.readInt64(Val&: TargetCount))
1796 return sampleprof_error::truncated;
1797
1798 if (Update)
1799 FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1800 Func: FunctionId(TargetName),
1801 Num: TargetCount);
1802 }
1803 }
1804
1805 // Process all the inlined callers into the current function. These
1806 // are all the callsites that were inlined into this function.
1807 for (uint32_t I = 0; I < NumCallsites; I++) {
1808 // The offset is encoded as:
1809 // high 16 bits: line offset to the start of the function.
1810 // low 16 bits: discriminator.
1811 uint32_t Offset;
1812 if (!GcovBuffer.readInt(Val&: Offset))
1813 return sampleprof_error::truncated;
1814 InlineCallStack NewStack;
1815 NewStack.push_back(Elt: FProfile);
1816 llvm::append_range(C&: NewStack, R: InlineStack);
1817 if (std::error_code EC = readOneFunctionProfile(InlineStack: NewStack, Update, Offset))
1818 return EC;
1819 }
1820
1821 return sampleprof_error::success;
1822}
1823
1824/// Read a GCC AutoFDO profile.
1825///
1826/// This format is generated by the Linux Perf conversion tool at
1827/// https://github.com/google/autofdo.
1828std::error_code SampleProfileReaderGCC::readImpl() {
1829 assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
1830 // Read the string table.
1831 if (std::error_code EC = readNameTable())
1832 return EC;
1833
1834 // Read the source profile.
1835 if (std::error_code EC = readFunctionProfiles())
1836 return EC;
1837
1838 return sampleprof_error::success;
1839}
1840
1841bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1842 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1843 return Magic == "adcg*704";
1844}
1845
1846void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1847 // If the reader uses MD5 to represent string, we can't remap it because
1848 // we don't know what the original function names were.
1849 if (Reader.useMD5()) {
1850 Ctx.diagnose(DI: DiagnosticInfoSampleProfile(
1851 Reader.getBuffer()->getBufferIdentifier(),
1852 "Profile data remapping cannot be applied to profile data "
1853 "using MD5 names (original mangled names are not available).",
1854 DS_Warning));
1855 return;
1856 }
1857
1858 // CSSPGO-TODO: Remapper is not yet supported.
1859 // We will need to remap the entire context string.
1860 assert(Remappings && "should be initialized while creating remapper");
1861 for (auto &Sample : Reader.getProfiles()) {
1862 DenseSet<FunctionId> NamesInSample;
1863 Sample.second.findAllNames(NameSet&: NamesInSample);
1864 for (auto &Name : NamesInSample) {
1865 StringRef NameStr = Name.stringRef();
1866 if (auto Key = Remappings->insert(FunctionName: NameStr))
1867 NameMap.insert(KV: {Key, NameStr});
1868 }
1869 }
1870
1871 RemappingApplied = true;
1872}
1873
1874std::optional<StringRef>
1875SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1876 if (auto Key = Remappings->lookup(FunctionName: Fname)) {
1877 StringRef Result = NameMap.lookup(Val: Key);
1878 if (!Result.empty())
1879 return Result;
1880 }
1881 return std::nullopt;
1882}
1883
1884/// Prepare a memory buffer for the contents of \p Filename.
1885///
1886/// \returns an error code indicating the status of the buffer.
1887static ErrorOr<std::unique_ptr<MemoryBuffer>>
1888setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
1889 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
1890 : FS.getBufferForFile(Name: Filename);
1891 if (std::error_code EC = BufferOrErr.getError())
1892 return EC;
1893 auto Buffer = std::move(BufferOrErr.get());
1894
1895 return std::move(Buffer);
1896}
1897
1898/// Create a sample profile reader based on the format of the input file.
1899///
1900/// \param Filename The file to open.
1901///
1902/// \param C The LLVM context to use to emit diagnostics.
1903///
1904/// \param P The FSDiscriminatorPass.
1905///
1906/// \param RemapFilename The file used for profile remapping.
1907///
1908/// \returns an error code indicating the status of the created reader.
1909ErrorOr<std::unique_ptr<SampleProfileReader>>
1910SampleProfileReader::create(StringRef Filename, LLVMContext &C,
1911 vfs::FileSystem &FS, FSDiscriminatorPass P,
1912 StringRef RemapFilename) {
1913 auto BufferOrError = setupMemoryBuffer(Filename, FS);
1914 if (std::error_code EC = BufferOrError.getError())
1915 return EC;
1916 return create(B&: BufferOrError.get(), C, FS, P, RemapFilename);
1917}
1918
1919/// Create a sample profile remapper from the given input, to remap the
1920/// function names in the given profile data.
1921///
1922/// \param Filename The file to open.
1923///
1924/// \param Reader The profile reader the remapper is going to be applied to.
1925///
1926/// \param C The LLVM context to use to emit diagnostics.
1927///
1928/// \returns an error code indicating the status of the created reader.
1929ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1930SampleProfileReaderItaniumRemapper::create(StringRef Filename,
1931 vfs::FileSystem &FS,
1932 SampleProfileReader &Reader,
1933 LLVMContext &C) {
1934 auto BufferOrError = setupMemoryBuffer(Filename, FS);
1935 if (std::error_code EC = BufferOrError.getError())
1936 return EC;
1937 return create(B&: BufferOrError.get(), Reader, C);
1938}
1939
1940/// Create a sample profile remapper from the given input, to remap the
1941/// function names in the given profile data.
1942///
1943/// \param B The memory buffer to create the reader from (assumes ownership).
1944///
1945/// \param C The LLVM context to use to emit diagnostics.
1946///
1947/// \param Reader The profile reader the remapper is going to be applied to.
1948///
1949/// \returns an error code indicating the status of the created reader.
1950ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1951SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1952 SampleProfileReader &Reader,
1953 LLVMContext &C) {
1954 auto Remappings = std::make_unique<SymbolRemappingReader>();
1955 if (Error E = Remappings->read(B&: *B)) {
1956 handleAllErrors(
1957 E: std::move(E), Handlers: [&](const SymbolRemappingParseError &ParseError) {
1958 C.diagnose(DI: DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1959 ParseError.getLineNum(),
1960 ParseError.getMessage()));
1961 });
1962 return sampleprof_error::malformed;
1963 }
1964
1965 return std::make_unique<SampleProfileReaderItaniumRemapper>(
1966 args: std::move(B), args: std::move(Remappings), args&: Reader);
1967}
1968
1969/// Create a sample profile reader based on the format of the input data.
1970///
1971/// \param B The memory buffer to create the reader from (assumes ownership).
1972///
1973/// \param C The LLVM context to use to emit diagnostics.
1974///
1975/// \param P The FSDiscriminatorPass.
1976///
1977/// \param RemapFilename The file used for profile remapping.
1978///
1979/// \returns an error code indicating the status of the created reader.
1980ErrorOr<std::unique_ptr<SampleProfileReader>>
1981SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1982 vfs::FileSystem &FS, FSDiscriminatorPass P,
1983 StringRef RemapFilename) {
1984 std::unique_ptr<SampleProfileReader> Reader;
1985 if (SampleProfileReaderRawBinary::hasFormat(Buffer: *B))
1986 Reader.reset(p: new SampleProfileReaderRawBinary(std::move(B), C));
1987 else if (SampleProfileReaderExtBinary::hasFormat(Buffer: *B))
1988 Reader.reset(p: new SampleProfileReaderExtBinary(std::move(B), C));
1989 else if (SampleProfileReaderGCC::hasFormat(Buffer: *B))
1990 Reader.reset(p: new SampleProfileReaderGCC(std::move(B), C));
1991 else if (SampleProfileReaderText::hasFormat(Buffer: *B))
1992 Reader.reset(p: new SampleProfileReaderText(std::move(B), C));
1993 else
1994 return sampleprof_error::unrecognized_format;
1995
1996 if (!RemapFilename.empty()) {
1997 auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
1998 Filename: RemapFilename, FS, Reader&: *Reader, C);
1999 if (std::error_code EC = ReaderOrErr.getError()) {
2000 std::string Msg = "Could not create remapper: " + EC.message();
2001 C.diagnose(DI: DiagnosticInfoSampleProfile(RemapFilename, Msg));
2002 return EC;
2003 }
2004 Reader->Remapper = std::move(ReaderOrErr.get());
2005 }
2006
2007 if (std::error_code EC = Reader->readHeader()) {
2008 return EC;
2009 }
2010
2011 Reader->setDiscriminatorMaskedBitFrom(P);
2012
2013 return std::move(Reader);
2014}
2015
2016// For text and GCC file formats, we compute the summary after reading the
2017// profile. Binary format has the profile summary in its header.
2018void SampleProfileReader::computeSummary() {
2019 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
2020 Summary = Builder.computeSummaryForProfiles(Profiles);
2021}
2022