1//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Instrumentation-based profile-guided optimization
10//
11//===----------------------------------------------------------------------===//
12
13#include "CodeGenPGO.h"
14#include "CGDebugInfo.h"
15#include "CodeGenFunction.h"
16#include "CoverageMappingGen.h"
17#include "clang/AST/RecursiveASTVisitor.h"
18#include "clang/AST/StmtVisitor.h"
19#include "clang/Basic/DiagnosticFrontend.h"
20#include "llvm/IR/Intrinsics.h"
21#include "llvm/IR/MDBuilder.h"
22#include "llvm/Support/CommandLine.h"
23#include "llvm/Support/Endian.h"
24#include "llvm/Support/MD5.h"
25#include <optional>
26
27namespace llvm {
28extern cl::opt<bool> EnableSingleByteCoverage;
29} // namespace llvm
30
31static llvm::cl::opt<bool>
32 EnableValueProfiling("enable-value-profiling",
33 llvm::cl::desc("Enable value profiling"),
34 llvm::cl::Hidden, llvm::cl::init(Val: false));
35
36using namespace clang;
37using namespace CodeGen;
38
39void CodeGenPGO::setFuncName(StringRef Name,
40 llvm::GlobalValue::LinkageTypes Linkage) {
41 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
42 FuncName = llvm::getPGOFuncName(
43 RawFuncName: Name, Linkage, FileName: CGM.getCodeGenOpts().MainFileName,
44 Version: PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
45
46 // If we're generating a profile, create a variable for the name.
47 if (CGM.getCodeGenOpts().hasProfileClangInstr())
48 FuncNameVar = llvm::createPGOFuncNameVar(M&: CGM.getModule(), Linkage, PGOFuncName: FuncName);
49}
50
51void CodeGenPGO::setFuncName(llvm::Function *Fn) {
52 setFuncName(Name: Fn->getName(), Linkage: Fn->getLinkage());
53 // Create PGOFuncName meta data.
54 llvm::createPGOFuncNameMetadata(F&: *Fn, PGOFuncName: FuncName);
55}
56
/// The version of the PGO hash algorithm.
///
/// The version in effect when reading a profile is derived from the profile's
/// format version (see getPGOHashVersion()); PGO_HASH_LATEST is used when
/// inserting new instrumentation.
enum PGOHashVersion : unsigned {
  PGO_HASH_V1,
  PGO_HASH_V2,
  PGO_HASH_V3,
  PGO_HASH_V4,

  // Keep this set to the latest hash version.
  PGO_HASH_LATEST = PGO_HASH_V4
};
67
68namespace {
69/// Stable hasher for PGO region counters.
70///
71/// PGOHash produces a stable hash of a given function's control flow.
72///
73/// Changing the output of this hash will invalidate all previously generated
74/// profiles -- i.e., don't do it.
75///
76/// \note When this hash does eventually change (years?), we still need to
77/// support old hashes. We'll need to pull in the version number from the
78/// profile data format and use the matching hash function.
class PGOHash {
  // Accumulator word: up to NumTypesPerWord 6-bit type codes packed into the
  // low bits, most recent code in the least significant position.
  uint64_t Working;
  // Total number of type codes combined so far (across all words).
  unsigned Count;
  // Hash algorithm version; controls finalize() behavior (see combine and
  // finalize definitions below).
  PGOHashVersion HashVersion;
  // Incremental MD5, engaged only once more than one word's worth of type
  // codes has been accumulated.
  llvm::MD5 MD5;

  static const int NumBitsPerType = 6;
  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
  static const unsigned TooBig = 1u << NumBitsPerType;

public:
  /// Hash values for AST nodes.
  ///
  /// Distinct values for AST nodes that have region counters attached.
  ///
  /// These values must be stable. All new members must be added at the end,
  /// and no members should be removed. Changing the enumeration value for an
  /// AST node will affect the hash of every function that contains that node.
  enum HashType : unsigned char {
    None = 0,
    LabelStmt = 1,
    WhileStmt,
    DoStmt,
    ForStmt,
    CXXForRangeStmt,
    ObjCForCollectionStmt,
    SwitchStmt,
    CaseStmt,
    DefaultStmt,
    IfStmt,
    CXXTryStmt,
    CXXCatchStmt,
    ConditionalOperator,
    BinaryOperatorLAnd,
    BinaryOperatorLOr,
    BinaryConditionalOperator,
    // The preceding values are available with PGO_HASH_V1.

    EndOfScope,
    IfThenBranch,
    IfElseBranch,
    GotoStmt,
    IndirectGotoStmt,
    BreakStmt,
    ContinueStmt,
    ReturnStmt,
    ThrowExpr,
    UnaryOperatorLNot,
    BinaryOperatorLT,
    BinaryOperatorGT,
    BinaryOperatorLE,
    BinaryOperatorGE,
    BinaryOperatorEQ,
    BinaryOperatorNE,
    // The preceding values are available since PGO_HASH_V2.

    // Keep this last. It's for the static assert that follows.
    LastHashType
  };
  static_assert(LastHashType <= TooBig, "Too many types in HashType");

  PGOHash(PGOHashVersion HashVersion)
      : Working(0), Count(0), HashVersion(HashVersion) {}
  void combine(HashType Type);
  uint64_t finalize();
  PGOHashVersion getHashVersion() const { return HashVersion; }
};
// Out-of-line definitions of the static class constants, required for
// odr-uses prior to C++17 inline variables.
const int PGOHash::NumBitsPerType;
const unsigned PGOHash::NumTypesPerWord;
const unsigned PGOHash::TooBig;
149
150/// Get the PGO hash version used in the given indexed profile.
151static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
152 CodeGenModule &CGM) {
153 if (PGOReader->getVersion() <= 4)
154 return PGO_HASH_V1;
155 if (PGOReader->getVersion() <= 5)
156 return PGO_HASH_V2;
157 if (PGOReader->getVersion() <= 12)
158 return PGO_HASH_V3;
159 return PGO_HASH_V4;
160}
161
162/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
163struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
164 using Base = RecursiveASTVisitor<MapRegionCounters>;
165
166 /// The next counter value to assign.
167 unsigned NextCounter;
168 /// The function hash.
169 PGOHash Hash;
170 /// The map of statements to counters.
171 llvm::DenseMap<const Stmt *, CounterPair> &CounterMap;
172 /// The state of MC/DC Coverage in this function.
173 MCDC::State &MCDCState;
174 /// Maximum number of supported MC/DC conditions in a boolean expression.
175 unsigned MCDCMaxCond;
176 /// The profile version.
177 uint64_t ProfileVersion;
178 /// Diagnostics Engine used to report warnings.
179 DiagnosticsEngine &Diag;
180
181 MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion,
182 llvm::DenseMap<const Stmt *, CounterPair> &CounterMap,
183 MCDC::State &MCDCState, unsigned MCDCMaxCond,
184 DiagnosticsEngine &Diag)
185 : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap),
186 MCDCState(MCDCState), MCDCMaxCond(MCDCMaxCond),
187 ProfileVersion(ProfileVersion), Diag(Diag) {}
188
189 // Blocks and lambdas are handled as separate functions, so we need not
190 // traverse them in the parent context.
191 bool TraverseBlockExpr(BlockExpr *BE) { return true; }
192 bool TraverseLambdaExpr(LambdaExpr *LE) {
193 // Traverse the captures, but not the body.
194 for (auto C : zip(t: LE->captures(), u: LE->capture_inits()))
195 TraverseLambdaCapture(LE, C: &std::get<0>(t&: C), Init: std::get<1>(t&: C));
196 return true;
197 }
198 bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
199
200 bool VisitDecl(const Decl *D) {
201 switch (D->getKind()) {
202 default:
203 break;
204 case Decl::Function:
205 case Decl::CXXMethod:
206 case Decl::CXXConstructor:
207 case Decl::CXXDestructor:
208 case Decl::CXXConversion:
209 case Decl::ObjCMethod:
210 case Decl::Block:
211 case Decl::Captured:
212 CounterMap[D->getBody()] = NextCounter++;
213 break;
214 }
215 return true;
216 }
217
218 /// If \p S gets a fresh counter, update the counter mappings. Return the
219 /// V1 hash of \p S.
220 PGOHash::HashType updateCounterMappings(Stmt *S) {
221 auto Type = getHashType(HashVersion: PGO_HASH_V1, S);
222 if (Type != PGOHash::None)
223 CounterMap[S] = NextCounter++;
224 return Type;
225 }
226
227 /// The following stacks are used with dataTraverseStmtPre() and
228 /// dataTraverseStmtPost() to track the depth of nested logical operators in a
229 /// boolean expression in a function. The ultimate purpose is to keep track
230 /// of the number of leaf-level conditions in the boolean expression so that a
231 /// profile bitmap can be allocated based on that number.
232 ///
233 /// The stacks are also used to find error cases and notify the user. A
234 /// standard logical operator nest for a boolean expression could be in a form
235 /// similar to this: "x = a && b && c && (d || f)"
236 struct DecisionState {
237 llvm::DenseSet<const Stmt *> Leaves; // Not BinOp
238 const Expr *DecisionExpr; // Root
239 bool Split; // In splitting with Leaves.
240
241 DecisionState() = delete;
242 DecisionState(const Expr *E, bool Split = false)
243 : DecisionExpr(E), Split(Split) {}
244 };
245
246 SmallVector<DecisionState, 1> DecisionStack;
247
248 // Hook: dataTraverseStmtPre() is invoked prior to visiting an AST Stmt node.
249 bool dataTraverseStmtPre(Stmt *S) {
250 /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing.
251 if (MCDCMaxCond == 0)
252 return true;
253
254 /// Mark "in splitting" when a leaf is met.
255 if (!DecisionStack.empty()) {
256 auto &StackTop = DecisionStack.back();
257 if (!StackTop.Split) {
258 if (StackTop.Leaves.contains(V: S)) {
259 assert(!StackTop.Split);
260 StackTop.Split = true;
261 }
262 return true;
263 }
264
265 // Split
266 assert(StackTop.Split);
267 assert(!StackTop.Leaves.contains(S));
268 }
269
270 if (const auto *E = dyn_cast<Expr>(Val: S)) {
271 if (const auto *BinOp =
272 dyn_cast<BinaryOperator>(Val: CodeGenFunction::stripCond(C: E));
273 BinOp && BinOp->isLogicalOp())
274 DecisionStack.emplace_back(Args&: E);
275 }
276
277 return true;
278 }
279
280 // Hook: dataTraverseStmtPost() is invoked by the AST visitor after visiting
281 // an AST Stmt node. MC/DC will use it to to signal when the top of a
282 // logical operation (boolean expression) nest is encountered.
283 bool dataTraverseStmtPost(Stmt *S) {
284 if (DecisionStack.empty())
285 return true;
286
287 /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing.
288 assert(MCDCMaxCond > 0);
289
290 auto &StackTop = DecisionStack.back();
291
292 if (StackTop.DecisionExpr != S) {
293 if (StackTop.Leaves.contains(V: S)) {
294 assert(StackTop.Split);
295 StackTop.Split = false;
296 }
297
298 return true;
299 }
300
301 /// Allocate the entry (with Valid=false)
302 auto &DecisionEntry =
303 MCDCState
304 .DecisionByStmt[CodeGenFunction::stripCond(C: StackTop.DecisionExpr)];
305
306 /// Was the maximum number of conditions encountered?
307 auto NumCond = StackTop.Leaves.size();
308 if (NumCond > MCDCMaxCond) {
309 Diag.Report(Loc: S->getBeginLoc(), DiagID: diag::warn_pgo_condition_limit)
310 << NumCond << MCDCMaxCond;
311 DecisionStack.pop_back();
312 return true;
313 }
314
315 // The Decision is validated.
316 DecisionEntry.ID = MCDCState.DecisionByStmt.size() - 1;
317
318 DecisionStack.pop_back();
319
320 return true;
321 }
322
323 /// The RHS of all logical operators gets a fresh counter in order to count
324 /// how many times the RHS evaluates to true or false, depending on the
325 /// semantics of the operator. This is only valid for ">= v7" of the profile
326 /// version so that we facilitate backward compatibility. In addition, in
327 /// order to use MC/DC, count the number of total LHS and RHS conditions.
328 bool VisitBinaryOperator(BinaryOperator *S) {
329 if (S->isLogicalOp()) {
330 if (CodeGenFunction::isInstrumentedCondition(C: S->getLHS())) {
331 if (!DecisionStack.empty())
332 DecisionStack.back().Leaves.insert(V: S->getLHS());
333 }
334
335 if (CodeGenFunction::isInstrumentedCondition(C: S->getRHS())) {
336 if (ProfileVersion >= llvm::IndexedInstrProf::Version7)
337 CounterMap[S->getRHS()] = NextCounter++;
338
339 if (!DecisionStack.empty())
340 DecisionStack.back().Leaves.insert(V: S->getRHS());
341 }
342 }
343 return Base::VisitBinaryOperator(S);
344 }
345
346 /// Include \p S in the function hash.
347 bool VisitStmt(Stmt *S) {
348 auto Type = updateCounterMappings(S);
349 if (Hash.getHashVersion() != PGO_HASH_V1)
350 Type = getHashType(HashVersion: Hash.getHashVersion(), S);
351 if (Type != PGOHash::None)
352 Hash.combine(Type);
353 return true;
354 }
355
356 bool TraverseIfStmt(IfStmt *If) {
357 // If we used the V1 hash, use the default traversal.
358 if (Hash.getHashVersion() == PGO_HASH_V1)
359 return Base::TraverseIfStmt(S: If);
360
361 // Otherwise, keep track of which branch we're in while traversing.
362 VisitStmt(S: If);
363
364 for (Stmt *CS : If->children()) {
365 if (!CS)
366 continue;
367 if (CS == If->getThen())
368 Hash.combine(Type: PGOHash::IfThenBranch);
369 else if (CS == If->getElse())
370 Hash.combine(Type: PGOHash::IfElseBranch);
371 TraverseStmt(S: CS);
372 }
373 Hash.combine(Type: PGOHash::EndOfScope);
374 return true;
375 }
376
377// If the statement type \p N is nestable, and its nesting impacts profile
378// stability, define a custom traversal which tracks the end of the statement
379// in the hash (provided we're not using the V1 hash).
380#define DEFINE_NESTABLE_TRAVERSAL(N) \
381 bool Traverse##N(N *S) { \
382 Base::Traverse##N(S); \
383 if (Hash.getHashVersion() != PGO_HASH_V1) \
384 Hash.combine(PGOHash::EndOfScope); \
385 return true; \
386 }
387
388 DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
389 DEFINE_NESTABLE_TRAVERSAL(DoStmt)
390 DEFINE_NESTABLE_TRAVERSAL(ForStmt)
391 DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
392 DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
393 DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
394 DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
395
396 /// Get version \p HashVersion of the PGO hash for \p S.
397 PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
398 switch (S->getStmtClass()) {
399 default:
400 break;
401 case Stmt::LabelStmtClass:
402 return PGOHash::LabelStmt;
403 case Stmt::WhileStmtClass:
404 return PGOHash::WhileStmt;
405 case Stmt::DoStmtClass:
406 return PGOHash::DoStmt;
407 case Stmt::ForStmtClass:
408 return PGOHash::ForStmt;
409 case Stmt::CXXForRangeStmtClass:
410 return PGOHash::CXXForRangeStmt;
411 case Stmt::ObjCForCollectionStmtClass:
412 return PGOHash::ObjCForCollectionStmt;
413 case Stmt::SwitchStmtClass:
414 return PGOHash::SwitchStmt;
415 case Stmt::CaseStmtClass:
416 return PGOHash::CaseStmt;
417 case Stmt::DefaultStmtClass:
418 return PGOHash::DefaultStmt;
419 case Stmt::IfStmtClass:
420 return PGOHash::IfStmt;
421 case Stmt::CXXTryStmtClass:
422 return PGOHash::CXXTryStmt;
423 case Stmt::CXXCatchStmtClass:
424 return PGOHash::CXXCatchStmt;
425 case Stmt::ConditionalOperatorClass:
426 return PGOHash::ConditionalOperator;
427 case Stmt::BinaryConditionalOperatorClass:
428 return PGOHash::BinaryConditionalOperator;
429 case Stmt::BinaryOperatorClass: {
430 const BinaryOperator *BO = cast<BinaryOperator>(Val: S);
431 if (BO->getOpcode() == BO_LAnd)
432 return PGOHash::BinaryOperatorLAnd;
433 if (BO->getOpcode() == BO_LOr)
434 return PGOHash::BinaryOperatorLOr;
435 if (HashVersion >= PGO_HASH_V2) {
436 switch (BO->getOpcode()) {
437 default:
438 break;
439 case BO_LT:
440 return PGOHash::BinaryOperatorLT;
441 case BO_GT:
442 return PGOHash::BinaryOperatorGT;
443 case BO_LE:
444 return PGOHash::BinaryOperatorLE;
445 case BO_GE:
446 return PGOHash::BinaryOperatorGE;
447 case BO_EQ:
448 return PGOHash::BinaryOperatorEQ;
449 case BO_NE:
450 return PGOHash::BinaryOperatorNE;
451 }
452 }
453 break;
454 }
455 }
456
457 if (HashVersion >= PGO_HASH_V2) {
458 switch (S->getStmtClass()) {
459 default:
460 break;
461 case Stmt::GotoStmtClass:
462 return PGOHash::GotoStmt;
463 case Stmt::IndirectGotoStmtClass:
464 return PGOHash::IndirectGotoStmt;
465 case Stmt::BreakStmtClass:
466 return PGOHash::BreakStmt;
467 case Stmt::ContinueStmtClass:
468 return PGOHash::ContinueStmt;
469 case Stmt::ReturnStmtClass:
470 return PGOHash::ReturnStmt;
471 case Stmt::CXXThrowExprClass:
472 return PGOHash::ThrowExpr;
473 case Stmt::UnaryOperatorClass: {
474 const UnaryOperator *UO = cast<UnaryOperator>(Val: S);
475 if (UO->getOpcode() == UO_LNot)
476 return PGOHash::UnaryOperatorLNot;
477 break;
478 }
479 }
480 }
481
482 return PGOHash::None;
483 }
484};
485
486/// A StmtVisitor that propagates the raw counts through the AST and
487/// records the count at statements where the value may change.
488struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
489 /// PGO state.
490 CodeGenPGO &PGO;
491
492 /// A flag that is set when the current count should be recorded on the
493 /// next statement, such as at the exit of a loop.
494 bool RecordNextStmtCount;
495
496 /// The count at the current location in the traversal.
497 uint64_t CurrentCount;
498
499 /// The map of statements to count values.
500 llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
501
502 /// BreakContinueStack - Keep counts of breaks and continues inside loops.
503 struct BreakContinue {
504 uint64_t BreakCount = 0;
505 uint64_t ContinueCount = 0;
506 BreakContinue() = default;
507 };
508 SmallVector<BreakContinue, 8> BreakContinueStack;
509
510 ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
511 CodeGenPGO &PGO)
512 : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
513
514 void RecordStmtCount(const Stmt *S) {
515 if (RecordNextStmtCount) {
516 CountMap[S] = CurrentCount;
517 RecordNextStmtCount = false;
518 }
519 }
520
521 /// Set and return the current count.
522 uint64_t setCount(uint64_t Count) {
523 CurrentCount = Count;
524 return Count;
525 }
526
527 void VisitStmt(const Stmt *S) {
528 RecordStmtCount(S);
529 for (const Stmt *Child : S->children())
530 if (Child)
531 this->Visit(S: Child);
532 }
533
534 void VisitFunctionDecl(const FunctionDecl *D) {
535 // Counter tracks entry to the function body.
536 uint64_t BodyCount = setCount(PGO.getRegionCount(S: D->getBody()));
537 CountMap[D->getBody()] = BodyCount;
538 Visit(S: D->getBody());
539 }
540
541 // Skip lambda expressions. We visit these as FunctionDecls when we're
542 // generating them and aren't interested in the body when generating a
543 // parent context.
544 void VisitLambdaExpr(const LambdaExpr *LE) {}
545
546 void VisitCapturedDecl(const CapturedDecl *D) {
547 // Counter tracks entry to the capture body.
548 uint64_t BodyCount = setCount(PGO.getRegionCount(S: D->getBody()));
549 CountMap[D->getBody()] = BodyCount;
550 Visit(S: D->getBody());
551 }
552
553 void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
554 // Counter tracks entry to the method body.
555 uint64_t BodyCount = setCount(PGO.getRegionCount(S: D->getBody()));
556 CountMap[D->getBody()] = BodyCount;
557 Visit(S: D->getBody());
558 }
559
560 void VisitBlockDecl(const BlockDecl *D) {
561 // Counter tracks entry to the block body.
562 uint64_t BodyCount = setCount(PGO.getRegionCount(S: D->getBody()));
563 CountMap[D->getBody()] = BodyCount;
564 Visit(S: D->getBody());
565 }
566
567 void VisitReturnStmt(const ReturnStmt *S) {
568 RecordStmtCount(S);
569 if (S->getRetValue())
570 Visit(S: S->getRetValue());
571 CurrentCount = 0;
572 RecordNextStmtCount = true;
573 }
574
575 void VisitCXXThrowExpr(const CXXThrowExpr *E) {
576 RecordStmtCount(S: E);
577 if (E->getSubExpr())
578 Visit(S: E->getSubExpr());
579 CurrentCount = 0;
580 RecordNextStmtCount = true;
581 }
582
583 void VisitGotoStmt(const GotoStmt *S) {
584 RecordStmtCount(S);
585 CurrentCount = 0;
586 RecordNextStmtCount = true;
587 }
588
589 void VisitLabelStmt(const LabelStmt *S) {
590 RecordNextStmtCount = false;
591 // Counter tracks the block following the label.
592 uint64_t BlockCount = setCount(PGO.getRegionCount(S));
593 CountMap[S] = BlockCount;
594 Visit(S: S->getSubStmt());
595 }
596
597 void VisitBreakStmt(const BreakStmt *S) {
598 RecordStmtCount(S);
599 assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
600 BreakContinueStack.back().BreakCount += CurrentCount;
601 CurrentCount = 0;
602 RecordNextStmtCount = true;
603 }
604
605 void VisitContinueStmt(const ContinueStmt *S) {
606 RecordStmtCount(S);
607 assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
608 BreakContinueStack.back().ContinueCount += CurrentCount;
609 CurrentCount = 0;
610 RecordNextStmtCount = true;
611 }
612
613 void VisitWhileStmt(const WhileStmt *S) {
614 RecordStmtCount(S);
615 uint64_t ParentCount = CurrentCount;
616
617 BreakContinueStack.push_back(Elt: BreakContinue());
618 // Visit the body region first so the break/continue adjustments can be
619 // included when visiting the condition.
620 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
621 CountMap[S->getBody()] = CurrentCount;
622 Visit(S: S->getBody());
623 uint64_t BackedgeCount = CurrentCount;
624
625 // ...then go back and propagate counts through the condition. The count
626 // at the start of the condition is the sum of the incoming edges,
627 // the backedge from the end of the loop body, and the edges from
628 // continue statements.
629 BreakContinue BC = BreakContinueStack.pop_back_val();
630 uint64_t CondCount =
631 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
632 CountMap[S->getCond()] = CondCount;
633 Visit(S: S->getCond());
634 setCount(BC.BreakCount + CondCount - BodyCount);
635 RecordNextStmtCount = true;
636 }
637
638 void VisitDoStmt(const DoStmt *S) {
639 RecordStmtCount(S);
640 uint64_t LoopCount = PGO.getRegionCount(S);
641
642 BreakContinueStack.push_back(Elt: BreakContinue());
643 // The count doesn't include the fallthrough from the parent scope. Add it.
644 uint64_t BodyCount = setCount(LoopCount + CurrentCount);
645 CountMap[S->getBody()] = BodyCount;
646 Visit(S: S->getBody());
647 uint64_t BackedgeCount = CurrentCount;
648
649 BreakContinue BC = BreakContinueStack.pop_back_val();
650 // The count at the start of the condition is equal to the count at the
651 // end of the body, plus any continues.
652 uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
653 CountMap[S->getCond()] = CondCount;
654 Visit(S: S->getCond());
655 setCount(BC.BreakCount + CondCount - LoopCount);
656 RecordNextStmtCount = true;
657 }
658
659 void VisitForStmt(const ForStmt *S) {
660 RecordStmtCount(S);
661 if (S->getInit())
662 Visit(S: S->getInit());
663
664 uint64_t ParentCount = CurrentCount;
665
666 BreakContinueStack.push_back(Elt: BreakContinue());
667 // Visit the body region first. (This is basically the same as a while
668 // loop; see further comments in VisitWhileStmt.)
669 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
670 CountMap[S->getBody()] = BodyCount;
671 Visit(S: S->getBody());
672 uint64_t BackedgeCount = CurrentCount;
673 BreakContinue BC = BreakContinueStack.pop_back_val();
674
675 // The increment is essentially part of the body but it needs to include
676 // the count for all the continue statements.
677 if (S->getInc()) {
678 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
679 CountMap[S->getInc()] = IncCount;
680 Visit(S: S->getInc());
681 }
682
683 // ...then go back and propagate counts through the condition.
684 uint64_t CondCount =
685 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
686 if (S->getCond()) {
687 CountMap[S->getCond()] = CondCount;
688 Visit(S: S->getCond());
689 }
690 setCount(BC.BreakCount + CondCount - BodyCount);
691 RecordNextStmtCount = true;
692 }
693
694 void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
695 RecordStmtCount(S);
696 if (S->getInit())
697 Visit(S: S->getInit());
698 Visit(S: S->getLoopVarStmt());
699 Visit(S: S->getRangeStmt());
700 Visit(S: S->getBeginStmt());
701 Visit(S: S->getEndStmt());
702
703 uint64_t ParentCount = CurrentCount;
704 BreakContinueStack.push_back(Elt: BreakContinue());
705 // Visit the body region first. (This is basically the same as a while
706 // loop; see further comments in VisitWhileStmt.)
707 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
708 CountMap[S->getBody()] = BodyCount;
709 Visit(S: S->getBody());
710 uint64_t BackedgeCount = CurrentCount;
711 BreakContinue BC = BreakContinueStack.pop_back_val();
712
713 // The increment is essentially part of the body but it needs to include
714 // the count for all the continue statements.
715 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
716 CountMap[S->getInc()] = IncCount;
717 Visit(S: S->getInc());
718
719 // ...then go back and propagate counts through the condition.
720 uint64_t CondCount =
721 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
722 CountMap[S->getCond()] = CondCount;
723 Visit(S: S->getCond());
724 setCount(BC.BreakCount + CondCount - BodyCount);
725 RecordNextStmtCount = true;
726 }
727
728 void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
729 RecordStmtCount(S);
730 Visit(S: S->getElement());
731 uint64_t ParentCount = CurrentCount;
732 BreakContinueStack.push_back(Elt: BreakContinue());
733 // Counter tracks the body of the loop.
734 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
735 CountMap[S->getBody()] = BodyCount;
736 Visit(S: S->getBody());
737 uint64_t BackedgeCount = CurrentCount;
738 BreakContinue BC = BreakContinueStack.pop_back_val();
739
740 setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
741 BodyCount);
742 RecordNextStmtCount = true;
743 }
744
745 void VisitSwitchStmt(const SwitchStmt *S) {
746 RecordStmtCount(S);
747 if (S->getInit())
748 Visit(S: S->getInit());
749 Visit(S: S->getCond());
750 CurrentCount = 0;
751 BreakContinueStack.push_back(Elt: BreakContinue());
752 Visit(S: S->getBody());
753 // If the switch is inside a loop, add the continue counts.
754 BreakContinue BC = BreakContinueStack.pop_back_val();
755 if (!BreakContinueStack.empty())
756 BreakContinueStack.back().ContinueCount += BC.ContinueCount;
757 // Counter tracks the exit block of the switch.
758 setCount(PGO.getRegionCount(S));
759 RecordNextStmtCount = true;
760 }
761
762 void VisitSwitchCase(const SwitchCase *S) {
763 RecordNextStmtCount = false;
764 // Counter for this particular case. This counts only jumps from the
765 // switch header and does not include fallthrough from the case before
766 // this one.
767 uint64_t CaseCount = PGO.getRegionCount(S);
768 setCount(CurrentCount + CaseCount);
769 // We need the count without fallthrough in the mapping, so it's more useful
770 // for branch probabilities.
771 CountMap[S] = CaseCount;
772 RecordNextStmtCount = true;
773 Visit(S: S->getSubStmt());
774 }
775
776 void VisitIfStmt(const IfStmt *S) {
777 RecordStmtCount(S);
778
779 if (S->isConsteval()) {
780 const Stmt *Stm = S->isNegatedConsteval() ? S->getThen() : S->getElse();
781 if (Stm)
782 Visit(S: Stm);
783 return;
784 }
785
786 uint64_t ParentCount = CurrentCount;
787 if (S->getInit())
788 Visit(S: S->getInit());
789 Visit(S: S->getCond());
790
791 // Counter tracks the "then" part of an if statement. The count for
792 // the "else" part, if it exists, will be calculated from this counter.
793 uint64_t ThenCount = setCount(PGO.getRegionCount(S));
794 CountMap[S->getThen()] = ThenCount;
795 Visit(S: S->getThen());
796 uint64_t OutCount = CurrentCount;
797
798 uint64_t ElseCount = ParentCount - ThenCount;
799 if (S->getElse()) {
800 setCount(ElseCount);
801 CountMap[S->getElse()] = ElseCount;
802 Visit(S: S->getElse());
803 OutCount += CurrentCount;
804 } else
805 OutCount += ElseCount;
806 setCount(OutCount);
807 RecordNextStmtCount = true;
808 }
809
810 void VisitCXXTryStmt(const CXXTryStmt *S) {
811 RecordStmtCount(S);
812 Visit(S: S->getTryBlock());
813 for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
814 Visit(S: S->getHandler(i: I));
815 // Counter tracks the continuation block of the try statement.
816 setCount(PGO.getRegionCount(S));
817 RecordNextStmtCount = true;
818 }
819
820 void VisitCXXCatchStmt(const CXXCatchStmt *S) {
821 RecordNextStmtCount = false;
822 // Counter tracks the catch statement's handler block.
823 uint64_t CatchCount = setCount(PGO.getRegionCount(S));
824 CountMap[S] = CatchCount;
825 Visit(S: S->getHandlerBlock());
826 }
827
828 void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
829 RecordStmtCount(S: E);
830 uint64_t ParentCount = CurrentCount;
831 Visit(S: E->getCond());
832
833 // Counter tracks the "true" part of a conditional operator. The
834 // count in the "false" part will be calculated from this counter.
835 uint64_t TrueCount = setCount(PGO.getRegionCount(S: E));
836 CountMap[E->getTrueExpr()] = TrueCount;
837 Visit(S: E->getTrueExpr());
838 uint64_t OutCount = CurrentCount;
839
840 uint64_t FalseCount = setCount(ParentCount - TrueCount);
841 CountMap[E->getFalseExpr()] = FalseCount;
842 Visit(S: E->getFalseExpr());
843 OutCount += CurrentCount;
844
845 setCount(OutCount);
846 RecordNextStmtCount = true;
847 }
848
849 void VisitBinLAnd(const BinaryOperator *E) {
850 RecordStmtCount(S: E);
851 uint64_t ParentCount = CurrentCount;
852 Visit(S: E->getLHS());
853 // Counter tracks the right hand side of a logical and operator.
854 uint64_t RHSCount = setCount(PGO.getRegionCount(S: E));
855 CountMap[E->getRHS()] = RHSCount;
856 Visit(S: E->getRHS());
857 setCount(ParentCount + RHSCount - CurrentCount);
858 RecordNextStmtCount = true;
859 }
860
861 void VisitBinLOr(const BinaryOperator *E) {
862 RecordStmtCount(S: E);
863 uint64_t ParentCount = CurrentCount;
864 Visit(S: E->getLHS());
865 // Counter tracks the right hand side of a logical or operator.
866 uint64_t RHSCount = setCount(PGO.getRegionCount(S: E));
867 CountMap[E->getRHS()] = RHSCount;
868 Visit(S: E->getRHS());
869 setCount(ParentCount + RHSCount - CurrentCount);
870 RecordNextStmtCount = true;
871 }
872};
873} // end anonymous namespace
874
875void PGOHash::combine(HashType Type) {
876 // Check that we never combine 0 and only have six bits.
877 assert(Type && "Hash is invalid: unexpected type 0");
878 assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
879
880 // Pass through MD5 if enough work has built up.
881 if (Count && Count % NumTypesPerWord == 0) {
882 using namespace llvm::support;
883 uint64_t Swapped =
884 endian::byte_swap<uint64_t>(value: Working, endian: llvm::endianness::little);
885 MD5.update(Data: llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
886 Working = 0;
887 }
888
889 // Accumulate the current type.
890 ++Count;
891 Working = Working << NumBitsPerType | Type;
892}
893
894uint64_t PGOHash::finalize() {
895 // Use Working as the hash directly if we never used MD5.
896 if (Count <= NumTypesPerWord)
897 // No need to byte swap here, since none of the math was endian-dependent.
898 // This number will be byte-swapped as required on endianness transitions,
899 // so we will see the same value on the other side.
900 return Working;
901
902 // Check for remaining work in Working.
903 if (Working) {
904 // Keep the buggy behavior from v1 and v2 for backward-compatibility. This
905 // is buggy because it converts a uint64_t into an array of uint8_t.
906 if (HashVersion < PGO_HASH_V3) {
907 MD5.update(Data: {(uint8_t)Working});
908 } else {
909 using namespace llvm::support;
910 uint64_t Swapped =
911 endian::byte_swap<uint64_t>(value: Working, endian: llvm::endianness::little);
912 MD5.update(Data: llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
913 }
914 }
915
916 // Finalize the MD5 and return the hash.
917 llvm::MD5::MD5Result Result;
918 MD5.final(Result);
919 return Result.low();
920}
921
/// Decide whether \p Fn needs Clang PGO instrumentation and/or profile-data
/// application; if so, set up its PGO name, counter map, function hash,
/// coverage mapping, and (when a profile reader is present) its region
/// counts and entry-count attribute. The order of the early-return guards
/// below matters: ClearUnusedCoverageMapping is a side effect that must
/// happen before the NoProfile/SkipProfile checks.
void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
  const Decl *D = GD.getDecl();
  if (!D->hasBody())
    return;

  // Skip CUDA/HIP kernel launch stub functions.
  if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice &&
      D->hasAttr<CUDAGlobalAttr>())
    return;

  // Nothing to do unless we are instrumenting or applying a loaded profile.
  bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
  if (!InstrumentRegions && !PGOReader)
    return;
  if (D->isImplicit())
    return;
  // Constructors and destructors may be represented by several functions in IR.
  // If so, instrument only base variant, others are implemented by delegation
  // to the base one, it would be counted twice otherwise.
  if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
    if (const auto *CCD = dyn_cast<CXXConstructorDecl>(Val: D))
      if (GD.getCtorType() != Ctor_Base &&
          CodeGenFunction::IsConstructorDelegationValid(Ctor: CCD))
        return;
  }
  if (isa<CXXDestructorDecl>(Val: D) && GD.getDtorType() != Dtor_Base)
    return;

  // Drop any deferred coverage mapping for D before honoring the per-function
  // opt-out attributes.
  CGM.ClearUnusedCoverageMapping(D);
  if (Fn->hasFnAttribute(Kind: llvm::Attribute::NoProfile))
    return;
  if (Fn->hasFnAttribute(Kind: llvm::Attribute::SkipProfile))
    return;

  // System-header functions are excluded unless system-header coverage was
  // explicitly requested.
  SourceManager &SM = CGM.getContext().getSourceManager();
  if (!llvm::coverage::SystemHeadersCoverage &&
      SM.isInSystemHeader(Loc: D->getLocation()))
    return;

  setFuncName(Fn);

  mapRegionCounters(D);
  if (CGM.getCodeGenOpts().CoverageMapping)
    emitCounterRegionMapping(D);
  if (PGOReader) {
    loadRegionCounts(PGOReader, IsInMainFile: SM.isInMainFile(Loc: D->getLocation()));
    computeRegionCounts(D);
    applyFunctionAttributes(PGOReader, Fn);
  }
}
972
973void CodeGenPGO::mapRegionCounters(const Decl *D) {
974 // Use the latest hash version when inserting instrumentation, but use the
975 // version in the indexed profile if we're reading PGO data.
976 PGOHashVersion HashVersion = PGO_HASH_LATEST;
977 uint64_t ProfileVersion = llvm::IndexedInstrProf::Version;
978 if (auto *PGOReader = CGM.getPGOReader()) {
979 HashVersion = getPGOHashVersion(PGOReader, CGM);
980 ProfileVersion = PGOReader->getVersion();
981 }
982
983 // If MC/DC is enabled, set the MaxConditions to a preset value. Otherwise,
984 // set it to zero. This value impacts the number of conditions accepted in a
985 // given boolean expression, which impacts the size of the bitmap used to
986 // track test vector execution for that boolean expression. Because the
987 // bitmap scales exponentially (2^n) based on the number of conditions seen,
988 // the maximum value is hard-coded at 6 conditions, which is more than enough
989 // for most embedded applications. Setting a maximum value prevents the
990 // bitmap footprint from growing too large without the user's knowledge. In
991 // the future, this value could be adjusted with a command-line option.
992 unsigned MCDCMaxConditions =
993 (CGM.getCodeGenOpts().MCDCCoverage ? CGM.getCodeGenOpts().MCDCMaxConds
994 : 0);
995
996 RegionCounterMap.reset(p: new llvm::DenseMap<const Stmt *, CounterPair>);
997 RegionMCDCState.reset(p: new MCDC::State);
998 MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap,
999 *RegionMCDCState, MCDCMaxConditions, CGM.getDiags());
1000 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D))
1001 Walker.TraverseDecl(D: const_cast<FunctionDecl *>(FD));
1002 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(Val: D))
1003 Walker.TraverseDecl(D: const_cast<ObjCMethodDecl *>(MD));
1004 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(Val: D))
1005 Walker.TraverseDecl(D: const_cast<BlockDecl *>(BD));
1006 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(Val: D))
1007 Walker.TraverseDecl(D: const_cast<CapturedDecl *>(CD));
1008 assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
1009 NumRegionCounters = Walker.NextCounter;
1010 FunctionHash = Walker.Hash.finalize();
1011 if (HashVersion >= PGO_HASH_V4)
1012 FunctionHash &= llvm::NamedInstrProfRecord::FUNC_HASH_MASK;
1013}
1014
1015bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
1016 if (!D->getBody())
1017 return true;
1018
1019 // Skip host-only functions in the CUDA device compilation and device-only
1020 // functions in the host compilation. Just roughly filter them out based on
1021 // the function attributes. If there are effectively host-only or device-only
1022 // ones, their coverage mapping may still be generated.
1023 if (CGM.getLangOpts().CUDA &&
1024 ((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() &&
1025 !D->hasAttr<CUDAGlobalAttr>()) ||
1026 (!CGM.getLangOpts().CUDAIsDevice &&
1027 (D->hasAttr<CUDAGlobalAttr>() ||
1028 (!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>())))))
1029 return true;
1030
1031 // Don't map the functions in system headers.
1032 const auto &SM = CGM.getContext().getSourceManager();
1033 auto Loc = D->getBody()->getBeginLoc();
1034 return !llvm::coverage::SystemHeadersCoverage && SM.isInSystemHeader(Loc);
1035}
1036
1037void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
1038 if (skipRegionMappingForDecl(D))
1039 return;
1040
1041 std::string CoverageMapping;
1042 llvm::raw_string_ostream OS(CoverageMapping);
1043 RegionMCDCState->BranchByStmt.clear();
1044 CoverageMappingGen MappingGen(
1045 *CGM.getCoverageMapping(), CGM.getContext().getSourceManager(),
1046 CGM.getLangOpts(), RegionCounterMap.get(), RegionMCDCState.get());
1047 MappingGen.emitCounterMapping(D, OS);
1048
1049 if (CoverageMapping.empty())
1050 return;
1051
1052 // Scan max(FalseCnt) and update NumRegionCounters.
1053 unsigned MaxNumCounters = NumRegionCounters;
1054 for (const auto &[_, V] : *RegionCounterMap) {
1055 assert((!V.Executed.hasValue() || MaxNumCounters > V.Executed) &&
1056 "TrueCnt should not be reassigned");
1057 if (V.Skipped.hasValue())
1058 MaxNumCounters = std::max(a: MaxNumCounters, b: V.Skipped + 1);
1059 }
1060 NumRegionCounters = MaxNumCounters;
1061
1062 CGM.getCoverageMapping()->addFunctionMappingRecord(
1063 FunctionName: FuncNameVar, FunctionNameValue: FuncName, FunctionHash, CoverageMapping);
1064}
1065
1066void
1067CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
1068 llvm::GlobalValue::LinkageTypes Linkage) {
1069 if (skipRegionMappingForDecl(D))
1070 return;
1071
1072 std::string CoverageMapping;
1073 llvm::raw_string_ostream OS(CoverageMapping);
1074 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
1075 CGM.getContext().getSourceManager(),
1076 CGM.getLangOpts());
1077 MappingGen.emitEmptyMapping(D, OS);
1078
1079 if (CoverageMapping.empty())
1080 return;
1081
1082 setFuncName(Name, Linkage);
1083 CGM.getCoverageMapping()->addFunctionMappingRecord(
1084 FunctionName: FuncNameVar, FunctionNameValue: FuncName, FunctionHash, CoverageMapping, IsUsed: false);
1085}
1086
1087void CodeGenPGO::computeRegionCounts(const Decl *D) {
1088 StmtCountMap.reset(p: new llvm::DenseMap<const Stmt *, uint64_t>);
1089 ComputeRegionCounts Walker(*StmtCountMap, *this);
1090 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D))
1091 Walker.VisitFunctionDecl(D: FD);
1092 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(Val: D))
1093 Walker.VisitObjCMethodDecl(D: MD);
1094 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(Val: D))
1095 Walker.VisitBlockDecl(D: BD);
1096 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(Val: D))
1097 Walker.VisitCapturedDecl(D: const_cast<CapturedDecl *>(CD));
1098}
1099
1100void
1101CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
1102 llvm::Function *Fn) {
1103 if (!haveRegionCounts())
1104 return;
1105
1106 uint64_t FunctionCount = getRegionCount(S: nullptr);
1107 Fn->setEntryCount(Count: FunctionCount);
1108}
1109
1110bool CodeGenPGO::hasSkipCounter(const Stmt *S) const {
1111 if (!RegionCounterMap)
1112 return false;
1113
1114 auto I = RegionCounterMap->find(Val: S);
1115 if (I == RegionCounterMap->end())
1116 return false;
1117
1118 return I->second.Skipped.hasValue();
1119}
1120
/// Emit the instrprof counter intrinsic for the counter attached to \p S.
///
/// \param UseSkipPath selects the Skipped counter instead of the Executed one.
/// \param UseBoth NOTE(review): not referenced in this body — presumably
///        meaningful to callers or reserved for them; confirm before use.
/// \param StepV optional step; when non-null, instrprof_increment_step is
///        emitted instead of a plain increment.
void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S,
                                           bool UseSkipPath, bool UseBoth,
                                           llvm::Value *StepV) {
  if (!RegionCounterMap)
    return;

  // Allocate S in the Map regardless of emission.
  const auto &TheCounterPair = (*RegionCounterMap)[S];

  // Without an insertion point there is nowhere to emit the intrinsic.
  if (!Builder.GetInsertBlock())
    return;

  const CounterPair::ValueOpt &Counter =
      (UseSkipPath ? TheCounterPair.Skipped : TheCounterPair.Executed);
  if (!Counter.hasValue())
    return;

  // Make sure that pointer to global is passed in with zero addrspace
  // This is relevant during GPU profiling
  auto *NormalizedFuncNameVarPtr =
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
          C: FuncNameVar, Ty: llvm::PointerType::get(C&: CGM.getLLVMContext(), AddressSpace: 0));

  // The first four entries are shared by all three intrinsics; only
  // instrprof_increment_step also consumes the trailing StepV.
  llvm::Value *Args[] = {
      NormalizedFuncNameVarPtr, Builder.getInt64(C: FunctionHash),
      Builder.getInt32(C: NumRegionCounters), Builder.getInt32(C: Counter), StepV};

  if (llvm::EnableSingleByteCoverage) {
    assert(!StepV && "StepV is not supported in single byte counter mode");
    Builder.CreateCall(Callee: CGM.getIntrinsic(IID: llvm::Intrinsic::instrprof_cover),
                       Args: ArrayRef(Args, 4));
  } else if (!StepV)
    Builder.CreateCall(Callee: CGM.getIntrinsic(IID: llvm::Intrinsic::instrprof_increment),
                       Args: ArrayRef(Args, 4));
  else
    Builder.CreateCall(
        Callee: CGM.getIntrinsic(IID: llvm::Intrinsic::instrprof_increment_step), Args);
}
1159
1160bool CodeGenPGO::canEmitMCDCCoverage(const CGBuilderTy &Builder) {
1161 return (CGM.getCodeGenOpts().hasProfileClangInstr() &&
1162 CGM.getCodeGenOpts().MCDCCoverage && Builder.GetInsertBlock());
1163}
1164
1165void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) {
1166 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1167 return;
1168
1169 auto *I8PtrTy = llvm::PointerType::getUnqual(C&: CGM.getLLVMContext());
1170
1171 // Emit intrinsic representing MCDC bitmap parameters at function entry.
1172 // This is used by the instrumentation pass, but it isn't actually lowered to
1173 // anything.
1174 llvm::Value *Args[3] = {llvm::ConstantExpr::getBitCast(C: FuncNameVar, Ty: I8PtrTy),
1175 Builder.getInt64(C: FunctionHash),
1176 Builder.getInt32(C: RegionMCDCState->BitmapBits)};
1177 Builder.CreateCall(
1178 Callee: CGM.getIntrinsic(IID: llvm::Intrinsic::instrprof_mcdc_parameters), Args);
1179}
1180
1181/// Fill mcdc.addr order by ID.
1182std::vector<Address *>
1183CodeGenPGO::getMCDCCondBitmapAddrArray(CGBuilderTy &Builder) {
1184 std::vector<Address *> Result;
1185
1186 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1187 return Result;
1188
1189 SmallVector<std::pair<unsigned, Address *>> SortedPair;
1190 for (auto &[_, V] : RegionMCDCState->DecisionByStmt)
1191 if (V.isValid())
1192 SortedPair.emplace_back(Args&: V.ID, Args: &V.MCDCCondBitmapAddr);
1193
1194 llvm::sort(C&: SortedPair);
1195
1196 for (auto &[_, MCDCCondBitmapAddr] : SortedPair)
1197 Result.push_back(x: MCDCCondBitmapAddr);
1198
1199 return Result;
1200}
1201
/// Emit the intrinsic that sets the bit for the executed test vector of the
/// decision \p S in the global MC/DC bitmap. The test-vector index is read
/// from the decision's per-function stack temporary.
void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder,
                                                const Expr *S,
                                                CodeGenFunction &CGF) {
  if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
    return;

  // Decisions are keyed on the paren-stripped expression.
  S = S->IgnoreParens();

  auto DecisionStateIter = RegionMCDCState->DecisionByStmt.find(Val: S);
  if (DecisionStateIter == RegionMCDCState->DecisionByStmt.end())
    return;

  // No temporary was allocated for this decision; nothing to read.
  auto &MCDCCondBitmapAddr = DecisionStateIter->second.MCDCCondBitmapAddr;
  if (!MCDCCondBitmapAddr.isValid())
    return;

  // Don't create tvbitmap_update if the record is allocated but excluded.
  // Or `bitmap |= (1 << 0)` would be wrongly executed to the next bitmap.
  if (DecisionStateIter->second.Indices.size() == 0)
    return;

  // Extract the offset of the global bitmap associated with this expression.
  unsigned MCDCTestVectorBitmapOffset = DecisionStateIter->second.BitmapIdx;
  auto *I8PtrTy = llvm::PointerType::getUnqual(C&: CGM.getLLVMContext());

  // Emit intrinsic responsible for updating the global bitmap corresponding to
  // a boolean expression. The index being set is based on the value loaded
  // from a pointer to a dedicated temporary value on the stack that is itself
  // updated via emitMCDCCondBitmapReset() and emitMCDCCondBitmapUpdate(). The
  // index represents an executed test vector.
  llvm::Value *Args[4] = {llvm::ConstantExpr::getBitCast(C: FuncNameVar, Ty: I8PtrTy),
                          Builder.getInt64(C: FunctionHash),
                          Builder.getInt32(C: MCDCTestVectorBitmapOffset),
                          MCDCCondBitmapAddr.emitRawPointer(CGF)};
  Builder.CreateCall(
      Callee: CGM.getIntrinsic(IID: llvm::Intrinsic::instrprof_mcdc_tvbitmap_update), Args);
}
1239
1240void CodeGenPGO::emitMCDCCondBitmapReset(CGBuilderTy &Builder, const Expr *S) {
1241 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1242 return;
1243
1244 auto I = RegionMCDCState->DecisionByStmt.find(Val: S->IgnoreParens());
1245 if (I == RegionMCDCState->DecisionByStmt.end())
1246 return;
1247
1248 auto &MCDCCondBitmapAddr = I->second.MCDCCondBitmapAddr;
1249 if (!MCDCCondBitmapAddr.isValid())
1250 return;
1251
1252 // Emit intrinsic that resets a dedicated temporary value on the stack to 0.
1253 Builder.CreateStore(Val: Builder.getInt32(C: 0), Addr: MCDCCondBitmapAddr);
1254}
1255
/// Fold the outcome \p Val of condition \p S into its decision's running
/// test-vector index: the temporary is advanced by the condition's
/// true-branch or false-branch index increment, selected by \p Val.
void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S,
                                          llvm::Value *Val,
                                          CodeGenFunction &CGF) {
  if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
    return;

  // Even though, for simplicity, parentheses and unary logical-NOT operators
  // are considered part of their underlying condition for both MC/DC and
  // branch coverage, the condition IDs themselves are assigned and tracked
  // using the underlying condition itself. This is done solely for
  // consistency since parentheses and logical-NOTs are ignored when checking
  // whether the condition is actually an instrumentable condition. This can
  // also make debugging a bit easier.
  S = CodeGenFunction::stripCond(C: S);

  auto BranchStateIter = RegionMCDCState->BranchByStmt.find(Val: S);
  if (BranchStateIter == RegionMCDCState->BranchByStmt.end())
    return;

  // Extract the ID of the condition we are setting in the bitmap.
  const auto &Branch = BranchStateIter->second;
  assert(Branch.ID >= 0 && "Condition has no ID!");
  assert(Branch.DecisionStmt);

  // Cancel the emission if the Decision is erased after the allocation.
  const auto DecisionIter =
      RegionMCDCState->DecisionByStmt.find(Val: Branch.DecisionStmt);
  if (DecisionIter == RegionMCDCState->DecisionByStmt.end())
    return;

  // No temporary exists for this decision; nothing to update.
  auto &MCDCCondBitmapAddr = DecisionIter->second.MCDCCondBitmapAddr;
  if (!MCDCCondBitmapAddr.isValid())
    return;

  // Per-condition increments: TVIdxs[true]/TVIdxs[false] are the index deltas
  // to apply when this condition evaluates true/false respectively.
  const auto &TVIdxs = DecisionIter->second.Indices[Branch.ID];

  auto *CurTV = Builder.CreateLoad(Addr: MCDCCondBitmapAddr,
                                   Name: "mcdc." + Twine(Branch.ID + 1) + ".cur");
  // Compute both candidate updates and select by the runtime outcome Val.
  auto *NewTV = Builder.CreateAdd(LHS: CurTV, RHS: Builder.getInt32(C: TVIdxs[true]));
  NewTV = Builder.CreateSelect(
      C: Val, True: NewTV, False: Builder.CreateAdd(LHS: CurTV, RHS: Builder.getInt32(C: TVIdxs[false])));
  Builder.CreateStore(Val: NewTV, Addr: MCDCCondBitmapAddr);
}
1299
1300void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) {
1301 if (CGM.getCodeGenOpts().hasProfileClangInstr())
1302 M.addModuleFlag(Behavior: llvm::Module::Warning, Key: "EnableValueProfiling",
1303 Val: uint32_t(EnableValueProfiling));
1304}
1305
1306void CodeGenPGO::setProfileVersion(llvm::Module &M) {
1307 if (CGM.getCodeGenOpts().hasProfileClangInstr() &&
1308 llvm::EnableSingleByteCoverage) {
1309 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
1310 llvm::Type *IntTy64 = llvm::Type::getInt64Ty(C&: M.getContext());
1311 uint64_t ProfileVersion =
1312 (INSTR_PROF_RAW_VERSION | VARIANT_MASK_BYTE_COVERAGE);
1313
1314 auto IRLevelVersionVariable = new llvm::GlobalVariable(
1315 M, IntTy64, true, llvm::GlobalValue::WeakAnyLinkage,
1316 llvm::Constant::getIntegerValue(Ty: IntTy64,
1317 V: llvm::APInt(64, ProfileVersion)),
1318 VarName);
1319
1320 IRLevelVersionVariable->setVisibility(llvm::GlobalValue::HiddenVisibility);
1321 llvm::Triple TT(M.getTargetTriple());
1322 if (TT.isGPU())
1323 IRLevelVersionVariable->setVisibility(
1324 llvm::GlobalValue::ProtectedVisibility);
1325 if (TT.supportsCOMDAT()) {
1326 IRLevelVersionVariable->setLinkage(llvm::GlobalValue::ExternalLinkage);
1327 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(Name: VarName));
1328 }
1329 IRLevelVersionVariable->setDSOLocal(true);
1330 }
1331}
1332
// This method either inserts a call to the profile run-time during
// instrumentation or puts profile data into metadata for PGO use.
//
// Note the two modes are mutually exclusive per call: when Clang
// instrumentation is active the intrinsic is emitted and we return early;
// otherwise, with a loaded profile, the value site is annotated. Both modes
// advance NumValueSites[ValueKind] so site indices stay in sync.
void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
    llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {

  if (!EnableValueProfiling)
    return;

  // Nothing to profile without a value, a site, and a live insert point.
  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
    return;

  // Constants carry no useful value-profile information.
  if (isa<llvm::Constant>(Val: ValuePtr))
    return;

  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
  if (InstrumentValueSites && RegionCounterMap) {
    // Emit the intrinsic immediately before the value site, then restore the
    // builder's previous insertion point.
    auto BuilderInsertPoint = Builder.saveIP();
    Builder.SetInsertPoint(ValueSite);
    llvm::Value *Args[5] = {
        FuncNameVar,
        Builder.getInt64(C: FunctionHash),
        Builder.CreatePtrToInt(V: ValuePtr, DestTy: Builder.getInt64Ty()),
        Builder.getInt32(C: ValueKind),
        Builder.getInt32(C: NumValueSites[ValueKind]++)
    };
    Builder.CreateCall(
        Callee: CGM.getIntrinsic(IID: llvm::Intrinsic::instrprof_value_profile), Args);
    Builder.restoreIP(IP: BuilderInsertPoint);
    return;
  }

  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
  if (PGOReader && haveRegionCounts()) {
    // We record the top most called three functions at each call site.
    // Profile metadata contains "VP" string identifying this metadata
    // as value profiling data, then a uint32_t value for the value profiling
    // kind, a uint64_t value for the total number of times the call is
    // executed, followed by the function hash and execution count (uint64_t)
    // pairs for each function.
    if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
      return;

    llvm::annotateValueSite(M&: CGM.getModule(), Inst&: *ValueSite, InstrProfR: *ProfRecord,
                            ValueKind: (llvm::InstrProfValueKind)ValueKind,
                            SiteIndx: NumValueSites[ValueKind]);

    NumValueSites[ValueKind]++;
  }
}
1382
1383void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
1384 bool IsInMainFile) {
1385 CGM.getPGOStats().addVisited(MainFile: IsInMainFile);
1386 RegionCounts.clear();
1387 auto RecordExpected = PGOReader->getInstrProfRecord(FuncName, FuncHash: FunctionHash);
1388 if (auto E = RecordExpected.takeError()) {
1389 auto IPE = std::get<0>(in: llvm::InstrProfError::take(E: std::move(E)));
1390 if (IPE == llvm::instrprof_error::unknown_function)
1391 CGM.getPGOStats().addMissing(MainFile: IsInMainFile);
1392 else if (IPE == llvm::instrprof_error::hash_mismatch)
1393 CGM.getPGOStats().addMismatched(MainFile: IsInMainFile);
1394 else if (IPE == llvm::instrprof_error::malformed)
1395 // TODO: Consider a more specific warning for this case.
1396 CGM.getPGOStats().addMismatched(MainFile: IsInMainFile);
1397 return;
1398 }
1399 ProfRecord =
1400 std::make_unique<llvm::InstrProfRecord>(args: std::move(RecordExpected.get()));
1401 RegionCounts = ProfRecord->Counts;
1402}
1403
/// Calculate what to divide by to scale weights.
///
/// Given the maximum weight \p MaxWeight, return a divisor such that every
/// weight no larger than \p MaxWeight scales to strictly less than
/// UINT32_MAX.
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
  if (MaxWeight < UINT32_MAX)
    return 1;
  return MaxWeight / UINT32_MAX + 1;
}
1411
/// Scale an individual branch weight (and add 1).
///
/// Scale a 64-bit \p Weight down to 32-bits using \p Scale.
///
/// According to Laplace's Rule of Succession, it is better to compute the
/// weight based on the count plus 1, so universally add 1 to the value.
///
/// \pre \p Scale was calculated by \a calculateWeightScale() with a weight no
/// greater than \p Weight.
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
  assert(Scale && "scale by 0?");
  const uint64_t Scaled = Weight / Scale + 1;
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
  return static_cast<uint32_t>(Scaled);
}
1427
1428llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
1429 uint64_t FalseCount) const {
1430 // Check for empty weights.
1431 if (!TrueCount && !FalseCount)
1432 return nullptr;
1433
1434 // Calculate how to scale down to 32-bits.
1435 uint64_t Scale = calculateWeightScale(MaxWeight: std::max(a: TrueCount, b: FalseCount));
1436
1437 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1438 return MDHelper.createBranchWeights(TrueWeight: scaleBranchWeight(Weight: TrueCount, Scale),
1439 FalseWeight: scaleBranchWeight(Weight: FalseCount, Scale));
1440}
1441
1442llvm::MDNode *
1443CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) const {
1444 // We need at least two elements to create meaningful weights.
1445 if (Weights.size() < 2)
1446 return nullptr;
1447
1448 // Check for empty weights.
1449 uint64_t MaxWeight = *llvm::max_element(Range&: Weights);
1450 if (MaxWeight == 0)
1451 return nullptr;
1452
1453 // Calculate how to scale down to 32-bits.
1454 uint64_t Scale = calculateWeightScale(MaxWeight);
1455
1456 SmallVector<uint32_t, 16> ScaledWeights;
1457 ScaledWeights.reserve(N: Weights.size());
1458 for (uint64_t W : Weights)
1459 ScaledWeights.push_back(Elt: scaleBranchWeight(Weight: W, Scale));
1460
1461 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1462 return MDHelper.createBranchWeights(Weights: ScaledWeights);
1463}
1464
1465llvm::MDNode *
1466CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
1467 uint64_t LoopCount) const {
1468 if (!PGO->haveRegionCounts())
1469 return nullptr;
1470 std::optional<uint64_t> CondCount = PGO->getStmtCount(S: Cond);
1471 if (!CondCount || *CondCount == 0)
1472 return nullptr;
1473 return createProfileWeights(TrueCount: LoopCount,
1474 FalseCount: std::max(a: *CondCount, b: LoopCount) - LoopCount);
1475}
1476
1477void CodeGenFunction::incrementProfileCounter(CounterForIncrement ExecSkip,
1478 const Stmt *S, bool UseBoth,
1479 llvm::Value *StepV) {
1480 if (CGM.getCodeGenOpts().hasProfileClangInstr() &&
1481 !CurFn->hasFnAttribute(Kind: llvm::Attribute::NoProfile) &&
1482 !CurFn->hasFnAttribute(Kind: llvm::Attribute::SkipProfile)) {
1483 auto AL = ApplyDebugLocation::CreateArtificial(CGF&: *this);
1484 PGO->emitCounterSetOrIncrement(Builder, S, UseSkipPath: (ExecSkip == UseSkipPath),
1485 UseBoth, StepV);
1486 }
1487 PGO->setCurrentStmt(S);
1488}
1489
/// Forward to CodeGenPGO: does \p S have a skip-path (FalseCnt) counter?
bool CodeGenFunction::hasSkipCounter(const Stmt *S) const {
  return PGO->hasSkipCounter(S);
}
/// Forward to CodeGenPGO: mark \p S's (skipped or executed) counter as used.
void CodeGenFunction::markStmtAsUsed(bool Skipped, const Stmt *S) {
  PGO->markStmtAsUsed(Skipped, S);
}
/// Forward to CodeGenPGO: mark \p S as possibly used for counter accounting.
void CodeGenFunction::markStmtMaybeUsed(const Stmt *S) {
  PGO->markStmtMaybeUsed(S);
}
1499
1500void CodeGenFunction::maybeCreateMCDCCondBitmap() {
1501 if (isMCDCCoverageEnabled()) {
1502 PGO->emitMCDCParameters(Builder);
1503
1504 // Set up MCDCCondBitmapAddr for each Decision.
1505 // Note: This doesn't initialize Addrs in invalidated Decisions.
1506 for (auto *MCDCCondBitmapAddr : PGO->getMCDCCondBitmapAddrArray(Builder))
1507 *MCDCCondBitmapAddr =
1508 CreateIRTemp(T: getContext().UnsignedIntTy, Name: "mcdc.addr");
1509 }
1510}
/// Forward to CodeGenPGO: is \p E tracked as an MC/DC Decision?
bool CodeGenFunction::isMCDCDecisionExpr(const Expr *E) const {
  return PGO->isMCDCDecisionExpr(E);
}
/// Forward to CodeGenPGO: is \p E tracked as an MC/DC branch condition?
bool CodeGenFunction::isMCDCBranchExpr(const Expr *E) const {
  return PGO->isMCDCBranchExpr(E);
}
1517void CodeGenFunction::maybeResetMCDCCondBitmap(const Expr *E) {
1518 if (isMCDCCoverageEnabled() && isBinaryLogicalOp(E)) {
1519 PGO->emitMCDCCondBitmapReset(Builder, S: E);
1520 PGO->setCurrentStmt(E);
1521 }
1522}
1523void CodeGenFunction::maybeUpdateMCDCTestVectorBitmap(const Expr *E) {
1524 if (isMCDCCoverageEnabled() && isBinaryLogicalOp(E)) {
1525 PGO->emitMCDCTestVectorBitmapUpdate(Builder, S: E, CGF&: *this);
1526 PGO->setCurrentStmt(E);
1527 }
1528}
1529
1530void CodeGenFunction::maybeUpdateMCDCCondBitmap(const Expr *E,
1531 llvm::Value *Val) {
1532 if (isMCDCCoverageEnabled()) {
1533 PGO->emitMCDCCondBitmapUpdate(Builder, S: E, Val, CGF&: *this);
1534 PGO->setCurrentStmt(E);
1535 }
1536}
1537
1538uint64_t CodeGenFunction::getProfileCount(const Stmt *S) {
1539 return PGO->getStmtCount(S).value_or(u: 0);
1540}
1541
/// Set the profiler's current count.
/// Forwards \p Count to CodeGenPGO's current-region count.
void CodeGenFunction::setCurrentProfileCount(uint64_t Count) {
  PGO->setCurrentRegionCount(Count);
}
1546
/// Get the profiler's current count. This is generally the count for the most
/// recently incremented counter.
uint64_t CodeGenFunction::getCurrentProfileCount() {
  return PGO->getCurrentRegionCount();
}
1552