1//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit warnings for potentially incorrect usage of the
10// llvm.expect intrinsic. This utility extracts the threshold values from
11// metadata associated with the instrumented Branch or Switch instruction. The
12// threshold values are then used to determine if a warning should be emmited.
13//
14// MisExpect's implementation relies on two assumptions about how branch weights
15// are managed in LLVM.
16//
17// 1) Frontend profiling weights are always in place before llvm.expect is
18// lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
19// needs to extract the branch weights and then compare them to the weights
20// being added by the llvm.expect intrinsic lowering.
21//
22// 2) Sampling and IR based profiles will *only* have branch weight metadata
23// before profiling data is consulted if they are from a lowered llvm.expect
24// intrinsic. These profiles thus always extract the expected weights and then
25// compare them to the weights collected during profiling to determine if a
26// diagnostic message is warranted.
27//
28//===----------------------------------------------------------------------===//
29
30#include "llvm/Transforms/Utils/MisExpect.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/OptimizationRemarkEmitter.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DiagnosticInfo.h"
35#include "llvm/IR/Instruction.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/LLVMContext.h"
38#include "llvm/IR/ProfDataUtils.h"
39#include "llvm/Support/BranchProbability.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/FormatVariadic.h"
43#include <algorithm>
44#include <cstdint>
45#include <functional>
46#include <numeric>
47
48#define DEBUG_TYPE "misexpect"
49
50using namespace llvm;
51using namespace misexpect;
52
53namespace llvm {
54
55// Command line option to enable/disable the warning when profile data suggests
56// a mismatch with the use of the llvm.expect intrinsic
57static cl::opt<bool> PGOWarnMisExpect(
58 "pgo-warn-misexpect", cl::init(Val: false), cl::Hidden,
59 cl::desc("Use this option to turn on/off "
60 "warnings about incorrect usage of llvm.expect intrinsics."));
61
62// Command line option for setting the diagnostic tolerance threshold
63static cl::opt<uint32_t> MisExpectTolerance(
64 "misexpect-tolerance", cl::init(Val: 0),
65 cl::desc("Prevents emitting diagnostics when profile counts are "
66 "within N% of the threshold.."));
67
68} // namespace llvm
69
70namespace {
71
72bool isMisExpectDiagEnabled(LLVMContext &Ctx) {
73 return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
74}
75
76uint32_t getMisExpectTolerance(LLVMContext &Ctx) {
77 return std::max(a: static_cast<uint32_t>(MisExpectTolerance),
78 b: Ctx.getDiagnosticsMisExpectTolerance());
79}
80
81Instruction *getInstCondition(Instruction *I) {
82 assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
83 Instruction *Ret = nullptr;
84 if (auto *B = dyn_cast<BranchInst>(Val: I)) {
85 Ret = dyn_cast<Instruction>(Val: B->getCondition());
86 }
87 // TODO: Find a way to resolve condition location for switches
88 // Using the condition of the switch seems to often resolve to an earlier
89 // point in the program, i.e. the calculation of the switch condition, rather
90 // than the switch's location in the source code. Thus, we should use the
91 // instruction to get source code locations rather than the condition to
92 // improve diagnostic output, such as the caret. If the same problem exists
93 // for branch instructions, then we should remove this function and directly
94 // use the instruction
95 //
96 else if (auto *S = dyn_cast<SwitchInst>(Val: I)) {
97 Ret = dyn_cast<Instruction>(Val: S->getCondition());
98 }
99 return Ret ? Ret : I;
100}
101
102void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
103 uint64_t ProfCount, uint64_t TotalCount) {
104 double PercentageCorrect = (double)ProfCount / TotalCount;
105 auto PerString =
106 formatv(Fmt: "{0:P} ({1} / {2})", Vals&: PercentageCorrect, Vals&: ProfCount, Vals&: TotalCount);
107 auto RemStr = formatv(
108 Fmt: "Potential performance regression from use of the llvm.expect intrinsic: "
109 "Annotation was correct on {0} of profiled executions.",
110 Vals&: PerString);
111 Twine Msg(PerString);
112 Instruction *Cond = getInstCondition(I);
113 if (isMisExpectDiagEnabled(Ctx))
114 Ctx.diagnose(DI: DiagnosticInfoMisExpect(Cond, Msg));
115 OptimizationRemarkEmitter ORE(I->getParent()->getParent());
116 ORE.emit(OptDiag&: OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
117}
118
119} // namespace
120
121namespace llvm {
122namespace misexpect {
123
124void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
125 ArrayRef<uint32_t> ExpectedWeights) {
126 // To determine if we emit a diagnostic, we need to compare the branch weights
127 // from the profile to those added by the llvm.expect intrinsic.
128 // So first, we extract the "likely" and "unlikely" weights from
129 // ExpectedWeights And determine the correct weight in the profile to compare
130 // against.
131 uint64_t LikelyBranchWeight = 0,
132 UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
133 size_t MaxIndex = 0;
134 for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) {
135 uint32_t V = ExpectedWeights[Idx];
136 if (LikelyBranchWeight < V) {
137 LikelyBranchWeight = V;
138 MaxIndex = Idx;
139 }
140 if (UnlikelyBranchWeight > V) {
141 UnlikelyBranchWeight = V;
142 }
143 }
144
145 const uint64_t ProfiledWeight = RealWeights[MaxIndex];
146 const uint64_t RealWeightsTotal =
147 std::accumulate(first: RealWeights.begin(), last: RealWeights.end(), init: (uint64_t)0,
148 binary_op: std::plus<uint64_t>());
149 const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;
150
151 uint64_t TotalBranchWeight =
152 LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
153
154 // Failing this assert means that we have corrupted metadata.
155 assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) &&
156 "TotalBranchWeight is less than the Likely branch weight");
157
158 // To determine our threshold value we need to obtain the branch probability
159 // for the weights added by llvm.expect and use that proportion to calculate
160 // our threshold based on the collected profile data.
161 auto LikelyProbablilty = BranchProbability::getBranchProbability(
162 Numerator: LikelyBranchWeight, Denominator: TotalBranchWeight);
163
164 uint64_t ScaledThreshold = LikelyProbablilty.scale(Num: RealWeightsTotal);
165
166 // clamp tolerance range to [0, 100)
167 auto Tolerance = getMisExpectTolerance(Ctx&: I.getContext());
168 Tolerance = std::clamp(val: Tolerance, lo: 0u, hi: 99u);
169
170 // Allow users to relax checking by N% i.e., if they use a 5% tolerance,
171 // then we check against 0.95*ScaledThreshold
172 if (Tolerance > 0)
173 ScaledThreshold *= (1.0 - Tolerance / 100.0);
174
175 // When the profile weight is below the threshold, we emit the diagnostic
176 if (ProfiledWeight < ScaledThreshold)
177 emitMisexpectDiagnostic(I: &I, Ctx&: I.getContext(), ProfCount: ProfiledWeight,
178 TotalCount: RealWeightsTotal);
179}
180
181void checkBackendInstrumentation(Instruction &I,
182 const ArrayRef<uint32_t> RealWeights) {
183 // Backend checking assumes any existing weight comes from an `llvm.expect`
184 // intrinsic. However, SampleProfiling + ThinLTO add branch weights multiple
185 // times, leading to an invalid assumption in our checking. Backend checks
186 // should only operate on branch weights that carry the "!expected" field,
187 // since they are guaranteed to be added by the LowerExpectIntrinsic pass.
188 if (!hasBranchWeightOrigin(I))
189 return;
190 SmallVector<uint32_t> ExpectedWeights;
191 if (!extractBranchWeights(I, Weights&: ExpectedWeights))
192 return;
193 verifyMisExpect(I, RealWeights, ExpectedWeights);
194}
195
196void checkFrontendInstrumentation(Instruction &I,
197 const ArrayRef<uint32_t> ExpectedWeights) {
198 SmallVector<uint32_t> RealWeights;
199 if (!extractBranchWeights(I, Weights&: RealWeights))
200 return;
201 verifyMisExpect(I, RealWeights, ExpectedWeights);
202}
203
204void checkExpectAnnotations(Instruction &I,
205 const ArrayRef<uint32_t> ExistingWeights,
206 bool IsFrontend) {
207 if (IsFrontend) {
208 checkFrontendInstrumentation(I, ExpectedWeights: ExistingWeights);
209 } else {
210 checkBackendInstrumentation(I, RealWeights: ExistingWeights);
211 }
212}
213
214} // namespace misexpect
215} // namespace llvm
216#undef DEBUG_TYPE
217