//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the guard widening pass. The semantics of the
// @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
// more often than it did before the transform. This optimization is called
// "widening" and can be used to hoist and common runtime checks in situations
// like these:
//
//   %cmp0 = 7 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   %cmp1 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
//   ...
//
// =>
//
//   %cmp0 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   ...
//
// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
// generic implementation of the same function, which will have the correct
// semantics from that point onward. It is always _legal_ to deoptimize (so
// replacing %cmp0 with false is "correct"), though it may not always be
// profitable to do so.
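//
// For illustration (form per LLVM LangRef, not specific to this pass), the
// same guard can also be expressed as a "widenable branch", which this pass
// widens as well when -guard-widening-widen-branch-guards is enabled:
//
//   %wc = call i1 @llvm.experimental.widenable.condition()
//   %guard.cond = and i1 %cmp0, %wc
//   br i1 %guard.cond, label %guarded, label %deopt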
//
// NB! This pass is a work in progress. It hasn't been tuned to be "production
// ready" yet. It is known to have quadratic running time and will not scale
// to large numbers of guards.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <functional>

using namespace llvm;

#define DEBUG_TYPE "guard-widening"

STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
STATISTIC(FreezeAdded, "Number of freeze instructions introduced");

static cl::opt<bool>
    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
                      cl::desc("Whether or not we should widen guards "
                               "expressed as branches by widenable conditions"),
                      cl::init(true));

namespace {

// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    return GI->getArgOperand(0);
  }
  Value *Cond, *WC;
  BasicBlock *IfTrueBB, *IfFalseBB;
  if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
    return Cond;

  return cast<BranchInst>(I)->getCondition();
}

// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
// conditional branch.
static void setCondition(Instruction *I, Value *NewCond) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    GI->setArgOperand(0, NewCond);
    return;
  }
  cast<BranchInst>(I)->setCondition(NewCond);
}

// Eliminates the guard instruction properly. MemorySSA is updated before the
// instruction is erased so we never touch the instruction after it is freed.
static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
  if (MSSAU)
    MSSAU->removeMemoryAccess(GuardInst);
  GuardInst->eraseFromParent();
  ++GuardsEliminated;
}

/// Find a point at which the widened condition of \p WCOrGuard should be
/// inserted. When it is represented as an intrinsic call, we can do it right
/// before the call instruction. However, when we are dealing with a widenable
/// branch, we must account for the following situation: widening should not
/// turn a loop-invariant condition into a loop-variant one. It means that if
/// the widenable.condition() call is invariant (w.r.t. any loop), the new wide
/// condition should stay invariant. Otherwise there can be a miscompile, like
/// the one described at https://github.com/llvm/llvm-project/issues/60234. The
/// safest way to do it is to expand the new condition at WC's block.
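/// For illustration (a sketch of the hazard, not the exact IR from the
/// issue): if widenable.condition() is materialized in a loop preheader while
/// the widenable branch sits inside the loop, expanding the wide condition at
/// the branch would make it loop-variant; expanding it next to the
/// widenable.condition() call keeps it loop-invariant.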
static std::optional<BasicBlock::iterator>
findInsertionPointForWideCondition(Instruction *WCOrGuard) {
  if (isGuard(WCOrGuard))
    return WCOrGuard->getIterator();
  if (auto WC = extractWidenableCondition(WCOrGuard))
    return cast<Instruction>(WC)->getIterator();
  return std::nullopt;
}

class GuardWideningImpl {
  DominatorTree &DT;
  PostDominatorTree *PDT;
  LoopInfo &LI;
  AssumptionCache &AC;
  MemorySSAUpdater *MSSAU;

  /// Together, these describe the region of interest. This might be all of
  /// the blocks within a function, or only a given loop's blocks and preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock *)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
  /// is currently visiting the block containing \p Instr, and \p GuardsPerBlock
  /// maps BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
          &GuardsPerBlock);

  /// Used to keep track of which widening potential is more effective.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions goes (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable. Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };

  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p WideningPoint.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *ToWiden,
                                     BasicBlock::iterator WideningPoint,
                                     SmallVectorImpl<Value *> &ChecksToHoist,
                                     SmallVectorImpl<Value *> &ChecksToWiden);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
  bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return canBeHoistedTo(V, InsertPos, Visited);
  }

  bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos,
                      SmallPtrSetImpl<const Instruction *> &Visited) const;

  bool canBeHoistedTo(const SmallVectorImpl<Value *> &Checks,
                      BasicBlock::iterator InsertPos) const {
    return all_of(Checks,
                  [&](const Value *V) { return canBeHoistedTo(V, InsertPos); });
  }

  /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
  /// canBeHoistedTo returned true.
  void makeAvailableAt(Value *V, BasicBlock::iterator InsertPos) const;

  void makeAvailableAt(const SmallVectorImpl<Value *> &Checks,
                       BasicBlock::iterator InsertPos) const {
    for (Value *V : Checks)
      makeAvailableAt(V, InsertPos);
  }

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
  /// to generate an expression computing the logical AND of \p ChecksToHoist
  /// and \p ChecksToWiden, and succeed only if the expression computing the
  /// AND is only as expensive as computing one of the two sets of expressions.
  /// If \p InsertPt is provided, actually generate the resulting expression,
  /// make it available at \p InsertPt and return it (else no change to the IR
  /// is made).
  std::optional<Value *>
  mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
              SmallVectorImpl<Value *> &ChecksToWiden,
              std::optional<BasicBlock::iterator> InsertPt);

  /// Generate the logical AND of \p ChecksToHoist and \p OldCondition and make
  /// it available at InsertPt.
  Value *hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                     Value *OldCondition, BasicBlock::iterator InsertPt);

  /// Adds a freeze of \p Orig and pushes it as far up as possible, very
  /// aggressively. Also replaces all uses of the frozen instruction with the
  /// frozen version.
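  /// For example (illustrative, assuming %y is known not to be poison):
  /// freezing %r = add nuw i32 %x, %y yields
  ///   %x.gw.fr = freeze i32 %x
  ///   %r = add i32 %x.gw.fr, %y   ; nuw dropped
  /// rather than a freeze of %r itself.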
  Value *freezeAndPush(Value *Orig, BasicBlock::iterator InsertPt);

  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative. \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this range
  /// check.
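  /// For illustration (assumed value names): with %len known non-negative,
  ///   %idx = add i32 %i, 4
  ///   %rc  = icmp ult i32 %idx, %len
  /// parses as a RangeCheck with Base = %i, Offset = 4 and Length = %len.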
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; }
    ICmpInst *getCheckInst() const { return CheckInst; }

    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(OS, PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(OS, PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(OS, PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(dbgs());
      dbgs() << "\n";
    }
  };

  /// Parse \p ToParse into a conjunction (logical-and) of range checks, and
  /// append them to \p Checks. Returns true on success; may clobber \p Checks
  /// on failure.
  bool parseRangeChecks(SmallVectorImpl<Value *> &ToParse,
                        SmallVectorImpl<RangeCheck> &Checks) {
    for (auto CheckCond : ToParse) {
      if (!parseRangeChecks(CheckCond, Checks))
        return false;
    }
    return true;
  }

  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks. Return true on success (i.e. all of the
  /// checks in \p Checks were combined into \p CombinedChecks). Clobbers
  /// \p Checks and \p CombinedChecks on success and on failure.
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p ChecksToHoist and \p ChecksToWiden
  /// for the price of computing only one of the set of expressions?
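  /// For instance (illustrative), (x u< 10) and (x u< 20) intersect to the
  /// single check (x u< 10), so the AND costs just one comparison.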
  bool isWideningCondProfitable(SmallVectorImpl<Value *> &ChecksToHoist,
                                SmallVectorImpl<Value *> &ChecksToWiden) {
    return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/std::nullopt)
        .has_value();
  }

  /// Widen \p ChecksToWiden to fail if any of \p ChecksToHoist is false.
  void widenGuard(SmallVectorImpl<Value *> &ChecksToHoist,
                  SmallVectorImpl<Value *> &ChecksToWiden,
                  Instruction *ToWiden) {
    auto InsertPt = findInsertionPointForWideCondition(ToWiden);
    auto MergedCheck = mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt);
    Value *Result = MergedCheck ? *MergedCheck
                                : hoistChecks(ChecksToHoist,
                                              getCondition(ToWiden), *InsertPt);

    if (isGuardAsWidenableBranch(ToWiden)) {
      setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
      return;
    }
    setCondition(ToWiden, Result);
  }

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, AssumptionCache &AC,
                             MemorySSAUpdater *MSSAU, DomTreeNode *Root,
                             std::function<bool(BasicBlock *)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root),
        BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
} // end anonymous namespace

static bool isSupportedGuardInstruction(const Instruction *Insn) {
  if (isGuard(Insn))
    return true;
  if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
    return true;
  return false;
}

bool GuardWideningImpl::run() {
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  for (auto DFI = df_begin(Root), DFE = df_end(Root);
       DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    for (auto &I : *BB)
      if (isSupportedGuardInstruction(&I))
        CurrentList.push_back(cast<Instruction>(&I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(I)) {
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(I))
        eliminateGuard(I, MSSAU);
      else {
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        ++CondBranchEliminated;
      }
    }

  return Changed;
}

bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
        &GuardsInBlock) {
  SmallVector<Value *> ChecksToHoist;
  parseWidenableGuard(Instr, ChecksToHoist);
  // Ignore trivial true or false conditions. These instructions will be
  // trivially eliminated by any cleanup pass. Do not erase them because other
  // guards can possibly be widened into them.
  if (ChecksToHoist.empty() ||
      (ChecksToHoist.size() == 1 && isa<ConstantInt>(ChecksToHoist.front())))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge Instr with
  // for the most profit.
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;

    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
                                         : GuardsInCurBB.end();

#ifndef NDEBUG
    {
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(I, E)) {
      auto WideningPoint = findInsertionPointForWideCondition(Candidate);
      if (!WideningPoint)
        continue;
      SmallVector<Value *> CandidateChecks;
      parseWidenableGuard(Candidate, CandidateChecks);
      auto Score = computeWideningScore(Instr, Candidate, *WideningPoint,
                                        ChecksToHoist, CandidateChecks);
      LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate
                        << " is " << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  SmallVector<Value *> ChecksToWiden;
  parseWidenableGuard(BestSoFar, ChecksToWiden);
  widenGuard(ChecksToHoist, ChecksToWiden, BestSoFar);
  auto NewGuardCondition = ConstantInt::getTrue(Instr->getContext());
  setCondition(Instr, NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Instr);
  WidenedGuards.insert(BestSoFar);
  return true;
}

GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
    Instruction *DominatedInstr, Instruction *ToWiden,
    BasicBlock::iterator WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist,
    SmallVectorImpl<Value *> &ChecksToWiden) {
  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(WideningPoint->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop. TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  if (!canBeHoistedTo(ChecksToHoist, WideningPoint))
    return WS_IllegalOrNegative;
  // Later, in GuardWideningImpl::hoistChecks, the entire condition (not just
  // the parsed list of checks) might be hoisted, so we also need to check
  // that the whole condition can be hoisted.
  if (!canBeHoistedTo(getCondition(ToWiden), WideningPoint))
    return WS_IllegalOrNegative;

  // If the guard was conditionally executed, it may never be reached
  // dynamically. There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need and
  // 2) we have the extra cost of computing the guard condition in the common
  // case. At the moment, we really only consider the second in our heuristic
  // here. TODO: evaluate cost model for spurious deopt
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(ChecksToHoist, ChecksToWiden))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

  if (HoistingOutOfLoop)
    return WS_Positive;

  // For a given basic block \p BB, return the successor which is guaranteed or
  // highly likely to be taken.
  auto GetLikelySuccessor = [](const BasicBlock *BB) -> const BasicBlock * {
    if (auto *UniqueSucc = BB->getUniqueSuccessor())
      return UniqueSucc;
    auto *Term = BB->getTerminator();
    Value *Cond = nullptr;
    const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
    using namespace PatternMatch;
    if (!match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue),
                          m_BasicBlock(IfFalse))))
      return nullptr;
    // For constant conditions, only one dynamic successor is possible.
    if (auto *ConstCond = dyn_cast<ConstantInt>(Cond))
      return ConstCond->isAllOnesValue() ? IfTrue : IfFalse;
    // If one of the successors ends with deopt, the other one is likely.
    if (IfFalse->getPostdominatingDeoptimizeCall())
      return IfTrue;
    if (IfTrue->getPostdominatingDeoptimizeCall())
      return IfFalse;
    // TODO: Use branch frequency metadata to allow hoisting through non-deopt
    // branches?
    return nullptr;
  };

  // Returns true if we might be hoisting above explicit control flow into a
  // considerably hotter block. Note that this completely ignores implicit
  // control flow (guards, calls which throw, etc...). That choice appears
  // arbitrary (we assume that implicit control flow exits are all rare).
  auto MaybeHoistingToHotterBlock = [&]() {
    const auto *DominatingBlock = WideningPoint->getParent();
    const auto *DominatedBlock = DominatedInstr->getParent();

    // Descend as low as we can, always taking the likely successor.
    assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code");
    assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code");
    assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance");
    while (DominatedBlock != DominatingBlock) {
      auto *LikelySucc = GetLikelySuccessor(DominatingBlock);
      // No likely successor?
      if (!LikelySucc)
        break;
      // Only go down the dominator tree.
      if (!DT.properlyDominates(DominatingBlock, LikelySucc))
        break;
      DominatingBlock = LikelySucc;
    }

    // Found?
    if (DominatedBlock == DominatingBlock)
      return false;
    // We followed the likely successor chain and went past the dominated
    // block. It means that the dominated guard is in dead/very cold code.
    if (!DT.dominates(DominatingBlock, DominatedBlock))
      return true;
    // TODO: diamond, triangle cases
    if (!PDT)
      return true;
    return !PDT->dominates(DominatedBlock, DominatingBlock);
  };

  return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral;
}

bool GuardWideningImpl::canBeHoistedTo(
    const Value *V, BasicBlock::iterator Loc,
    SmallPtrSetImpl<const Instruction *> &Visited) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
    return true;

  if (!isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) ||
      Inst->mayReadFromMemory())
    return false;

  Visited.insert(Inst);

  // We only want to go _up_ the dominance chain when recursing.
  assert(!isa<PHINode>(Loc) &&
         "PHIs should return false for isSafeToSpeculativelyExecute");
  assert(DT.isReachableFromEntry(Inst->getParent()) &&
         "We did a DFS from the block entry!");
  return all_of(Inst->operands(),
                [&](Value *Op) { return canBeHoistedTo(Op, Loc, Visited); });
}

void GuardWideningImpl::makeAvailableAt(Value *V,
                                        BasicBlock::iterator Loc) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc))
    return;

  assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) &&
         !Inst->mayReadFromMemory() &&
         "Should've checked with canBeHoistedTo!");

  for (Value *Op : Inst->operands())
    makeAvailableAt(Op, Loc);

  Inst->moveBefore(*Loc->getParent(), Loc);
}

// Return the instruction before which we can insert a freeze for the value V,
// as close to the def as possible. If there is no place to add the freeze,
// return std::nullopt.
static std::optional<BasicBlock::iterator>
getFreezeInsertPt(Value *V, const DominatorTree &DT) {
  auto *I = dyn_cast<Instruction>(V);
  if (!I)
    return DT.getRoot()->getFirstNonPHIOrDbgOrAlloca()->getIterator();

  std::optional<BasicBlock::iterator> Res = I->getInsertionPointAfterDef();
  // If there is no place to add the freeze, return std::nullopt.
  if (!Res || !DT.dominates(I, &**Res))
    return std::nullopt;

  Instruction *ResInst = &**Res;

  // If there is a User dominated by the original I, then it should be
  // dominated by the Freeze instruction as well.
  if (any_of(I->users(), [&](User *U) {
        Instruction *User = cast<Instruction>(U);
        return ResInst != User && DT.dominates(I, User) &&
               !DT.dominates(ResInst, User);
      }))
    return std::nullopt;
  return Res;
}

Value *GuardWideningImpl::freezeAndPush(Value *Orig,
                                        BasicBlock::iterator InsertPt) {
  if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT))
    return Orig;
  std::optional<BasicBlock::iterator> InsertPtAtDef =
      getFreezeInsertPt(Orig, DT);
  if (!InsertPtAtDef) {
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(*InsertPt->getParent(), InsertPt);
    return FI;
  }
  if (isa<Constant>(Orig) || isa<GlobalValue>(Orig)) {
    BasicBlock::iterator InsertPt = *InsertPtAtDef;
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(*InsertPt->getParent(), InsertPt);
    return FI;
  }

  SmallSet<Value *, 16> Visited;
  SmallVector<Value *, 16> Worklist;
  SmallSet<Instruction *, 16> DropPoisonFlags;
  SmallVector<Value *, 16> NeedFreeze;
  DenseMap<Value *, FreezeInst *> CacheOfFreezes;

  // These data structures are a bit overloaded: Visited contains a
  // constant/GV if we have already met it; in that case, CacheOfFreezes has a
  // freeze for it if one is required.
  auto handleConstantOrGlobal = [&](Use &U) {
    Value *Def = U.get();
    if (!isa<Constant>(Def) && !isa<GlobalValue>(Def))
      return false;

    if (Visited.insert(Def).second) {
      if (isGuaranteedNotToBePoison(Def, nullptr, InsertPt, &DT))
        return true;
      BasicBlock::iterator InsertPt = *getFreezeInsertPt(Def, DT);
      FreezeInst *FI = new FreezeInst(Def, Def->getName() + ".gw.fr");
      FI->insertBefore(*InsertPt->getParent(), InsertPt);
      CacheOfFreezes[Def] = FI;
    }

    if (auto It = CacheOfFreezes.find(Def); It != CacheOfFreezes.end())
      U.set(It->second);
    return true;
  };

  Worklist.push_back(Orig);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    if (isGuaranteedNotToBePoison(V, nullptr, InsertPt, &DT))
      continue;

    Instruction *I = dyn_cast<Instruction>(V);
    if (!I || canCreateUndefOrPoison(cast<Operator>(I),
                                     /*ConsiderFlagsAndMetadata*/ false)) {
      NeedFreeze.push_back(V);
      continue;
    }
    // Check all operands. If for any of them we cannot insert a Freeze,
    // stop here. Otherwise, iterate.
    if (any_of(I->operands(), [&](Value *Op) {
          return isa<Instruction>(Op) && !getFreezeInsertPt(Op, DT);
        })) {
      NeedFreeze.push_back(I);
      continue;
    }
    DropPoisonFlags.insert(I);
    for (Use &U : I->operands())
      if (!handleConstantOrGlobal(U))
        Worklist.push_back(U.get());
  }
  for (Instruction *I : DropPoisonFlags)
    I->dropPoisonGeneratingAnnotations();

  Value *Result = Orig;
  for (Value *V : NeedFreeze) {
    BasicBlock::iterator FreezeInsertPt = *getFreezeInsertPt(V, DT);
    FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr");
    FI->insertBefore(*FreezeInsertPt->getParent(), FreezeInsertPt);
    ++FreezeAdded;
    if (V == Orig)
      Result = FI;
    V->replaceUsesWithIf(
        FI, [&](const Use &U) -> bool { return U.getUser() != FI; });
  }

  return Result;
}

std::optional<Value *>
GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                               SmallVectorImpl<Value *> &ChecksToWiden,
                               std::optional<BasicBlock::iterator> InsertPt) {
  using namespace llvm::PatternMatch;

  Value *Result = nullptr;
  {
    // L >u C0 && L >u C1 -> L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    CmpPredicate Pred0, Pred1;
    // TODO: Support searching for pairs to merge from both whole lists of
    // ChecksToHoist and ChecksToWiden.
    if (ChecksToWiden.size() == 1 && ChecksToHoist.size() == 1 &&
        match(ChecksToWiden.front(),
              m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
        match(ChecksToHoist.front(),
              m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {

      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());

      // Given what we're doing here and the semantics of guards, it would
      // be correct to use a subset intersection, but that may be too
      // aggressive in cases we care about.
      if (std::optional<ConstantRange> Intersect =
              CR0.exactIntersectWith(CR1)) {
        APInt NewRHSAP;
        CmpInst::Predicate Pred;
        if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
          if (InsertPt) {
            ConstantInt *NewRHS =
                ConstantInt::get((*InsertPt)->getContext(), NewRHSAP);
            assert(canBeHoistedTo(LHS, *InsertPt) && "must be");
            makeAvailableAt(LHS, *InsertPt);
            Result = new ICmpInst(*InsertPt, Pred, LHS, NewRHS, "wide.chk");
          }
          return Result;
        }
      }
    }
  }

  {
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    if (parseRangeChecks(ChecksToWiden, Checks) &&
        parseRangeChecks(ChecksToHoist, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(RC.getCheckInst(), *InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
                                               *InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value");
        Result->setName("wide.chk");
        Result = freezeAndPush(Result, *InsertPt);
      }
      return Result;
    }
  }
  // We were not able to compute ChecksToHoist AND ChecksToWiden for the price
  // of one.
  return std::nullopt;
}

Value *GuardWideningImpl::hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                                      Value *OldCondition,
                                      BasicBlock::iterator InsertPt) {
  assert(!ChecksToHoist.empty());
  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
  makeAvailableAt(ChecksToHoist, InsertPt);
  makeAvailableAt(OldCondition, InsertPt);
  Value *Result = Builder.CreateAnd(ChecksToHoist);
  Result = freezeAndPush(Result, InsertPt);
  Result = Builder.CreateAnd(OldCondition, Result);
  Result->setName("wide.chk");
  return Result;
}

bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks) {
  using namespace llvm::PatternMatch;

  auto *IC = dyn_cast<ICmpInst>(CheckCond);
  if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(CmpLHS, CmpRHS);

  auto &DL = IC->getDataLayout();

  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(Check.getLength(), DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?");
#endif

    if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Ctx, NewOffset));
      Changed = true;
    } else if (match(Check.getBase(),
                     m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      KnownBits Known = computeKnownBits(OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Ctx, NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Check);
  return true;
}

bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try to
    // merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
    erase_if(Checks, IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!");

    if (CurrentChecks.size() < 3) {
      llvm::append_range(RangeChecksOut, CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has been
    // no need to hard-code that fact.

    llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS.getOffsetValue());
    });
    // Note: sorting does not invalidate the checks; CurrentChecks still holds
    // the same RangeCheck objects, now ordered by offset.

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(APInt::getSignedMinValue(BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
    };

    if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //   with forall i in [0,f]: k_f-k_i u< k_f-k_0   ... Precond_0
    //                           k_f-k_0 u< INT_MIN+k_f ... Precond_1
    //                           k_f != k_0              ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks.
    //
    // Informal proof sketch:
    //
    // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
    // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
    // thus I+k_f is the greatest unsigned value in that range.
    //
    // This combined with Chk_f shows that everything in that range is u< L.
    // Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
    // lie in [I+k_0,I+k_f], thus proving our claim.
    //
    // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
    // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping
    // range by definition, and the latter case is impossible:
    //
    //   0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //   xxxxxx        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    //
    // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
    // with 'x' above) be at least >u INT_MIN.
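    //
    // Worked instance (illustrative): for the checks I+0 u< L, I+1 u< L and
    // I+2 u< L, we keep only I+0 u< L and I+2 u< L; [I+0,I+2] cannot wrap, so
    // I+1 lies within it and the middle check is implied.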

    RangeChecksOut.emplace_back(CurrentChecks.front());
    RangeChecksOut.emplace_back(CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
  return RangeChecksOut.size() != OldCount;
}

#ifndef NDEBUG
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
  switch (WS) {
  case WS_IllegalOrNegative:
    return "IllegalOrNegative";
  case WS_Neutral:
    return "Neutral";
  case WS_Positive:
    return "Positive";
  case WS_VeryPositive:
    return "VeryPositive";
  }

  llvm_unreachable("Fully covered switch above!");
}
#endif

PreservedAnalyses GuardWideningPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  // Avoid requesting analyses if there are no guards or widenable conditions.
  auto *GuardDecl = Intrinsic::getDeclarationIfExists(
      F.getParent(), Intrinsic::experimental_guard);
  bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
  auto *WCDecl = Intrinsic::getDeclarationIfExists(
      F.getParent(), Intrinsic::experimental_widenable_condition);
  bool HasWidenableConditions = WCDecl && !WCDecl->use_empty();
  if (!HasIntrinsicGuards && !HasWidenableConditions)
    return PreservedAnalyses::all();
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F);
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSAA)
    MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA());
  if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr,
                         DT.getRootNode(), [](BasicBlock *) { return true; })
           .run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}

PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
                                         LoopStandardAnalysisResults &AR,
                                         LPMUpdater &U) {
  BasicBlock *RootBB = L.getLoopPredecessor();
  if (!RootBB)
    RootBB = L.getHeader();
  auto BlockFilter = [&](BasicBlock *BB) {
    return BB == RootBB || L.contains(BB);
  };
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (AR.MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
  if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC,
                         MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(RootBB),
                         BlockFilter)
           .run())
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}