1 | //===- GuardWidening.cpp - ---- Guard widening ----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the guard widening pass. The semantics of the |
10 | // @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails |
// more often than it did before the transform. This optimization is called
// "widening" and can be used to hoist and common runtime checks in
// situations like these:
14 | // |
15 | // %cmp0 = 7 u< Length |
16 | // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] |
17 | // call @unknown_side_effects() |
18 | // %cmp1 = 9 u< Length |
19 | // call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ] |
20 | // ... |
21 | // |
22 | // => |
23 | // |
24 | // %cmp0 = 9 u< Length |
25 | // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] |
26 | // call @unknown_side_effects() |
27 | // ... |
28 | // |
29 | // If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a |
30 | // generic implementation of the same function, which will have the correct |
31 | // semantics from that point onward. It is always _legal_ to deoptimize (so |
32 | // replacing %cmp0 with false is "correct"), though it may not always be |
33 | // profitable to do so. |
34 | // |
35 | // NB! This pass is a work in progress. It hasn't been tuned to be "production |
// ready" yet. It is known to have quadratic running time and will not scale
// to large numbers of guards.
38 | // |
39 | //===----------------------------------------------------------------------===// |
40 | |
41 | #include "llvm/Transforms/Scalar/GuardWidening.h" |
42 | #include "llvm/ADT/DenseMap.h" |
43 | #include "llvm/ADT/DepthFirstIterator.h" |
44 | #include "llvm/ADT/Statistic.h" |
45 | #include "llvm/Analysis/AssumptionCache.h" |
46 | #include "llvm/Analysis/GuardUtils.h" |
47 | #include "llvm/Analysis/LoopInfo.h" |
48 | #include "llvm/Analysis/MemorySSAUpdater.h" |
49 | #include "llvm/Analysis/PostDominators.h" |
50 | #include "llvm/Analysis/ValueTracking.h" |
51 | #include "llvm/IR/ConstantRange.h" |
52 | #include "llvm/IR/Dominators.h" |
53 | #include "llvm/IR/IRBuilder.h" |
54 | #include "llvm/IR/IntrinsicInst.h" |
55 | #include "llvm/IR/Module.h" |
56 | #include "llvm/IR/PatternMatch.h" |
57 | #include "llvm/Support/CommandLine.h" |
58 | #include "llvm/Support/Debug.h" |
59 | #include "llvm/Support/KnownBits.h" |
60 | #include "llvm/Transforms/Scalar.h" |
61 | #include "llvm/Transforms/Utils/GuardUtils.h" |
62 | #include "llvm/Transforms/Utils/LoopUtils.h" |
63 | #include <functional> |
64 | |
65 | using namespace llvm; |
66 | |
67 | #define DEBUG_TYPE "guard-widening" |
68 | |
69 | STATISTIC(GuardsEliminated, "Number of eliminated guards" ); |
70 | STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches" ); |
71 | STATISTIC(FreezeAdded, "Number of freeze instruction introduced" ); |
72 | |
73 | static cl::opt<bool> |
74 | WidenBranchGuards("guard-widening-widen-branch-guards" , cl::Hidden, |
75 | cl::desc("Whether or not we should widen guards " |
76 | "expressed as branches by widenable conditions" ), |
77 | cl::init(Val: true)); |
78 | |
79 | namespace { |
80 | |
81 | // Get the condition of \p I. It can either be a guard or a conditional branch. |
82 | static Value *getCondition(Instruction *I) { |
83 | if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(Val: I)) { |
84 | assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && |
85 | "Bad guard intrinsic?" ); |
86 | return GI->getArgOperand(i: 0); |
87 | } |
88 | Value *Cond, *WC; |
89 | BasicBlock *IfTrueBB, *IfFalseBB; |
90 | if (parseWidenableBranch(U: I, Condition&: Cond, WidenableCondition&: WC, IfTrueBB, IfFalseBB)) |
91 | return Cond; |
92 | |
93 | return cast<BranchInst>(Val: I)->getCondition(); |
94 | } |
95 | |
96 | // Set the condition for \p I to \p NewCond. \p I can either be a guard or a |
97 | // conditional branch. |
98 | static void setCondition(Instruction *I, Value *NewCond) { |
99 | if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(Val: I)) { |
100 | assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && |
101 | "Bad guard intrinsic?" ); |
102 | GI->setArgOperand(i: 0, v: NewCond); |
103 | return; |
104 | } |
105 | cast<BranchInst>(Val: I)->setCondition(NewCond); |
106 | } |
107 | |
108 | // Eliminates the guard instruction properly. |
109 | static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) { |
110 | GuardInst->eraseFromParent(); |
111 | if (MSSAU) |
112 | MSSAU->removeMemoryAccess(I: GuardInst); |
113 | ++GuardsEliminated; |
114 | } |
115 | |
116 | /// Find a point at which the widened condition of \p Guard should be inserted. |
117 | /// When it is represented as intrinsic call, we can do it right before the call |
118 | /// instruction. However, when we are dealing with widenable branch, we must |
119 | /// account for the following situation: widening should not turn a |
120 | /// loop-invariant condition into a loop-variant. It means that if |
121 | /// widenable.condition() call is invariant (w.r.t. any loop), the new wide |
122 | /// condition should stay invariant. Otherwise there can be a miscompile, like |
123 | /// the one described at https://github.com/llvm/llvm-project/issues/60234. The |
124 | /// safest way to do it is to expand the new condition at WC's block. |
125 | static std::optional<BasicBlock::iterator> |
126 | findInsertionPointForWideCondition(Instruction *WCOrGuard) { |
127 | if (isGuard(U: WCOrGuard)) |
128 | return WCOrGuard->getIterator(); |
129 | if (auto WC = extractWidenableCondition(U: WCOrGuard)) |
130 | return cast<Instruction>(Val: WC)->getIterator(); |
131 | return std::nullopt; |
132 | } |
133 | |
134 | class GuardWideningImpl { |
135 | DominatorTree &DT; |
136 | PostDominatorTree *PDT; |
137 | LoopInfo &LI; |
138 | AssumptionCache &AC; |
139 | MemorySSAUpdater *MSSAU; |
140 | |
141 | /// Together, these describe the region of interest. This might be all of |
142 | /// the blocks within a function, or only a given loop's blocks and preheader. |
143 | DomTreeNode *Root; |
144 | std::function<bool(BasicBlock*)> BlockFilter; |
145 | |
146 | /// The set of guards and conditional branches whose conditions have been |
147 | /// widened into dominating guards. |
148 | SmallVector<Instruction *, 16> EliminatedGuardsAndBranches; |
149 | |
150 | /// The set of guards which have been widened to include conditions to other |
151 | /// guards. |
152 | DenseSet<Instruction *> WidenedGuards; |
153 | |
154 | /// Try to eliminate instruction \p Instr by widening it into an earlier |
155 | /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that |
156 | /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock |
157 | /// maps BasicBlocks to the set of guards seen in that block. |
158 | bool eliminateInstrViaWidening( |
159 | Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, |
160 | const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> |
161 | &GuardsPerBlock); |
162 | |
163 | /// Used to keep track of which widening potential is more effective. |
164 | enum WideningScore { |
165 | /// Don't widen. |
166 | WS_IllegalOrNegative, |
167 | |
168 | /// Widening is performance neutral as far as the cycles spent in check |
169 | /// conditions goes (but can still help, e.g., code layout, having less |
170 | /// deopt state). |
171 | WS_Neutral, |
172 | |
173 | /// Widening is profitable. |
174 | WS_Positive, |
175 | |
176 | /// Widening is very profitable. Not significantly different from \c |
177 | /// WS_Positive, except by the order. |
178 | WS_VeryPositive |
179 | }; |
180 | |
181 | static StringRef scoreTypeToString(WideningScore WS); |
182 | |
183 | /// Compute the score for widening the condition in \p DominatedInstr |
184 | /// into \p WideningPoint. |
185 | WideningScore computeWideningScore(Instruction *DominatedInstr, |
186 | Instruction *ToWiden, |
187 | BasicBlock::iterator WideningPoint, |
188 | SmallVectorImpl<Value *> &ChecksToHoist, |
189 | SmallVectorImpl<Value *> &ChecksToWiden); |
190 | |
191 | /// Helper to check if \p V can be hoisted to \p InsertPos. |
192 | bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos) const { |
193 | SmallPtrSet<const Instruction *, 8> Visited; |
194 | return canBeHoistedTo(V, InsertPos, Visited); |
195 | } |
196 | |
197 | bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos, |
198 | SmallPtrSetImpl<const Instruction *> &Visited) const; |
199 | |
200 | bool canBeHoistedTo(const SmallVectorImpl<Value *> &Checks, |
201 | BasicBlock::iterator InsertPos) const { |
202 | return all_of(Range: Checks, |
203 | P: [&](const Value *V) { return canBeHoistedTo(V, InsertPos); }); |
204 | } |
205 | /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c |
206 | /// canBeHoistedTo returned true. |
207 | void makeAvailableAt(Value *V, BasicBlock::iterator InsertPos) const; |
208 | |
209 | void makeAvailableAt(const SmallVectorImpl<Value *> &Checks, |
210 | BasicBlock::iterator InsertPos) const { |
211 | for (Value *V : Checks) |
212 | makeAvailableAt(V, InsertPos); |
213 | } |
214 | |
215 | /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try |
216 | /// to generate an expression computing the logical AND of \p ChecksToHoist |
217 | /// and \p ChecksToWiden. Return true if the expression computing the AND is |
218 | /// only as expensive as computing one of the set of expressions. If \p |
219 | /// InsertPt is true then actually generate the resulting expression, make it |
220 | /// available at \p InsertPt and return it in \p Result (else no change to the |
221 | /// IR is made). |
222 | std::optional<Value *> |
223 | mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist, |
224 | SmallVectorImpl<Value *> &ChecksToWiden, |
225 | std::optional<BasicBlock::iterator> InsertPt); |
226 | |
227 | /// Generate the logical AND of \p ChecksToHoist and \p OldCondition and make |
228 | /// it available at InsertPt |
229 | Value *hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist, |
230 | Value *OldCondition, BasicBlock::iterator InsertPt); |
231 | |
232 | /// Adds freeze to Orig and push it as far as possible very aggressively. |
233 | /// Also replaces all uses of frozen instruction with frozen version. |
234 | Value *freezeAndPush(Value *Orig, BasicBlock::iterator InsertPt); |
235 | |
236 | /// Represents a range check of the form \c Base + \c Offset u< \c Length, |
237 | /// with the constraint that \c Length is not negative. \c CheckInst is the |
238 | /// pre-existing instruction in the IR that computes the result of this range |
239 | /// check. |
240 | class RangeCheck { |
241 | const Value *Base; |
242 | const ConstantInt *Offset; |
243 | const Value *Length; |
244 | ICmpInst *CheckInst; |
245 | |
246 | public: |
247 | explicit RangeCheck(const Value *Base, const ConstantInt *Offset, |
248 | const Value *Length, ICmpInst *CheckInst) |
249 | : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {} |
250 | |
251 | void setBase(const Value *NewBase) { Base = NewBase; } |
252 | void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; } |
253 | |
254 | const Value *getBase() const { return Base; } |
255 | const ConstantInt *getOffset() const { return Offset; } |
256 | const APInt &getOffsetValue() const { return getOffset()->getValue(); } |
257 | const Value *getLength() const { return Length; }; |
258 | ICmpInst *getCheckInst() const { return CheckInst; } |
259 | |
260 | void print(raw_ostream &OS, bool PrintTypes = false) { |
261 | OS << "Base: " ; |
262 | Base->printAsOperand(O&: OS, PrintType: PrintTypes); |
263 | OS << " Offset: " ; |
264 | Offset->printAsOperand(O&: OS, PrintType: PrintTypes); |
265 | OS << " Length: " ; |
266 | Length->printAsOperand(O&: OS, PrintType: PrintTypes); |
267 | } |
268 | |
269 | LLVM_DUMP_METHOD void dump() { |
270 | print(OS&: dbgs()); |
271 | dbgs() << "\n" ; |
272 | } |
273 | }; |
274 | |
275 | /// Parse \p ToParse into a conjunction (logical-and) of range checks; and |
276 | /// append them to \p Checks. Returns true on success, may clobber \c Checks |
277 | /// on failure. |
278 | bool parseRangeChecks(SmallVectorImpl<Value *> &ToParse, |
279 | SmallVectorImpl<RangeCheck> &Checks) { |
280 | for (auto CheckCond : ToParse) { |
281 | if (!parseRangeChecks(CheckCond, Checks)) |
282 | return false; |
283 | } |
284 | return true; |
285 | } |
286 | |
287 | bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks); |
288 | |
289 | /// Combine the checks in \p Checks into a smaller set of checks and append |
290 | /// them into \p CombinedChecks. Return true on success (i.e. all of checks |
291 | /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks |
292 | /// and \p CombinedChecks on success and on failure. |
293 | bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks, |
294 | SmallVectorImpl<RangeCheck> &CombinedChecks) const; |
295 | |
296 | /// Can we compute the logical AND of \p ChecksToHoist and \p ChecksToWiden |
297 | /// for the price of computing only one of the set of expressions? |
298 | bool isWideningCondProfitable(SmallVectorImpl<Value *> &ChecksToHoist, |
299 | SmallVectorImpl<Value *> &ChecksToWiden) { |
300 | return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/std::nullopt) |
301 | .has_value(); |
302 | } |
303 | |
304 | /// Widen \p ChecksToWiden to fail if any of \p ChecksToHoist is false |
305 | void widenGuard(SmallVectorImpl<Value *> &ChecksToHoist, |
306 | SmallVectorImpl<Value *> &ChecksToWiden, |
307 | Instruction *ToWiden) { |
308 | auto InsertPt = findInsertionPointForWideCondition(WCOrGuard: ToWiden); |
309 | auto MergedCheck = mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt); |
310 | Value *Result = MergedCheck ? *MergedCheck |
311 | : hoistChecks(ChecksToHoist, |
312 | OldCondition: getCondition(I: ToWiden), InsertPt: *InsertPt); |
313 | |
314 | if (isGuardAsWidenableBranch(U: ToWiden)) { |
315 | setWidenableBranchCond(WidenableBR: cast<BranchInst>(Val: ToWiden), Cond: Result); |
316 | return; |
317 | } |
318 | setCondition(I: ToWiden, NewCond: Result); |
319 | } |
320 | |
321 | public: |
322 | explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT, |
323 | LoopInfo &LI, AssumptionCache &AC, |
324 | MemorySSAUpdater *MSSAU, DomTreeNode *Root, |
325 | std::function<bool(BasicBlock *)> BlockFilter) |
326 | : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root), |
327 | BlockFilter(BlockFilter) {} |
328 | |
329 | /// The entry point for this pass. |
330 | bool run(); |
331 | }; |
332 | } |
333 | |
334 | static bool isSupportedGuardInstruction(const Instruction *Insn) { |
335 | if (isGuard(U: Insn)) |
336 | return true; |
337 | if (WidenBranchGuards && isGuardAsWidenableBranch(U: Insn)) |
338 | return true; |
339 | return false; |
340 | } |
341 | |
342 | bool GuardWideningImpl::run() { |
343 | DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock; |
344 | bool Changed = false; |
345 | for (auto DFI = df_begin(G: Root), DFE = df_end(G: Root); |
346 | DFI != DFE; ++DFI) { |
347 | auto *BB = (*DFI)->getBlock(); |
348 | if (!BlockFilter(BB)) |
349 | continue; |
350 | |
351 | auto &CurrentList = GuardsInBlock[BB]; |
352 | |
353 | for (auto &I : *BB) |
354 | if (isSupportedGuardInstruction(Insn: &I)) |
355 | CurrentList.push_back(Elt: cast<Instruction>(Val: &I)); |
356 | |
357 | for (auto *II : CurrentList) |
358 | Changed |= eliminateInstrViaWidening(Instr: II, DFSI: DFI, GuardsPerBlock: GuardsInBlock); |
359 | } |
360 | |
361 | assert(EliminatedGuardsAndBranches.empty() || Changed); |
362 | for (auto *I : EliminatedGuardsAndBranches) |
363 | if (!WidenedGuards.count(V: I)) { |
364 | assert(isa<ConstantInt>(getCondition(I)) && "Should be!" ); |
365 | if (isSupportedGuardInstruction(Insn: I)) |
366 | eliminateGuard(GuardInst: I, MSSAU); |
367 | else { |
368 | assert(isa<BranchInst>(I) && |
369 | "Eliminated something other than guard or branch?" ); |
370 | ++CondBranchEliminated; |
371 | } |
372 | } |
373 | |
374 | return Changed; |
375 | } |
376 | |
377 | bool GuardWideningImpl::eliminateInstrViaWidening( |
378 | Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, |
379 | const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> |
380 | &GuardsInBlock) { |
381 | SmallVector<Value *> ChecksToHoist; |
382 | parseWidenableGuard(U: Instr, Checks&: ChecksToHoist); |
383 | // Ignore trivial true or false conditions. These instructions will be |
384 | // trivially eliminated by any cleanup pass. Do not erase them because other |
385 | // guards can possibly be widened into them. |
386 | if (ChecksToHoist.empty() || |
387 | (ChecksToHoist.size() == 1 && isa<ConstantInt>(Val: ChecksToHoist.front()))) |
388 | return false; |
389 | |
390 | Instruction *BestSoFar = nullptr; |
391 | auto BestScoreSoFar = WS_IllegalOrNegative; |
392 | |
393 | // In the set of dominating guards, find the one we can merge GuardInst with |
394 | // for the most profit. |
395 | for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) { |
396 | auto *CurBB = DFSI.getPath(n: i)->getBlock(); |
397 | if (!BlockFilter(CurBB)) |
398 | break; |
399 | assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!" ); |
400 | const auto &GuardsInCurBB = GuardsInBlock.find(Val: CurBB)->second; |
401 | |
402 | auto I = GuardsInCurBB.begin(); |
403 | auto E = Instr->getParent() == CurBB ? find(Range: GuardsInCurBB, Val: Instr) |
404 | : GuardsInCurBB.end(); |
405 | |
406 | #ifndef NDEBUG |
407 | { |
408 | unsigned Index = 0; |
409 | for (auto &I : *CurBB) { |
410 | if (Index == GuardsInCurBB.size()) |
411 | break; |
412 | if (GuardsInCurBB[Index] == &I) |
413 | Index++; |
414 | } |
415 | assert(Index == GuardsInCurBB.size() && |
416 | "Guards expected to be in order!" ); |
417 | } |
418 | #endif |
419 | |
420 | assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?" ); |
421 | |
422 | for (auto *Candidate : make_range(x: I, y: E)) { |
423 | auto WideningPoint = findInsertionPointForWideCondition(WCOrGuard: Candidate); |
424 | if (!WideningPoint) |
425 | continue; |
426 | SmallVector<Value *> CandidateChecks; |
427 | parseWidenableGuard(U: Candidate, Checks&: CandidateChecks); |
428 | auto Score = computeWideningScore(DominatedInstr: Instr, ToWiden: Candidate, WideningPoint: *WideningPoint, |
429 | ChecksToHoist, ChecksToWiden&: CandidateChecks); |
430 | LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate |
431 | << " is " << scoreTypeToString(Score) << "\n" ); |
432 | if (Score > BestScoreSoFar) { |
433 | BestScoreSoFar = Score; |
434 | BestSoFar = Candidate; |
435 | } |
436 | } |
437 | } |
438 | |
439 | if (BestScoreSoFar == WS_IllegalOrNegative) { |
440 | LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n" ); |
441 | return false; |
442 | } |
443 | |
444 | assert(BestSoFar != Instr && "Should have never visited same guard!" ); |
445 | assert(DT.dominates(BestSoFar, Instr) && "Should be!" ); |
446 | |
447 | LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar |
448 | << " with score " << scoreTypeToString(BestScoreSoFar) |
449 | << "\n" ); |
450 | SmallVector<Value *> ChecksToWiden; |
451 | parseWidenableGuard(U: BestSoFar, Checks&: ChecksToWiden); |
452 | widenGuard(ChecksToHoist, ChecksToWiden, ToWiden: BestSoFar); |
453 | auto NewGuardCondition = ConstantInt::getTrue(Context&: Instr->getContext()); |
454 | setCondition(I: Instr, NewCond: NewGuardCondition); |
455 | EliminatedGuardsAndBranches.push_back(Elt: Instr); |
456 | WidenedGuards.insert(V: BestSoFar); |
457 | return true; |
458 | } |
459 | |
460 | GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( |
461 | Instruction *DominatedInstr, Instruction *ToWiden, |
462 | BasicBlock::iterator WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist, |
463 | SmallVectorImpl<Value *> &ChecksToWiden) { |
464 | Loop *DominatedInstrLoop = LI.getLoopFor(BB: DominatedInstr->getParent()); |
465 | Loop *DominatingGuardLoop = LI.getLoopFor(BB: WideningPoint->getParent()); |
466 | bool HoistingOutOfLoop = false; |
467 | |
468 | if (DominatingGuardLoop != DominatedInstrLoop) { |
469 | // Be conservative and don't widen into a sibling loop. TODO: If the |
470 | // sibling is colder, we should consider allowing this. |
471 | if (DominatingGuardLoop && |
472 | !DominatingGuardLoop->contains(L: DominatedInstrLoop)) |
473 | return WS_IllegalOrNegative; |
474 | |
475 | HoistingOutOfLoop = true; |
476 | } |
477 | |
478 | if (!canBeHoistedTo(Checks: ChecksToHoist, InsertPos: WideningPoint)) |
479 | return WS_IllegalOrNegative; |
480 | // Further in the GuardWideningImpl::hoistChecks the entire condition might be |
481 | // widened, not the parsed list of checks. So we need to check the possibility |
482 | // of that condition hoisting. |
483 | if (!canBeHoistedTo(V: getCondition(I: ToWiden), InsertPos: WideningPoint)) |
484 | return WS_IllegalOrNegative; |
485 | |
486 | // If the guard was conditional executed, it may never be reached |
487 | // dynamically. There are two potential downsides to hoisting it out of the |
488 | // conditionally executed region: 1) we may spuriously deopt without need and |
489 | // 2) we have the extra cost of computing the guard condition in the common |
490 | // case. At the moment, we really only consider the second in our heuristic |
491 | // here. TODO: evaluate cost model for spurious deopt |
492 | // NOTE: As written, this also lets us hoist right over another guard which |
493 | // is essentially just another spelling for control flow. |
494 | if (isWideningCondProfitable(ChecksToHoist, ChecksToWiden)) |
495 | return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; |
496 | |
497 | if (HoistingOutOfLoop) |
498 | return WS_Positive; |
499 | |
500 | // For a given basic block \p BB, return its successor which is guaranteed or |
501 | // highly likely will be taken as its successor. |
502 | auto GetLikelySuccessor = [](const BasicBlock * BB)->const BasicBlock * { |
503 | if (auto *UniqueSucc = BB->getUniqueSuccessor()) |
504 | return UniqueSucc; |
505 | auto *Term = BB->getTerminator(); |
506 | Value *Cond = nullptr; |
507 | const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr; |
508 | using namespace PatternMatch; |
509 | if (!match(V: Term, P: m_Br(C: m_Value(V&: Cond), T: m_BasicBlock(V&: IfTrue), |
510 | F: m_BasicBlock(V&: IfFalse)))) |
511 | return nullptr; |
512 | // For constant conditions, only one dynamical successor is possible |
513 | if (auto *ConstCond = dyn_cast<ConstantInt>(Val: Cond)) |
514 | return ConstCond->isAllOnesValue() ? IfTrue : IfFalse; |
515 | // If one of successors ends with deopt, another one is likely. |
516 | if (IfFalse->getPostdominatingDeoptimizeCall()) |
517 | return IfTrue; |
518 | if (IfTrue->getPostdominatingDeoptimizeCall()) |
519 | return IfFalse; |
520 | // TODO: Use branch frequency metatada to allow hoisting through non-deopt |
521 | // branches? |
522 | return nullptr; |
523 | }; |
524 | |
525 | // Returns true if we might be hoisting above explicit control flow into a |
526 | // considerably hotter block. Note that this completely ignores implicit |
527 | // control flow (guards, calls which throw, etc...). That choice appears |
528 | // arbitrary (we assume that implicit control flow exits are all rare). |
529 | auto MaybeHoistingToHotterBlock = [&]() { |
530 | const auto *DominatingBlock = WideningPoint->getParent(); |
531 | const auto *DominatedBlock = DominatedInstr->getParent(); |
532 | |
533 | // Descend as low as we can, always taking the likely successor. |
534 | assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code" ); |
535 | assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code" ); |
536 | assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance" ); |
537 | while (DominatedBlock != DominatingBlock) { |
538 | auto *LikelySucc = GetLikelySuccessor(DominatingBlock); |
539 | // No likely successor? |
540 | if (!LikelySucc) |
541 | break; |
542 | // Only go down the dominator tree. |
543 | if (!DT.properlyDominates(A: DominatingBlock, B: LikelySucc)) |
544 | break; |
545 | DominatingBlock = LikelySucc; |
546 | } |
547 | |
548 | // Found? |
549 | if (DominatedBlock == DominatingBlock) |
550 | return false; |
551 | // We followed the likely successor chain and went past the dominated |
552 | // block. It means that the dominated guard is in dead/very cold code. |
553 | if (!DT.dominates(A: DominatingBlock, B: DominatedBlock)) |
554 | return true; |
555 | // TODO: diamond, triangle cases |
556 | if (!PDT) |
557 | return true; |
558 | return !PDT->dominates(A: DominatedBlock, B: DominatingBlock); |
559 | }; |
560 | |
561 | return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral; |
562 | } |
563 | |
564 | bool GuardWideningImpl::canBeHoistedTo( |
565 | const Value *V, BasicBlock::iterator Loc, |
566 | SmallPtrSetImpl<const Instruction *> &Visited) const { |
567 | auto *Inst = dyn_cast<Instruction>(Val: V); |
568 | if (!Inst || DT.dominates(Def: Inst, User: Loc) || Visited.count(Ptr: Inst)) |
569 | return true; |
570 | |
571 | if (!isSafeToSpeculativelyExecute(I: Inst, CtxI: Loc, AC: &AC, DT: &DT) || |
572 | Inst->mayReadFromMemory()) |
573 | return false; |
574 | |
575 | Visited.insert(Ptr: Inst); |
576 | |
577 | // We only want to go _up_ the dominance chain when recursing. |
578 | assert(!isa<PHINode>(Loc) && |
579 | "PHIs should return false for isSafeToSpeculativelyExecute" ); |
580 | assert(DT.isReachableFromEntry(Inst->getParent()) && |
581 | "We did a DFS from the block entry!" ); |
582 | return all_of(Range: Inst->operands(), |
583 | P: [&](Value *Op) { return canBeHoistedTo(V: Op, Loc, Visited); }); |
584 | } |
585 | |
586 | void GuardWideningImpl::makeAvailableAt(Value *V, |
587 | BasicBlock::iterator Loc) const { |
588 | auto *Inst = dyn_cast<Instruction>(Val: V); |
589 | if (!Inst || DT.dominates(Def: Inst, User: Loc)) |
590 | return; |
591 | |
592 | assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) && |
593 | !Inst->mayReadFromMemory() && |
594 | "Should've checked with canBeHoistedTo!" ); |
595 | |
596 | for (Value *Op : Inst->operands()) |
597 | makeAvailableAt(V: Op, Loc); |
598 | |
599 | Inst->moveBefore(BB&: *Loc->getParent(), I: Loc); |
600 | } |
601 | |
602 | // Return Instruction before which we can insert freeze for the value V as close |
603 | // to def as possible. If there is no place to add freeze, return empty. |
604 | static std::optional<BasicBlock::iterator> |
605 | getFreezeInsertPt(Value *V, const DominatorTree &DT) { |
606 | auto *I = dyn_cast<Instruction>(Val: V); |
607 | if (!I) |
608 | return DT.getRoot()->getFirstNonPHIOrDbgOrAlloca()->getIterator(); |
609 | |
610 | std::optional<BasicBlock::iterator> Res = I->getInsertionPointAfterDef(); |
611 | // If there is no place to add freeze - return nullptr. |
612 | if (!Res || !DT.dominates(Def: I, User: &**Res)) |
613 | return std::nullopt; |
614 | |
615 | Instruction *ResInst = &**Res; |
616 | |
617 | // If there is a User dominated by original I, then it should be dominated |
618 | // by Freeze instruction as well. |
619 | if (any_of(Range: I->users(), P: [&](User *U) { |
620 | Instruction *User = cast<Instruction>(Val: U); |
621 | return ResInst != User && DT.dominates(Def: I, User) && |
622 | !DT.dominates(Def: ResInst, User); |
623 | })) |
624 | return std::nullopt; |
625 | return Res; |
626 | } |
627 | |
628 | Value *GuardWideningImpl::freezeAndPush(Value *Orig, |
629 | BasicBlock::iterator InsertPt) { |
630 | if (isGuaranteedNotToBePoison(V: Orig, AC: nullptr, CtxI: InsertPt, DT: &DT)) |
631 | return Orig; |
632 | std::optional<BasicBlock::iterator> InsertPtAtDef = |
633 | getFreezeInsertPt(V: Orig, DT); |
634 | if (!InsertPtAtDef) { |
635 | FreezeInst *FI = new FreezeInst(Orig, "gw.freeze" ); |
636 | FI->insertBefore(BB&: *InsertPt->getParent(), InsertPos: InsertPt); |
637 | return FI; |
638 | } |
639 | if (isa<Constant>(Val: Orig) || isa<GlobalValue>(Val: Orig)) { |
640 | BasicBlock::iterator InsertPt = *InsertPtAtDef; |
641 | FreezeInst *FI = new FreezeInst(Orig, "gw.freeze" ); |
642 | FI->insertBefore(BB&: *InsertPt->getParent(), InsertPos: InsertPt); |
643 | return FI; |
644 | } |
645 | |
646 | SmallSet<Value *, 16> Visited; |
647 | SmallVector<Value *, 16> Worklist; |
648 | SmallSet<Instruction *, 16> DropPoisonFlags; |
649 | SmallVector<Value *, 16> NeedFreeze; |
650 | DenseMap<Value *, FreezeInst *> CacheOfFreezes; |
651 | |
652 | // A bit overloaded data structures. Visited contains constant/GV |
653 | // if we already met it. In this case CacheOfFreezes has a freeze if it is |
654 | // required. |
655 | auto handleConstantOrGlobal = [&](Use &U) { |
656 | Value *Def = U.get(); |
657 | if (!isa<Constant>(Val: Def) && !isa<GlobalValue>(Val: Def)) |
658 | return false; |
659 | |
660 | if (Visited.insert(Ptr: Def).second) { |
661 | if (isGuaranteedNotToBePoison(V: Def, AC: nullptr, CtxI: InsertPt, DT: &DT)) |
662 | return true; |
663 | BasicBlock::iterator InsertPt = *getFreezeInsertPt(V: Def, DT); |
664 | FreezeInst *FI = new FreezeInst(Def, Def->getName() + ".gw.fr" ); |
665 | FI->insertBefore(BB&: *InsertPt->getParent(), InsertPos: InsertPt); |
666 | CacheOfFreezes[Def] = FI; |
667 | } |
668 | |
669 | if (CacheOfFreezes.count(Val: Def)) |
670 | U.set(CacheOfFreezes[Def]); |
671 | return true; |
672 | }; |
673 | |
674 | Worklist.push_back(Elt: Orig); |
675 | while (!Worklist.empty()) { |
676 | Value *V = Worklist.pop_back_val(); |
677 | if (!Visited.insert(Ptr: V).second) |
678 | continue; |
679 | |
680 | if (isGuaranteedNotToBePoison(V, AC: nullptr, CtxI: InsertPt, DT: &DT)) |
681 | continue; |
682 | |
683 | Instruction *I = dyn_cast<Instruction>(Val: V); |
684 | if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I), |
685 | /*ConsiderFlagsAndMetadata*/ false)) { |
686 | NeedFreeze.push_back(Elt: V); |
687 | continue; |
688 | } |
689 | // Check all operands. If for any of them we cannot insert Freeze, |
690 | // stop here. Otherwise, iterate. |
691 | if (any_of(Range: I->operands(), P: [&](Value *Op) { |
692 | return isa<Instruction>(Val: Op) && !getFreezeInsertPt(V: Op, DT); |
693 | })) { |
694 | NeedFreeze.push_back(Elt: I); |
695 | continue; |
696 | } |
697 | DropPoisonFlags.insert(Ptr: I); |
698 | for (Use &U : I->operands()) |
699 | if (!handleConstantOrGlobal(U)) |
700 | Worklist.push_back(Elt: U.get()); |
701 | } |
702 | for (Instruction *I : DropPoisonFlags) |
703 | I->dropPoisonGeneratingAnnotations(); |
704 | |
705 | Value *Result = Orig; |
706 | for (Value *V : NeedFreeze) { |
707 | BasicBlock::iterator FreezeInsertPt = *getFreezeInsertPt(V, DT); |
708 | FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr" ); |
709 | FI->insertBefore(BB&: *FreezeInsertPt->getParent(), InsertPos: FreezeInsertPt); |
710 | ++FreezeAdded; |
711 | if (V == Orig) |
712 | Result = FI; |
713 | V->replaceUsesWithIf( |
714 | New: FI, ShouldReplace: [&](const Use & U)->bool { return U.getUser() != FI; }); |
715 | } |
716 | |
717 | return Result; |
718 | } |
719 | |
/// Try to merge ChecksToHoist and ChecksToWiden into a single condition that
/// subsumes both.  Returns std::nullopt when no merged form was found.  On
/// success, returns the merged condition, which is nullptr when no InsertPt
/// was supplied (the caller is only probing whether a merge is possible,
/// without materializing any IR).
std::optional<Value *>
GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                               SmallVectorImpl<Value *> &ChecksToWiden,
                               std::optional<BasicBlock::iterator> InsertPt) {
  using namespace llvm::PatternMatch;

  Value *Result = nullptr;
  {
    // L >u C0 && L >u C1 -> L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    // TODO: Support searching for pairs to merge from both whole lists of
    // ChecksToHoist and ChecksToWiden.
    if (ChecksToWiden.size() == 1 && ChecksToHoist.size() == 1 &&
        match(V: ChecksToWiden.front(),
              P: m_ICmp(Pred&: Pred0, L: m_Value(V&: LHS), R: m_ConstantInt(CI&: RHS0))) &&
        match(V: ChecksToHoist.front(),
              P: m_ICmp(Pred&: Pred1, L: m_Specific(V: LHS), R: m_ConstantInt(CI&: RHS1)))) {

      // Both checks compare the same LHS against a constant.  Intersect the
      // two exact constant ranges; if the intersection is itself expressible
      // as a single icmp, that icmp subsumes both checks.
      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred: Pred0, Other: RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred: Pred1, Other: RHS1->getValue());

      // Given what we're doing here and the semantics of guards, it would
      // be correct to use a subset intersection, but that may be too
      // aggressive in cases we care about.
      if (std::optional<ConstantRange> Intersect =
              CR0.exactIntersectWith(CR: CR1)) {
        APInt NewRHSAP;
        CmpInst::Predicate Pred;
        if (Intersect->getEquivalentICmp(Pred, RHS&: NewRHSAP)) {
          if (InsertPt) {
            ConstantInt *NewRHS =
                ConstantInt::get(Context&: (*InsertPt)->getContext(), V: NewRHSAP);
            // LHS must dominate InsertPt (or be sinkable there) before we can
            // build the widened compare at InsertPt.
            assert(canBeHoistedTo(LHS, *InsertPt) && "must be" );
            makeAvailableAt(V: LHS, Loc: *InsertPt);
            Result = new ICmpInst(*InsertPt, Pred, LHS, NewRHS, "wide.chk" );
          }
          // Result stays nullptr when only probing (no InsertPt given).
          return Result;
        }
      }
    }
  }

  {
    // Fall back to the range-check representation: parse both check lists
    // into RangeChecks and try to combine them into a strictly smaller set.
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    if (parseRangeChecks(ToParse&: ChecksToWiden, Checks) &&
        parseRangeChecks(ToParse&: ChecksToHoist, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        // Materialize the conjunction of the combined checks at InsertPt.
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(V: RC.getCheckInst(), Loc: *InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(V1: RC.getCheckInst(), V2: Result, Name: "" ,
                                               It: *InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value" );
        Result->setName("wide.chk" );
        // Freeze the combined condition (pushing freezes toward operands
        // where possible) so the widened branch never branches on poison.
        Result = freezeAndPush(Orig: Result, InsertPt: *InsertPt);
      }
      return Result;
    }
  }
  // We were not able to compute ChecksToHoist AND ChecksToWiden for the price
  // of one.
  return std::nullopt;
}
791 | |
792 | Value *GuardWideningImpl::hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist, |
793 | Value *OldCondition, |
794 | BasicBlock::iterator InsertPt) { |
795 | assert(!ChecksToHoist.empty()); |
796 | IRBuilder<> Builder(InsertPt->getParent(), InsertPt); |
797 | makeAvailableAt(Checks: ChecksToHoist, InsertPos: InsertPt); |
798 | makeAvailableAt(V: OldCondition, Loc: InsertPt); |
799 | Value *Result = Builder.CreateAnd(Ops: ChecksToHoist); |
800 | Result = freezeAndPush(Orig: Result, InsertPt); |
801 | Result = Builder.CreateAnd(LHS: OldCondition, RHS: Result); |
802 | Result->setName("wide.chk" ); |
803 | return Result; |
804 | } |
805 | |
/// Try to interpret \p CheckCond as a range check and, on success, append the
/// parsed RangeCheck to \p Checks.  Only unsigned integer comparisons
/// (u< / u>) are recognized.  Returns false when \p CheckCond cannot be
/// interpreted this way.
bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks) {
  using namespace llvm::PatternMatch;

  auto *IC = dyn_cast<ICmpInst>(Val: CheckCond);
  if (!IC || !IC->getOperand(i_nocapture: 0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  // Canonicalize "Length u> Index" into "Index u< Length".
  const Value *CmpLHS = IC->getOperand(i_nocapture: 0), *CmpRHS = IC->getOperand(i_nocapture: 1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(a&: CmpLHS, b&: CmpRHS);

  auto &DL = IC->getDataLayout();

  // Start with Base = CmpLHS, Offset = 0, Length = CmpRHS.
  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(Val: ConstantInt::getNullValue(Ty: CmpRHS->getType())),
      CmpRHS, IC);

  // Bail out unless the length is provably non-negative.
  if (!isKnownNonNegative(V: Check.getLength(), SQ: DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  // Iterate to a fixpoint, peeling constant offsets off the base one layer
  // at a time.
  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?" );
#endif

    // (X + C) + Offset u< L  ==>  X + (Offset + C) u< L
    if (match(V: Check.getBase(), P: m_Add(L: m_Value(V&: OpLHS), R: m_ConstantInt(CI&: OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Context&: Ctx, V: NewOffset));
      Changed = true;
    } else if (match(V: Check.getBase(),
                     P: m_Or(L: m_Value(V&: OpLHS), R: m_ConstantInt(CI&: OpRHS)))) {
      // (X | C) behaves like (X + C) when the bits set in C are known to be
      // zero in X, so the same offset folding applies.
      KnownBits Known = computeKnownBits(V: OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Context&: Ctx, V: NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Elt: Check);
  return true;
}
866 | |
/// Combine the checks in \p Checks into a (hopefully) smaller set, written to
/// \p RangeChecksOut.  Groups of three or more checks sharing the same base
/// and length collapse to just the two checks with the minimum and maximum
/// offset, when the preconditions proved below hold.  \p Checks is consumed
/// (emptied) in the process.  Returns true only if the output is strictly
/// smaller than the input.
bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try to
    // merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Range&: Checks, Out: std::back_inserter(x&: CurrentChecks), P: IsCurrentCheck);
    erase_if(C&: Checks, P: IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!" );

    // Merging keeps two checks (min and max offset), so groups smaller than
    // three cannot shrink; pass them through unchanged.
    if (CurrentChecks.size() < 3) {
      llvm::append_range(C&: RangeChecksOut, R&: CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has been
    // no need to hard-code that fact.

    llvm::sort(C&: CurrentChecks, Comp: [&](const GuardWideningImpl::RangeCheck &LHS,
                                      const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS: RHS.getOffsetValue());
    });

    // Note: std::sort should not invalidate the ChecksStart iterator.

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    // Precond_1 below: the spread of offsets must not reach INT_MIN.
    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(RHS: APInt::getSignedMinValue(numBits: BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(RHS: MaxDiff);
    };

    // Precond_0 and Precond_2 below: every intermediate offset lies strictly
    // between the extremes, and the extremes differ.
    if (MaxDiff.isMinValue() || !all_of(Range: drop_begin(RangeOrContainer&: CurrentChecks), P: OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //     with forall i in [0,f]: k_f-k_i u< k_f-k_0  ... Precond_0
    //          k_f-k_0 u< INT_MIN+k_f                 ... Precond_1
    //          k_f != k_0                             ... Precond_2
    //
    //     Claim:
    //       Chk_0 AND Chk_f implies all the other checks
    //
    //     Informal proof sketch:
    //
    //       We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
    //       (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
    //       thus I+k_f is the greatest unsigned value in that range.
    //
    //       This combined with Chk_f shows that everything in that range is u< L.
    //       Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
    //       lie in [I+k_0,I+k_f], thus proving our claim.
    //
    //       To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    //       two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
    //       since k_0 != k_f).  In the former case, [I+k_0,I+k_f] is not a wrapping
    //       range by definition, and the latter case is impossible:
    //
    //         0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //         xxxxxx            xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    //
    //       For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
    //       with 'x' above) to be at least >u INT_MIN.

    RangeChecksOut.emplace_back(Args&: CurrentChecks.front());
    RangeChecksOut.emplace_back(Args&: CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!" );
  return RangeChecksOut.size() != OldCount;
}
962 | |
963 | #ifndef NDEBUG |
964 | StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) { |
965 | switch (WS) { |
966 | case WS_IllegalOrNegative: |
967 | return "IllegalOrNegative" ; |
968 | case WS_Neutral: |
969 | return "Neutral" ; |
970 | case WS_Positive: |
971 | return "Positive" ; |
972 | case WS_VeryPositive: |
973 | return "VeryPositive" ; |
974 | } |
975 | |
976 | llvm_unreachable("Fully covered switch above!" ); |
977 | } |
978 | #endif |
979 | |
980 | PreservedAnalyses GuardWideningPass::run(Function &F, |
981 | FunctionAnalysisManager &AM) { |
982 | // Avoid requesting analyses if there are no guards or widenable conditions. |
983 | auto *GuardDecl = F.getParent()->getFunction( |
984 | Name: Intrinsic::getName(id: Intrinsic::experimental_guard)); |
985 | bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty(); |
986 | auto *WCDecl = F.getParent()->getFunction( |
987 | Name: Intrinsic::getName(id: Intrinsic::experimental_widenable_condition)); |
988 | bool HasWidenableConditions = WCDecl && !WCDecl->use_empty(); |
989 | if (!HasIntrinsicGuards && !HasWidenableConditions) |
990 | return PreservedAnalyses::all(); |
991 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
992 | auto &LI = AM.getResult<LoopAnalysis>(IR&: F); |
993 | auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F); |
994 | auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F); |
995 | auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(IR&: F); |
996 | std::unique_ptr<MemorySSAUpdater> MSSAU; |
997 | if (MSSAA) |
998 | MSSAU = std::make_unique<MemorySSAUpdater>(args: &MSSAA->getMSSA()); |
999 | if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, |
1000 | DT.getRootNode(), [](BasicBlock *) { return true; }) |
1001 | .run()) |
1002 | return PreservedAnalyses::all(); |
1003 | |
1004 | PreservedAnalyses PA; |
1005 | PA.preserveSet<CFGAnalyses>(); |
1006 | PA.preserve<MemorySSAAnalysis>(); |
1007 | return PA; |
1008 | } |
1009 | |
1010 | PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM, |
1011 | LoopStandardAnalysisResults &AR, |
1012 | LPMUpdater &U) { |
1013 | BasicBlock *RootBB = L.getLoopPredecessor(); |
1014 | if (!RootBB) |
1015 | RootBB = L.getHeader(); |
1016 | auto BlockFilter = [&](BasicBlock *BB) { |
1017 | return BB == RootBB || L.contains(BB); |
1018 | }; |
1019 | std::unique_ptr<MemorySSAUpdater> MSSAU; |
1020 | if (AR.MSSA) |
1021 | MSSAU = std::make_unique<MemorySSAUpdater>(args&: AR.MSSA); |
1022 | if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC, |
1023 | MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(BB: RootBB), |
1024 | BlockFilter) |
1025 | .run()) |
1026 | return PreservedAnalyses::all(); |
1027 | |
1028 | auto PA = getLoopPassPreservedAnalyses(); |
1029 | if (AR.MSSA) |
1030 | PA.preserve<MemorySSAAnalysis>(); |
1031 | return PA; |
1032 | } |
1033 | |