1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
19#include "llvm/ADT/SetOperations.h"
20#include "llvm/ADT/SetVector.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/Analysis/AssumptionCache.h"
26#include "llvm/Analysis/CaptureTracking.h"
27#include "llvm/Analysis/ConstantFolding.h"
28#include "llvm/Analysis/DomTreeUpdater.h"
29#include "llvm/Analysis/GuardUtils.h"
30#include "llvm/Analysis/InstructionSimplify.h"
31#include "llvm/Analysis/Loads.h"
32#include "llvm/Analysis/MemorySSA.h"
33#include "llvm/Analysis/MemorySSAUpdater.h"
34#include "llvm/Analysis/TargetTransformInfo.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
40#include "llvm/IR/ConstantRange.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
51#include "llvm/IR/Instructions.h"
52#include "llvm/IR/IntrinsicInst.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/PatternMatch.h"
61#include "llvm/IR/ProfDataUtils.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
67#include "llvm/Support/BranchProbability.h"
68#include "llvm/Support/Casting.h"
69#include "llvm/Support/CommandLine.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/ErrorHandling.h"
72#include "llvm/Support/KnownBits.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Transforms/Utils/BasicBlockUtils.h"
76#include "llvm/Transforms/Utils/Cloning.h"
77#include "llvm/Transforms/Utils/Local.h"
78#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
79#include "llvm/Transforms/Utils/ValueMapper.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cstddef>
84#include <cstdint>
85#include <iterator>
86#include <map>
87#include <optional>
88#include <set>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "simplifycfg"
97
// Definition of the boolean command-line option
// llvm::RequireAndPreserveDomTree. The name is namespace-qualified, so the
// matching declaration lives outside this file. The option is registered as
// hidden and no cl::init default is given at this definition.
98cl::opt<bool> llvm::RequireAndPreserveDomTree(
99 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
100
101 cl::desc(
102 "Temporary development switch used to gradually uplift SimplifyCFG "
103 "into preserving DomTree,"));
104
// File-local tuning knobs. Every option below is `static` (internal linkage),
// hidden, and carries its default in cl::init.

105// Chosen as 2 so as to be cheap, but still to have enough power to fold
106// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
107// To catch this, we need to fold a compare and a select, hence '2' being the
108// minimum reasonable default.
109static cl::opt<unsigned> PHINodeFoldingThreshold(
110 "phi-node-folding-threshold", cl::Hidden, cl::init(Val: 2),
111 cl::desc(
112 "Control the amount of phi node folding to perform (default = 2)"));
113
// Cost budget used when folding a two-entry PHI node into a select.
114static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
115 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(Val: 4),
116 cl::desc("Control the maximal total instruction cost that we are willing "
117 "to speculatively execute to fold a 2-entry PHI node into a "
118 "select (default = 4)"));
119
// On/off switch for hoisting common instructions (enabled by default).
120static cl::opt<bool>
121 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(Val: true),
122 cl::desc("Hoist common instructions up to the parent block"));
123
// On/off switches for hoisting loads / stores on targets with conditional
// faulting support (both enabled by default).
124static cl::opt<bool> HoistLoadsWithCondFaulting(
125 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(Val: true),
126 cl::desc("Hoist loads if the target supports conditional faulting"));
127
128static cl::opt<bool> HoistStoresWithCondFaulting(
129 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(Val: true),
130 cl::desc("Hoist stores if the target supports conditional faulting"));
131
// Upper bound that goes with the two conditional-faulting switches above.
132static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
133 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(Val: 6),
134 cl::desc("Control the maximal conditional load/store that we are willing "
135 "to speculatively execute to eliminate conditional branch "
136 "(default = 6)"));
137
// Maximum number of instructions that hoisting may reorder across.
138static cl::opt<unsigned>
139 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
140 cl::init(Val: 20),
141 cl::desc("Allow reordering across at most this many "
142 "instructions when hoisting"));
143
// On/off switch for sinking common instructions (enabled by default).
144static cl::opt<bool>
145 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(Val: true),
146 cl::desc("Sink common instructions down to the end block"));
147
// On/off switches controlling how conditional stores are hoisted or merged.
// Only the "aggressively" variant is disabled by default.
148static cl::opt<bool> HoistCondStores(
149 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(Val: true),
150 cl::desc("Hoist conditional stores if an unconditional store precedes"));
151
152static cl::opt<bool> MergeCondStores(
153 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(Val: true),
154 cl::desc("Hoist conditional stores even if an unconditional store does not "
155 "precede - hoist multiple conditional stores into a single "
156 "predicated store"));
157
158static cl::opt<bool> MergeCondStoresAggressively(
159 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(Val: false),
160 cl::desc("When merging conditional stores, do so even if the resultant "
161 "basic blocks are unlikely to be if-converted as a result"));
162
// Read by dominatesMergePoint(): lets a single over-budget instruction through.
163static cl::opt<bool> SpeculateOneExpensiveInst(
164 "speculate-one-expensive-inst", cl::Hidden, cl::init(Val: true),
165 cl::desc("Allow exactly one expensive instruction to be speculatively "
166 "executed"));
167
// Read by dominatesMergePoint(): recursion stops once Depth equals this value.
168static cl::opt<unsigned> MaxSpeculationDepth(
169 "max-speculation-depth", cl::Hidden, cl::init(Val: 10),
170 cl::desc("Limit maximum recursion depth when calculating costs of "
171 "speculatively executed instructions"));
172
// Note: unlike the other numeric options here, this one is a signed int.
173static cl::opt<int>
174 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
175 cl::init(Val: 10),
176 cl::desc("Max size of a block which is still considered "
177 "small enough to thread through"));
178
179// Two is chosen to allow one negation and a logical combine.
180static cl::opt<unsigned>
181 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
182 cl::init(Val: 2),
183 cl::desc("Maximum cost of combining conditions when "
184 "folding branches"));
185
// Scales the branch-fold threshold when vector operations are present.
186static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
187 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
188 cl::init(Val: 2),
189 cl::desc("Multiplier to apply to threshold when determining whether or not "
190 "to fold branch to common destination when vector operations are "
191 "present"));
192
// On/off switch for merging invokes (enabled by default).
193static cl::opt<bool> EnableMergeCompatibleInvokes(
194 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(Val: true),
195 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
196
// Bounds the number of cases inspected for the switch-to-select conversion.
197static cl::opt<unsigned> MaxSwitchCasesPerResult(
198 "max-switch-cases-per-result", cl::Hidden, cl::init(Val: 16),
199 cl::desc("Limit cases to analyze when converting a switch to select"));
200
// Statistic counters for this file. Each STATISTIC(Name, Description) entry
// pairs a counter identifier with the human-readable text describing what is
// being counted. The code that updates them is not part of this group.
201STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
202STATISTIC(NumLinearMaps,
203 "Number of switch instructions turned into linear mapping");
204STATISTIC(NumLookupTables,
205 "Number of switch instructions turned into lookup tables");
206STATISTIC(
207 NumLookupTablesHoles,
208 "Number of switch instructions turned into lookup tables (holes checked)");
209STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
210STATISTIC(NumFoldValueComparisonIntoPredecessors,
211 "Number of value comparisons folded into predecessor basic blocks");
212STATISTIC(NumFoldBranchToCommonDest,
213 "Number of branches folded into predecessor basic block");
// Hoisting and sinking are each tracked twice: once per group ("block") of
// instructions and once per individual instruction.
214STATISTIC(
215 NumHoistCommonCode,
216 "Number of common instruction 'blocks' hoisted up to the begin block");
217STATISTIC(NumHoistCommonInstrs,
218 "Number of common instructions hoisted up to the begin block");
219STATISTIC(NumSinkCommonCode,
220 "Number of common instruction 'blocks' sunk down to the end block");
221STATISTIC(NumSinkCommonInstrs,
222 "Number of common instructions sunk down to the end block");
223STATISTIC(NumSpeculations, "Number of speculative executed instructions");
224STATISTIC(NumInvokes,
225 "Number of invokes with empty resume blocks simplified into calls");
226STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
227STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
228
229namespace {
230
231// One entry per case group: the first field is the constant that the switch
232// produces when that case group is selected, and the second field is the list
233// of case values (ConstantInt) composing the case group.
234using SwitchCaseResultVectorTy =
235 SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;
236
237// One entry per PHI: the first field is the phi node that receives a result of
238// the switch, and the second field is the constant generated for a certain
239// case in the switch for that PHI.
240using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
241
242/// ValueEqualityComparisonCase - Represents a case of a switch.
243struct ValueEqualityComparisonCase {
244 ConstantInt *Value;
245 BasicBlock *Dest;
246
247 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
248 : Value(Value), Dest(Dest) {}
249
250 bool operator<(ValueEqualityComparisonCase RHS) const {
251 // Comparing pointers is ok as we only rely on the order for uniquing.
252 return Value < RHS.Value;
253 }
254
255 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
256};
257
258class SimplifyCFGOpt {
259 const TargetTransformInfo &TTI;
260 DomTreeUpdater *DTU;
261 const DataLayout &DL;
262 ArrayRef<WeakVH> LoopHeaders;
263 const SimplifyCFGOptions &Options;
264 bool Resimplify;
265
266 Value *isValueEqualityComparison(Instruction *TI);
267 BasicBlock *getValueEqualityComparisonCases(
268 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
269 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
270 BasicBlock *Pred,
271 IRBuilder<> &Builder);
272 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
273 Instruction *PTI,
274 IRBuilder<> &Builder);
275 bool foldValueComparisonIntoPredecessors(Instruction *TI,
276 IRBuilder<> &Builder);
277
278 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
279 bool simplifySingleResume(ResumeInst *RI);
280 bool simplifyCommonResume(ResumeInst *RI);
281 bool simplifyCleanupReturn(CleanupReturnInst *RI);
282 bool simplifyUnreachable(UnreachableInst *UI);
283 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
284 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
285 bool simplifyIndirectBr(IndirectBrInst *IBI);
286 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
287 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
288 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
289
290 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
291 IRBuilder<> &Builder);
292
293 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
294 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
295 Instruction *TI, Instruction *I1,
296 SmallVectorImpl<Instruction *> &OtherSuccTIs);
297 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
298 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
299 BasicBlock *TrueBB, BasicBlock *FalseBB,
300 uint32_t TrueWeight, uint32_t FalseWeight);
301 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
302 const DataLayout &DL);
303 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
304 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
305 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
306
307public:
308 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
309 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
310 const SimplifyCFGOptions &Opts)
311 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
312 assert((!DTU || !DTU->hasPostDomTree()) &&
313 "SimplifyCFG is not yet capable of maintaining validity of a "
314 "PostDomTree, so don't ask for it.");
315 }
316
317 bool simplifyOnce(BasicBlock *BB);
318 bool run(BasicBlock *BB);
319
320 // Helper to set Resimplify and return change indication.
321 bool requestResimplify() {
322 Resimplify = true;
323 return true;
324 }
325};
326
327} // end anonymous namespace
328
329/// Return true if all the PHI nodes in the basic block \p BB
330/// receive compatible (identical) incoming values when coming from
331/// all of the predecessor blocks that are specified in \p IncomingBlocks.
332///
333/// Note that if the values aren't exactly identical, but \p EquivalenceSet
334/// is provided, and *both* of the values are present in the set,
335/// then they are considered equal.
336static bool incomingValuesAreCompatible(
337 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
338 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
339 assert(IncomingBlocks.size() == 2 &&
340 "Only for a pair of incoming blocks at the time!");
341
342 // FIXME: it is okay if one of the incoming values is an `undef` value,
343 // iff the other incoming value is guaranteed to be a non-poison value.
344 // FIXME: it is okay if one of the incoming values is a `poison` value.
345 return all_of(Range: BB->phis(), P: [IncomingBlocks, EquivalenceSet](PHINode &PN) {
346 Value *IV0 = PN.getIncomingValueForBlock(BB: IncomingBlocks[0]);
347 Value *IV1 = PN.getIncomingValueForBlock(BB: IncomingBlocks[1]);
348 if (IV0 == IV1)
349 return true;
350 if (EquivalenceSet && EquivalenceSet->contains(Ptr: IV0) &&
351 EquivalenceSet->contains(Ptr: IV1))
352 return true;
353 return false;
354 });
355}
356
357/// Return true if it is safe to merge these two
358/// terminator instructions together.
359static bool
360safeToMergeTerminators(Instruction *SI1, Instruction *SI2,
361 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
362 if (SI1 == SI2)
363 return false; // Can't merge with self!
364
365 // It is not safe to merge these two switch instructions if they have a common
366 // successor, and if that successor has a PHI node, and if *that* PHI node has
367 // conflicting incoming values from the two switch blocks.
368 BasicBlock *SI1BB = SI1->getParent();
369 BasicBlock *SI2BB = SI2->getParent();
370
371 SmallPtrSet<BasicBlock *, 16> SI1Succs(llvm::from_range, successors(BB: SI1BB));
372 bool Fail = false;
373 for (BasicBlock *Succ : successors(BB: SI2BB)) {
374 if (!SI1Succs.count(Ptr: Succ))
375 continue;
376 if (incomingValuesAreCompatible(BB: Succ, IncomingBlocks: {SI1BB, SI2BB}))
377 continue;
378 Fail = true;
379 if (FailBlocks)
380 FailBlocks->insert(X: Succ);
381 else
382 break;
383 }
384
385 return !Fail;
386}
387
388/// Update PHI nodes in Succ to indicate that there will now be entries in it
389/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
390/// will be the same as those coming in from ExistPred, an existing predecessor
391/// of Succ.
392static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
393 BasicBlock *ExistPred,
394 MemorySSAUpdater *MSSAU = nullptr) {
395 for (PHINode &PN : Succ->phis())
396 PN.addIncoming(V: PN.getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
397 if (MSSAU)
398 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(BB: Succ))
399 MPhi->addIncoming(V: MPhi->getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
400}
401
402/// Compute an abstract "cost" of speculating the given instruction,
403/// which is assumed to be safe to speculate. TCC_Free means cheap,
404/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
405/// expensive.
406static InstructionCost computeSpeculationCost(const User *I,
407 const TargetTransformInfo &TTI) {
408 return TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
409}
410
411/// If we have a merge point of an "if condition" as accepted above,
412/// return true if the specified value dominates the block. We don't handle
413/// the true generality of domination here, just a special case which works
414/// well enough for us.
415///
416/// If V does not dominate BB, we check to see if V (which must be an
417/// instruction) and its recursive operands that do not dominate BB are safe
418/// to speculatively execute at InsertPt and have a combined cost that does
419/// not exceed Budget. If both are true, the instructions are inserted into
420/// AggressiveInsts and true is returned.
421///
422/// The cost of an instruction is taken from computeSpeculationCost(), except
423/// for the with.overflow + extractvalue pattern handled below.
424///
425/// After this function returns, Cost is increased by the cost of
426/// V plus its non-dominating operands. If that cost is greater than
427/// Budget, false is returned and Cost is undefined.
///
/// ZeroCostInstructions collects instructions whose cost has already been
/// accounted for by another instruction and which are therefore not charged
/// again. Depth is the current recursion depth; the function gives up once it
/// reaches MaxSpeculationDepth.
428static bool dominatesMergePoint(
429 Value *V, BasicBlock *BB, Instruction *InsertPt,
430 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
431 InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
432 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
433 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
434 // so limit the recursion depth.
435 // TODO: While this recursion limit does prevent pathological behavior, it
436 // would be better to track visited instructions to avoid cycles.
437 if (Depth == MaxSpeculationDepth)
438 return false;
439
440 Instruction *I = dyn_cast<Instruction>(Val: V);
441 if (!I) {
442 // Non-instructions dominate all instructions and can be executed
443 // unconditionally.
444 return true;
445 }
446 BasicBlock *PBB = I->getParent();
447
448 // We don't want to allow weird loops that might have the "if condition" in
449 // the bottom of this block.
450 if (PBB == BB)
451 return false;
452
453 // If this instruction is defined in a block that contains an unconditional
454 // branch to BB, then it must be in the 'conditional' part of the "if
455 // statement". If not, it definitely dominates the region.
456 BranchInst *BI = dyn_cast<BranchInst>(Val: PBB->getTerminator());
457 if (!BI || BI->isConditional() || BI->getSuccessor(i: 0) != BB)
458 return true;
459
460 // If we have seen this instruction before, don't count it again.
461 if (AggressiveInsts.count(Ptr: I))
462 return true;
463
464 // Okay, it looks like the instruction IS in the "condition". Check to
465 // see if it's a cheap instruction to unconditionally compute, and if it
466 // only uses stuff defined outside of the condition. If so, hoist it out.
467 if (!isSafeToSpeculativelyExecute(I, CtxI: InsertPt, AC))
468 return false;
469
470 // Overflow arithmetic instruction plus extract value are usually generated
471 // when a division is being replaced. But, in this case, the zero check may
472 // still be kept in the code. In that case it would be worth to hoist these
473 // two instruction out of the basic block. Let's treat this pattern as one
474 // single cheap instruction here!
475 WithOverflowInst *OverflowInst;
476 if (match(V: I, P: m_ExtractValue<1>(V: m_OneUse(SubPattern: m_WithOverflowInst(I&: OverflowInst))))) {
 // Charge the pair once (cost 1) here; the overflow intrinsic itself is
 // remembered so that it is not charged when it is visited as an operand.
477 ZeroCostInstructions.insert(Ptr: OverflowInst);
478 Cost += 1;
479 } else if (!ZeroCostInstructions.contains(Ptr: I))
480 Cost += computeSpeculationCost(I, TTI);
481
482 // Allow exactly one instruction to be speculated regardless of its cost
483 // (as long as it is safe to do so).
484 // This is intended to flatten the CFG even if the instruction is a division
485 // or other expensive operation. The speculation of an expensive instruction
486 // is expected to be undone in CodeGenPrepare if the speculation has not
487 // enabled further IR optimizations.
 // The exception only applies to the outermost call (Depth == 0), when
 // nothing has been collected yet and the accumulated cost is valid.
488 if (Cost > Budget &&
489 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
490 !Cost.isValid()))
491 return false;
492
493 // Okay, we can only really hoist these out if their operands do
494 // not take us over the cost threshold.
495 for (Use &Op : I->operands())
496 if (!dominatesMergePoint(V: Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
497 TTI, AC, ZeroCostInstructions, Depth: Depth + 1))
498 return false;
499 // Okay, it's safe to do this! Remember this instruction.
500 AggressiveInsts.insert(Ptr: I);
501 return true;
502}
503
504/// Extract ConstantInt from value, looking through IntToPtr
505/// and PointerNullValue. Return NULL if value is not a constant int.
506static ConstantInt *getConstantInt(Value *V, const DataLayout &DL) {
507 // Normal constant int.
508 ConstantInt *CI = dyn_cast<ConstantInt>(Val: V);
509 if (CI || !isa<Constant>(Val: V) || !V->getType()->isPointerTy() ||
510 DL.isNonIntegralPointerType(Ty: V->getType()))
511 return CI;
512
513 // This is some kind of pointer constant. Turn it into a pointer-sized
514 // ConstantInt if possible.
515 IntegerType *PtrTy = cast<IntegerType>(Val: DL.getIntPtrType(V->getType()));
516
517 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
518 if (isa<ConstantPointerNull>(Val: V))
519 return ConstantInt::get(Ty: PtrTy, V: 0);
520
521 // IntToPtr const int.
522 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
523 if (CE->getOpcode() == Instruction::IntToPtr)
524 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CE->getOperand(i_nocapture: 0))) {
525 // The constant is very likely to have the right type already.
526 if (CI->getType() == PtrTy)
527 return CI;
528 else
529 return cast<ConstantInt>(
530 Val: ConstantFoldIntegerCast(C: CI, DestTy: PtrTy, /*isSigned=*/IsSigned: false, DL));
531 }
532 return nullptr;
533}
534
535namespace {
536
537/// Given a chain of or (||) or and (&&) comparison of a value against a
538/// constant, this will try to recover the information required for a switch
539/// structure.
540/// It will depth-first traverse the chain of comparison, seeking for patterns
541/// like %a == 12 or %a < 4 and combine them to produce a set of integer
542/// representing the different cases for the switch.
543/// Note that if the chain is composed of '||' it will build the set of elements
544/// that matches the comparisons (i.e. any of this value validate the chain)
545/// while for a chain of '&&' it will build the set elements that make the test
546/// fail.
547struct ConstantComparesGatherer {
548 const DataLayout &DL;
549
550 /// Value found for the switch comparison
551 Value *CompValue = nullptr;
552
553 /// Extra clause to be checked before the switch
554 Value *Extra = nullptr;
555
556 /// Set of integers to match in switch
557 SmallVector<ConstantInt *, 8> Vals;
558
559 /// Number of comparisons matched in the and/or chain
560 unsigned UsedICmps = 0;
561
562 /// Construct and compute the result for the comparison instruction Cond
563 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
564 gather(V: Cond);
565 }
566
567 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
568 ConstantComparesGatherer &
569 operator=(const ConstantComparesGatherer &) = delete;
570
571private:
572 /// Try to set the current value used for the comparison, it succeeds only if
573 /// it wasn't set before or if the new value is the same as the old one
574 bool setValueOnce(Value *NewVal) {
575 if (CompValue && CompValue != NewVal)
576 return false;
577 CompValue = NewVal;
578 return (CompValue != nullptr);
579 }
580
581 /// Try to match Instruction "I" as a comparison against a constant and
582 /// populates the array Vals with the set of values that match (or do not
583 /// match depending on isEQ).
584 /// Return false on failure. On success, the Value the comparison matched
585 /// against is placed in CompValue.
586 /// If CompValue is already set, the function is expected to fail if a match
587 /// is found but the value compared to is different.
588 bool matchInstruction(Instruction *I, bool isEQ) {
589 // If this is an icmp against a constant, handle this as one of the cases.
590 ICmpInst *ICI;
591 ConstantInt *C;
592 if (!((ICI = dyn_cast<ICmpInst>(Val: I)) &&
593 (C = getConstantInt(V: I->getOperand(i: 1), DL)))) {
594 return false;
595 }
596
597 Value *RHSVal;
598 const APInt *RHSC;
599
600 // Pattern match a special case
601 // (x & ~2^z) == y --> x == y || x == y|2^z
602 // This undoes a transformation done by instcombine to fuse 2 compares.
603 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
604 // It's a little bit hard to see why the following transformations are
605 // correct. Here is a CVC3 program to verify them for 64-bit values:
606
607 /*
608 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
609 x : BITVECTOR(64);
610 y : BITVECTOR(64);
611 z : BITVECTOR(64);
612 mask : BITVECTOR(64) = BVSHL(ONE, z);
613 QUERY( (y & ~mask = y) =>
614 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
615 );
616 QUERY( (y | mask = y) =>
617 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
618 );
619 */
620
621 // Please note that each pattern must be a dual implication (<--> or
622 // iff). One directional implication can create spurious matches. If the
623 // implication is only one-way, an unsatisfiable condition on the left
624 // side can imply a satisfiable condition on the right side. Dual
625 // implication ensures that satisfiable conditions are transformed to
626 // other satisfiable conditions and unsatisfiable conditions are
627 // transformed to other unsatisfiable conditions.
628
629 // Here is a concrete example of a unsatisfiable condition on the left
630 // implying a satisfiable condition on the right:
631 //
632 // mask = (1 << z)
633 // (x & ~mask) == y --> (x == y || x == (y | mask))
634 //
635 // Substituting y = 3, z = 0 yields:
636 // (x & -2) == 3 --> (x == 3 || x == 2)
637
638 // Pattern match a special case:
639 /*
640 QUERY( (y & ~mask = y) =>
641 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
642 );
643 */
644 if (match(V: ICI->getOperand(i_nocapture: 0),
645 P: m_And(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
646 APInt Mask = ~*RHSC;
647 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
648 // If we already have a value for the switch, it has to match!
649 if (!setValueOnce(RHSVal))
650 return false;
651
652 Vals.push_back(Elt: C);
653 Vals.push_back(
654 Elt: ConstantInt::get(Context&: C->getContext(),
655 V: C->getValue() | Mask));
656 UsedICmps++;
657 return true;
658 }
659 }
660
661 // Pattern match a special case:
662 /*
663 QUERY( (y | mask = y) =>
664 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
665 );
666 */
667 if (match(V: ICI->getOperand(i_nocapture: 0),
668 P: m_Or(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
669 APInt Mask = *RHSC;
670 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
671 // If we already have a value for the switch, it has to match!
672 if (!setValueOnce(RHSVal))
673 return false;
674
675 Vals.push_back(Elt: C);
676 Vals.push_back(Elt: ConstantInt::get(Context&: C->getContext(),
677 V: C->getValue() & ~Mask));
678 UsedICmps++;
679 return true;
680 }
681 }
682
683 // If we already have a value for the switch, it has to match!
684 if (!setValueOnce(ICI->getOperand(i_nocapture: 0)))
685 return false;
686
687 UsedICmps++;
688 Vals.push_back(Elt: C);
689 return ICI->getOperand(i_nocapture: 0);
690 }
691
692 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
693 ConstantRange Span =
694 ConstantRange::makeExactICmpRegion(Pred: ICI->getPredicate(), Other: C->getValue());
695
696 // Shift the range if the compare is fed by an add. This is the range
697 // compare idiom as emitted by instcombine.
698 Value *CandidateVal = I->getOperand(i: 0);
699 if (match(V: I->getOperand(i: 0), P: m_Add(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
700 Span = Span.subtract(CI: *RHSC);
701 CandidateVal = RHSVal;
702 }
703
704 // If this is an and/!= check, then we are looking to build the set of
705 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
706 // x != 0 && x != 1.
707 if (!isEQ)
708 Span = Span.inverse();
709
710 // If there are a ton of values, we don't want to make a ginormous switch.
711 if (Span.isSizeLargerThan(MaxSize: 8) || Span.isEmptySet()) {
712 return false;
713 }
714
715 // If we already have a value for the switch, it has to match!
716 if (!setValueOnce(CandidateVal))
717 return false;
718
719 // Add all values from the range to the set
720 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
721 Vals.push_back(Elt: ConstantInt::get(Context&: I->getContext(), V: Tmp));
722
723 UsedICmps++;
724 return true;
725 }
726
727 /// Given a potentially 'or'd or 'and'd together collection of icmp
728 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
729 /// the value being compared, and stick the list constants into the Vals
730 /// vector.
731 /// One "Extra" case is allowed to differ from the other.
732 void gather(Value *V) {
733 bool isEQ = match(V, P: m_LogicalOr(L: m_Value(), R: m_Value()));
734
735 // Keep a stack (SmallVector for efficiency) for depth-first traversal
736 SmallVector<Value *, 8> DFT;
737 SmallPtrSet<Value *, 8> Visited;
738
739 // Initialize
740 Visited.insert(Ptr: V);
741 DFT.push_back(Elt: V);
742
743 while (!DFT.empty()) {
744 V = DFT.pop_back_val();
745
746 if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
747 // If it is a || (or && depending on isEQ), process the operands.
748 Value *Op0, *Op1;
749 if (isEQ ? match(V: I, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))
750 : match(V: I, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) {
751 if (Visited.insert(Ptr: Op1).second)
752 DFT.push_back(Elt: Op1);
753 if (Visited.insert(Ptr: Op0).second)
754 DFT.push_back(Elt: Op0);
755
756 continue;
757 }
758
759 // Try to match the current instruction
760 if (matchInstruction(I, isEQ))
761 // Match succeed, continue the loop
762 continue;
763 }
764
765 // One element of the sequence of || (or &&) could not be match as a
766 // comparison against the same value as the others.
767 // We allow only one "Extra" case to be checked before the switch
768 if (!Extra) {
769 Extra = V;
770 continue;
771 }
772 // Failed to parse a proper sequence, abort now
773 CompValue = nullptr;
774 break;
775 }
776 }
777};
778
779} // end anonymous namespace
780
781static void eraseTerminatorAndDCECond(Instruction *TI,
782 MemorySSAUpdater *MSSAU = nullptr) {
783 Instruction *Cond = nullptr;
784 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
785 Cond = dyn_cast<Instruction>(Val: SI->getCondition());
786 } else if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) {
787 if (BI->isConditional())
788 Cond = dyn_cast<Instruction>(Val: BI->getCondition());
789 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(Val: TI)) {
790 Cond = dyn_cast<Instruction>(Val: IBI->getAddress());
791 }
792
793 TI->eraseFromParent();
794 if (Cond)
795 RecursivelyDeleteTriviallyDeadInstructions(V: Cond, TLI: nullptr, MSSAU);
796}
797
798/// Return true if the specified terminator checks
799/// to see if a value is equal to constant integer value.
800Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
801 Value *CV = nullptr;
802 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
803 // Do not permit merging of large switch instructions into their
804 // predecessors unless there is only one predecessor.
805 if (!SI->getParent()->hasNPredecessorsOrMore(N: 128 / SI->getNumSuccessors()))
806 CV = SI->getCondition();
807 } else if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI))
808 if (BI->isConditional() && BI->getCondition()->hasOneUse())
809 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition())) {
810 if (ICI->isEquality() && getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL))
811 CV = ICI->getOperand(i_nocapture: 0);
812 }
813
814 // Unwrap any lossless ptrtoint cast.
815 if (CV) {
816 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(Val: CV)) {
817 Value *Ptr = PTII->getPointerOperand();
818 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
819 CV = Ptr;
820 }
821 }
822 return CV;
823}
824
825/// Given a value comparison instruction,
826/// decode all of the 'cases' that it represents and return the 'default' block.
827BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
828 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
829 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
830 Cases.reserve(n: SI->getNumCases());
831 for (auto Case : SI->cases())
832 Cases.push_back(x: ValueEqualityComparisonCase(Case.getCaseValue(),
833 Case.getCaseSuccessor()));
834 return SI->getDefaultDest();
835 }
836
837 BranchInst *BI = cast<BranchInst>(Val: TI);
838 ICmpInst *ICI = cast<ICmpInst>(Val: BI->getCondition());
839 BasicBlock *Succ = BI->getSuccessor(i: ICI->getPredicate() == ICmpInst::ICMP_NE);
840 Cases.push_back(x: ValueEqualityComparisonCase(
841 getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL), Succ));
842 return BI->getSuccessor(i: ICI->getPredicate() == ICmpInst::ICMP_EQ);
843}
844
845/// Given a vector of bb/value pairs, remove any entries
846/// in the list that match the specified block.
847static void
848eliminateBlockCases(BasicBlock *BB,
849 std::vector<ValueEqualityComparisonCase> &Cases) {
850 llvm::erase(C&: Cases, V: BB);
851}
852
853/// Return true if there are any keys in C1 that exist in C2 as well.
854static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
855 std::vector<ValueEqualityComparisonCase> &C2) {
856 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
857
858 // Make V1 be smaller than V2.
859 if (V1->size() > V2->size())
860 std::swap(a&: V1, b&: V2);
861
862 if (V1->empty())
863 return false;
864 if (V1->size() == 1) {
865 // Just scan V2.
866 ConstantInt *TheVal = (*V1)[0].Value;
867 for (const ValueEqualityComparisonCase &VECC : *V2)
868 if (TheVal == VECC.Value)
869 return true;
870 }
871
872 // Otherwise, just sort both lists and compare element by element.
873 array_pod_sort(Start: V1->begin(), End: V1->end());
874 array_pod_sort(Start: V2->begin(), End: V2->end());
875 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
876 while (i1 != e1 && i2 != e2) {
877 if ((*V1)[i1].Value == (*V2)[i2].Value)
878 return true;
879 if ((*V1)[i1].Value < (*V2)[i2].Value)
880 ++i1;
881 else
882 ++i2;
883 }
884 return false;
885}
886
887// Set branch weights on SwitchInst. This sets the metadata if there is at
888// least one non-zero weight.
889static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights,
890 bool IsExpected) {
891 // Check that there is at least one non-zero weight. Otherwise, pass
892 // nullptr to setMetadata which will erase the existing metadata.
893 MDNode *N = nullptr;
894 if (llvm::any_of(Range&: Weights, P: [](uint32_t W) { return W != 0; }))
895 N = MDBuilder(SI->getParent()->getContext())
896 .createBranchWeights(Weights, IsExpected);
897 SI->setMetadata(KindID: LLVMContext::MD_prof, Node: N);
898}
899
900// Similar to the above, but for branch and select instructions that take
901// exactly 2 weights.
902static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
903 uint32_t FalseWeight, bool IsExpected) {
904 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
905 // Check that there is at least one non-zero weight. Otherwise, pass
906 // nullptr to setMetadata which will erase the existing metadata.
907 MDNode *N = nullptr;
908 if (TrueWeight || FalseWeight)
909 N = MDBuilder(I->getParent()->getContext())
910 .createBranchWeights(TrueWeight, FalseWeight, IsExpected);
911 I->setMetadata(KindID: LLVMContext::MD_prof, Node: N);
912}
913
914/// If TI is known to be a terminator instruction and its block is known to
915/// only have a single predecessor block, check to see if that predecessor is
916/// also a value comparison with the same value, and if that comparison
917/// determines the outcome of this comparison. If so, simplify TI. This does a
918/// very limited form of jump threading.
919bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
920 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
921 Value *PredVal = isValueEqualityComparison(TI: Pred->getTerminator());
922 if (!PredVal)
923 return false; // Not a value comparison in predecessor.
924
925 Value *ThisVal = isValueEqualityComparison(TI);
926 assert(ThisVal && "This isn't a value comparison!!");
927 if (ThisVal != PredVal)
928 return false; // Different predicates.
929
930 // TODO: Preserve branch weight metadata, similarly to how
931 // foldValueComparisonIntoPredecessors preserves it.
932
933 // Find out information about when control will move from Pred to TI's block.
934 std::vector<ValueEqualityComparisonCase> PredCases;
935 BasicBlock *PredDef =
936 getValueEqualityComparisonCases(TI: Pred->getTerminator(), Cases&: PredCases);
937 eliminateBlockCases(BB: PredDef, Cases&: PredCases); // Remove default from cases.
938
939 // Find information about how control leaves this block.
940 std::vector<ValueEqualityComparisonCase> ThisCases;
941 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, Cases&: ThisCases);
942 eliminateBlockCases(BB: ThisDef, Cases&: ThisCases); // Remove default from cases.
943
944 // If TI's block is the default block from Pred's comparison, potentially
945 // simplify TI based on this knowledge.
946 if (PredDef == TI->getParent()) {
947 // If we are here, we know that the value is none of those cases listed in
948 // PredCases. If there are any cases in ThisCases that are in PredCases, we
949 // can simplify TI.
950 if (!valuesOverlap(C1&: PredCases, C2&: ThisCases))
951 return false;
952
953 if (isa<BranchInst>(Val: TI)) {
954 // Okay, one of the successors of this condbr is dead. Convert it to a
955 // uncond br.
956 assert(ThisCases.size() == 1 && "Branch can only have one case!");
957 // Insert the new branch.
958 Instruction *NI = Builder.CreateBr(Dest: ThisDef);
959 (void)NI;
960
961 // Remove PHI node entries for the dead edge.
962 ThisCases[0].Dest->removePredecessor(Pred: PredDef);
963
964 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
965 << "Through successor TI: " << *TI << "Leaving: " << *NI
966 << "\n");
967
968 eraseTerminatorAndDCECond(TI);
969
970 if (DTU)
971 DTU->applyUpdates(
972 Updates: {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
973
974 return true;
975 }
976
977 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(Val: TI);
978 // Okay, TI has cases that are statically dead, prune them away.
979 SmallPtrSet<Constant *, 16> DeadCases;
980 for (const ValueEqualityComparisonCase &Case : PredCases)
981 DeadCases.insert(Ptr: Case.Value);
982
983 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
984 << "Through successor TI: " << *TI);
985
986 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
987 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
988 --i;
989 auto *Successor = i->getCaseSuccessor();
990 if (DTU)
991 ++NumPerSuccessorCases[Successor];
992 if (DeadCases.count(Ptr: i->getCaseValue())) {
993 Successor->removePredecessor(Pred: PredDef);
994 SI.removeCase(I: i);
995 if (DTU)
996 --NumPerSuccessorCases[Successor];
997 }
998 }
999
1000 if (DTU) {
1001 std::vector<DominatorTree::UpdateType> Updates;
1002 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1003 if (I.second == 0)
1004 Updates.push_back(x: {DominatorTree::Delete, PredDef, I.first});
1005 DTU->applyUpdates(Updates);
1006 }
1007
1008 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1009 return true;
1010 }
1011
1012 // Otherwise, TI's block must correspond to some matched value. Find out
1013 // which value (or set of values) this is.
1014 ConstantInt *TIV = nullptr;
1015 BasicBlock *TIBB = TI->getParent();
1016 for (const auto &[Value, Dest] : PredCases)
1017 if (Dest == TIBB) {
1018 if (TIV)
1019 return false; // Cannot handle multiple values coming to this block.
1020 TIV = Value;
1021 }
1022 assert(TIV && "No edge from pred to succ?");
1023
1024 // Okay, we found the one constant that our value can be if we get into TI's
1025 // BB. Find out which successor will unconditionally be branched to.
1026 BasicBlock *TheRealDest = nullptr;
1027 for (const auto &[Value, Dest] : ThisCases)
1028 if (Value == TIV) {
1029 TheRealDest = Dest;
1030 break;
1031 }
1032
1033 // If not handled by any explicit cases, it is handled by the default case.
1034 if (!TheRealDest)
1035 TheRealDest = ThisDef;
1036
1037 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1038
1039 // Remove PHI node entries for dead edges.
1040 BasicBlock *CheckEdge = TheRealDest;
1041 for (BasicBlock *Succ : successors(BB: TIBB))
1042 if (Succ != CheckEdge) {
1043 if (Succ != TheRealDest)
1044 RemovedSuccs.insert(Ptr: Succ);
1045 Succ->removePredecessor(Pred: TIBB);
1046 } else
1047 CheckEdge = nullptr;
1048
1049 // Insert the new branch.
1050 Instruction *NI = Builder.CreateBr(Dest: TheRealDest);
1051 (void)NI;
1052
1053 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1054 << "Through successor TI: " << *TI << "Leaving: " << *NI
1055 << "\n");
1056
1057 eraseTerminatorAndDCECond(TI);
1058 if (DTU) {
1059 SmallVector<DominatorTree::UpdateType, 2> Updates;
1060 Updates.reserve(N: RemovedSuccs.size());
1061 for (auto *RemovedSucc : RemovedSuccs)
1062 Updates.push_back(Elt: {DominatorTree::Delete, TIBB, RemovedSucc});
1063 DTU->applyUpdates(Updates);
1064 }
1065 return true;
1066}
1067
1068namespace {
1069
1070/// This class implements a stable ordering of constant
1071/// integers that does not depend on their address. This is important for
1072/// applications that sort ConstantInt's to ensure uniqueness.
1073struct ConstantIntOrdering {
1074 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1075 return LHS->getValue().ult(RHS: RHS->getValue());
1076 }
1077};
1078
1079} // end anonymous namespace
1080
1081static int constantIntSortPredicate(ConstantInt *const *P1,
1082 ConstantInt *const *P2) {
1083 const ConstantInt *LHS = *P1;
1084 const ConstantInt *RHS = *P2;
1085 if (LHS == RHS)
1086 return 0;
1087 return LHS->getValue().ult(RHS: RHS->getValue()) ? 1 : -1;
1088}
1089
1090/// Get Weights of a given terminator, the default weight is at the front
1091/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1092/// metadata.
1093static void getBranchWeights(Instruction *TI,
1094 SmallVectorImpl<uint64_t> &Weights) {
1095 MDNode *MD = TI->getMetadata(KindID: LLVMContext::MD_prof);
1096 assert(MD && "Invalid branch-weight metadata");
1097 extractFromBranchWeightMD64(ProfileData: MD, Weights);
1098
1099 // If TI is a conditional eq, the default case is the false case,
1100 // and the corresponding branch-weight data is at index 2. We swap the
1101 // default weight to be the first entry.
1102 if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) {
1103 assert(Weights.size() == 2);
1104 ICmpInst *ICI = cast<ICmpInst>(Val: BI->getCondition());
1105 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1106 std::swap(a&: Weights.front(), b&: Weights.back());
1107 }
1108}
1109
1110/// Keep halving the weights until all can fit in uint32_t.
1111static void fitWeights(MutableArrayRef<uint64_t> Weights) {
1112 uint64_t Max = *llvm::max_element(Range&: Weights);
1113 if (Max > UINT_MAX) {
1114 unsigned Offset = 32 - llvm::countl_zero(Val: Max);
1115 for (uint64_t &I : Weights)
1116 I >>= Offset;
1117 }
1118}
1119
1120static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
1121 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1122 Instruction *PTI = PredBlock->getTerminator();
1123
1124 // If we have bonus instructions, clone them into the predecessor block.
1125 // Note that there may be multiple predecessor blocks, so we cannot move
1126 // bonus instructions to a predecessor block.
1127 for (Instruction &BonusInst : *BB) {
1128 if (BonusInst.isTerminator())
1129 continue;
1130
1131 Instruction *NewBonusInst = BonusInst.clone();
1132
1133 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(Other: PTI->getDebugLoc())) {
1134 // Unless the instruction has the same !dbg location as the original
1135 // branch, drop it. When we fold the bonus instructions we want to make
1136 // sure we reset their debug locations in order to avoid stepping on
1137 // dead code caused by folding dead branches.
1138 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1139 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1140 mapAtomInstance(DL, VMap);
1141 }
1142
1143 RemapInstruction(I: NewBonusInst, VM&: VMap,
1144 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1145
1146 // If we speculated an instruction, we need to drop any metadata that may
1147 // result in undefined behavior, as the metadata might have been valid
1148 // only given the branch precondition.
1149 // Similarly strip attributes on call parameters that may cause UB in
1150 // location the call is moved to.
1151 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1152
1153 NewBonusInst->insertInto(ParentBB: PredBlock, It: PTI->getIterator());
1154 auto Range = NewBonusInst->cloneDebugInfoFrom(From: &BonusInst);
1155 RemapDbgRecordRange(M: NewBonusInst->getModule(), Range, VM&: VMap,
1156 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1157
1158 NewBonusInst->takeName(V: &BonusInst);
1159 BonusInst.setName(NewBonusInst->getName() + ".old");
1160 VMap[&BonusInst] = NewBonusInst;
1161
1162 // Update (liveout) uses of bonus instructions,
1163 // now that the bonus instruction has been cloned into predecessor.
1164 // Note that we expect to be in a block-closed SSA form for this to work!
1165 for (Use &U : make_early_inc_range(Range: BonusInst.uses())) {
1166 auto *UI = cast<Instruction>(Val: U.getUser());
1167 auto *PN = dyn_cast<PHINode>(Val: UI);
1168 if (!PN) {
1169 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1170 "If the user is not a PHI node, then it should be in the same "
1171 "block as, and come after, the original bonus instruction.");
1172 continue; // Keep using the original bonus instruction.
1173 }
1174 // Is this the block-closed SSA form PHI node?
1175 if (PN->getIncomingBlock(U) == BB)
1176 continue; // Great, keep using the original bonus instruction.
1177 // The only other alternative is an "use" when coming from
1178 // the predecessor block - here we should refer to the cloned bonus instr.
1179 assert(PN->getIncomingBlock(U) == PredBlock &&
1180 "Not in block-closed SSA form?");
1181 U.set(NewBonusInst);
1182 }
1183 }
1184
1185 // Key Instructions: We may have propagated atom info into the pred. If the
1186 // pred's terminator already has atom info do nothing as merging would drop
1187 // one atom group anyway. If it doesn't, propagte the remapped atom group
1188 // from BB's terminator.
1189 if (auto &PredDL = PTI->getDebugLoc()) {
1190 auto &DL = BB->getTerminator()->getDebugLoc();
1191 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1192 PredDL.isSameSourceLocation(Other: DL)) {
1193 PTI->setDebugLoc(DL);
1194 RemapSourceAtom(I: PTI, VM&: VMap);
1195 }
1196 }
1197}
1198
1199bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1200 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1201 BasicBlock *BB = TI->getParent();
1202 BasicBlock *Pred = PTI->getParent();
1203
1204 SmallVector<DominatorTree::UpdateType, 32> Updates;
1205
1206 // Figure out which 'cases' to copy from SI to PSI.
1207 std::vector<ValueEqualityComparisonCase> BBCases;
1208 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, Cases&: BBCases);
1209
1210 std::vector<ValueEqualityComparisonCase> PredCases;
1211 BasicBlock *PredDefault = getValueEqualityComparisonCases(TI: PTI, Cases&: PredCases);
1212
1213 // Based on whether the default edge from PTI goes to BB or not, fill in
1214 // PredCases and PredDefault with the new switch cases we would like to
1215 // build.
1216 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1217
1218 // Update the branch weight metadata along the way
1219 SmallVector<uint64_t, 8> Weights;
1220 bool PredHasWeights = hasBranchWeightMD(I: *PTI);
1221 bool SuccHasWeights = hasBranchWeightMD(I: *TI);
1222
1223 if (PredHasWeights) {
1224 getBranchWeights(TI: PTI, Weights);
1225 // branch-weight metadata is inconsistent here.
1226 if (Weights.size() != 1 + PredCases.size())
1227 PredHasWeights = SuccHasWeights = false;
1228 } else if (SuccHasWeights)
1229 // If there are no predecessor weights but there are successor weights,
1230 // populate Weights with 1, which will later be scaled to the sum of
1231 // successor's weights
1232 Weights.assign(NumElts: 1 + PredCases.size(), Elt: 1);
1233
1234 SmallVector<uint64_t, 8> SuccWeights;
1235 if (SuccHasWeights) {
1236 getBranchWeights(TI, Weights&: SuccWeights);
1237 // branch-weight metadata is inconsistent here.
1238 if (SuccWeights.size() != 1 + BBCases.size())
1239 PredHasWeights = SuccHasWeights = false;
1240 } else if (PredHasWeights)
1241 SuccWeights.assign(NumElts: 1 + BBCases.size(), Elt: 1);
1242
1243 if (PredDefault == BB) {
1244 // If this is the default destination from PTI, only the edges in TI
1245 // that don't occur in PTI, or that branch to BB will be activated.
1246 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1247 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1248 if (PredCases[i].Dest != BB)
1249 PTIHandled.insert(x: PredCases[i].Value);
1250 else {
1251 // The default destination is BB, we don't need explicit targets.
1252 std::swap(a&: PredCases[i], b&: PredCases.back());
1253
1254 if (PredHasWeights || SuccHasWeights) {
1255 // Increase weight for the default case.
1256 Weights[0] += Weights[i + 1];
1257 std::swap(a&: Weights[i + 1], b&: Weights.back());
1258 Weights.pop_back();
1259 }
1260
1261 PredCases.pop_back();
1262 --i;
1263 --e;
1264 }
1265
1266 // Reconstruct the new switch statement we will be building.
1267 if (PredDefault != BBDefault) {
1268 PredDefault->removePredecessor(Pred);
1269 if (DTU && PredDefault != BB)
1270 Updates.push_back(Elt: {DominatorTree::Delete, Pred, PredDefault});
1271 PredDefault = BBDefault;
1272 ++NewSuccessors[BBDefault];
1273 }
1274
1275 unsigned CasesFromPred = Weights.size();
1276 uint64_t ValidTotalSuccWeight = 0;
1277 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1278 if (!PTIHandled.count(x: BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1279 PredCases.push_back(x: BBCases[i]);
1280 ++NewSuccessors[BBCases[i].Dest];
1281 if (SuccHasWeights || PredHasWeights) {
1282 // The default weight is at index 0, so weight for the ith case
1283 // should be at index i+1. Scale the cases from successor by
1284 // PredDefaultWeight (Weights[0]).
1285 Weights.push_back(Elt: Weights[0] * SuccWeights[i + 1]);
1286 ValidTotalSuccWeight += SuccWeights[i + 1];
1287 }
1288 }
1289
1290 if (SuccHasWeights || PredHasWeights) {
1291 ValidTotalSuccWeight += SuccWeights[0];
1292 // Scale the cases from predecessor by ValidTotalSuccWeight.
1293 for (unsigned i = 1; i < CasesFromPred; ++i)
1294 Weights[i] *= ValidTotalSuccWeight;
1295 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1296 Weights[0] *= SuccWeights[0];
1297 }
1298 } else {
1299 // If this is not the default destination from PSI, only the edges
1300 // in SI that occur in PSI with a destination of BB will be
1301 // activated.
1302 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1303 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1304 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1305 if (PredCases[i].Dest == BB) {
1306 PTIHandled.insert(x: PredCases[i].Value);
1307
1308 if (PredHasWeights || SuccHasWeights) {
1309 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1310 std::swap(a&: Weights[i + 1], b&: Weights.back());
1311 Weights.pop_back();
1312 }
1313
1314 std::swap(a&: PredCases[i], b&: PredCases.back());
1315 PredCases.pop_back();
1316 --i;
1317 --e;
1318 }
1319
1320 // Okay, now we know which constants were sent to BB from the
1321 // predecessor. Figure out where they will all go now.
1322 for (const ValueEqualityComparisonCase &Case : BBCases)
1323 if (PTIHandled.count(x: Case.Value)) {
1324 // If this is one we are capable of getting...
1325 if (PredHasWeights || SuccHasWeights)
1326 Weights.push_back(Elt: WeightsForHandled[Case.Value]);
1327 PredCases.push_back(x: Case);
1328 ++NewSuccessors[Case.Dest];
1329 PTIHandled.erase(x: Case.Value); // This constant is taken care of
1330 }
1331
1332 // If there are any constants vectored to BB that TI doesn't handle,
1333 // they must go to the default destination of TI.
1334 for (ConstantInt *I : PTIHandled) {
1335 if (PredHasWeights || SuccHasWeights)
1336 Weights.push_back(Elt: WeightsForHandled[I]);
1337 PredCases.push_back(x: ValueEqualityComparisonCase(I, BBDefault));
1338 ++NewSuccessors[BBDefault];
1339 }
1340 }
1341
1342 // Okay, at this point, we know which new successor Pred will get. Make
1343 // sure we update the number of entries in the PHI nodes for these
1344 // successors.
1345 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1346 if (DTU) {
1347 SuccsOfPred = {llvm::from_range, successors(BB: Pred)};
1348 Updates.reserve(N: Updates.size() + NewSuccessors.size());
1349 }
1350 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1351 NewSuccessors) {
1352 for (auto I : seq(Size: NewSuccessor.second)) {
1353 (void)I;
1354 addPredecessorToBlock(Succ: NewSuccessor.first, NewPred: Pred, ExistPred: BB);
1355 }
1356 if (DTU && !SuccsOfPred.contains(Ptr: NewSuccessor.first))
1357 Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewSuccessor.first});
1358 }
1359
1360 Builder.SetInsertPoint(PTI);
1361 // Convert pointer to int before we switch.
1362 if (CV->getType()->isPointerTy()) {
1363 CV =
1364 Builder.CreatePtrToInt(V: CV, DestTy: DL.getIntPtrType(CV->getType()), Name: "magicptr");
1365 }
1366
1367 // Now that the successors are updated, create the new Switch instruction.
1368 SwitchInst *NewSI = Builder.CreateSwitch(V: CV, Dest: PredDefault, NumCases: PredCases.size());
1369 NewSI->setDebugLoc(PTI->getDebugLoc());
1370 for (ValueEqualityComparisonCase &V : PredCases)
1371 NewSI->addCase(OnVal: V.Value, Dest: V.Dest);
1372
1373 if (PredHasWeights || SuccHasWeights) {
1374 // Halve the weights if any of them cannot fit in an uint32_t
1375 fitWeights(Weights);
1376
1377 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1378
1379 setBranchWeights(SI: NewSI, Weights: MDWeights, /*IsExpected=*/false);
1380 }
1381
1382 eraseTerminatorAndDCECond(TI: PTI);
1383
1384 // Okay, last check. If BB is still a successor of PSI, then we must
1385 // have an infinite loop case. If so, add an infinitely looping block
1386 // to handle the case to preserve the behavior of the code.
1387 BasicBlock *InfLoopBlock = nullptr;
1388 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1389 if (NewSI->getSuccessor(idx: i) == BB) {
1390 if (!InfLoopBlock) {
1391 // Insert it at the end of the function, because it's either code,
1392 // or it won't matter if it's hot. :)
1393 InfLoopBlock =
1394 BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
1395 BranchInst::Create(IfTrue: InfLoopBlock, InsertBefore: InfLoopBlock);
1396 if (DTU)
1397 Updates.push_back(
1398 Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1399 }
1400 NewSI->setSuccessor(idx: i, NewSucc: InfLoopBlock);
1401 }
1402
1403 if (DTU) {
1404 if (InfLoopBlock)
1405 Updates.push_back(Elt: {DominatorTree::Insert, Pred, InfLoopBlock});
1406
1407 Updates.push_back(Elt: {DominatorTree::Delete, Pred, BB});
1408
1409 DTU->applyUpdates(Updates);
1410 }
1411
1412 ++NumFoldValueComparisonIntoPredecessors;
1413 return true;
1414}
1415
1416/// The specified terminator is a value equality comparison instruction
1417/// (either a switch or a branch on "X == c").
1418/// See if any of the predecessors of the terminator block are value comparisons
1419/// on the same value. If so, and if safe to do so, fold them together.
1420bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1421 IRBuilder<> &Builder) {
1422 BasicBlock *BB = TI->getParent();
1423 Value *CV = isValueEqualityComparison(TI); // CondVal
1424 assert(CV && "Not a comparison?");
1425
1426 bool Changed = false;
1427
1428 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1429 while (!Preds.empty()) {
1430 BasicBlock *Pred = Preds.pop_back_val();
1431 Instruction *PTI = Pred->getTerminator();
1432
1433 // Don't try to fold into itself.
1434 if (Pred == BB)
1435 continue;
1436
1437 // See if the predecessor is a comparison with the same value.
1438 Value *PCV = isValueEqualityComparison(TI: PTI); // PredCondVal
1439 if (PCV != CV)
1440 continue;
1441
1442 SmallSetVector<BasicBlock *, 4> FailBlocks;
1443 if (!safeToMergeTerminators(SI1: TI, SI2: PTI, FailBlocks: &FailBlocks)) {
1444 for (auto *Succ : FailBlocks) {
1445 if (!SplitBlockPredecessors(BB: Succ, Preds: TI->getParent(), Suffix: ".fold.split", DTU))
1446 return false;
1447 }
1448 }
1449
1450 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1451 Changed = true;
1452 }
1453 return Changed;
1454}
1455
1456// If we would need to insert a select that uses the value of this invoke
1457// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1458// need to do this), we can't hoist the invoke, as there is nowhere to put the
1459// select in this case.
1460static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
1461 Instruction *I1, Instruction *I2) {
1462 for (BasicBlock *Succ : successors(BB: BB1)) {
1463 for (const PHINode &PN : Succ->phis()) {
1464 Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
1465 Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
1466 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1467 return false;
1468 }
1469 }
1470 }
1471 return true;
1472}
1473
// Bit flags describing instructions that `hoistCommonCodeFromSuccessors` left
// in place. Each flag restricts which later instructions may be reordered
// across the skipped one.
enum SkipFlags {
  SkipReadMem = 1 << 0,
  SkipSideEffect = 1 << 1,
  SkipImplicitControlFlow = 1 << 2
};
1482
1483static unsigned skippedInstrFlags(Instruction *I) {
1484 unsigned Flags = 0;
1485 if (I->mayReadFromMemory())
1486 Flags |= SkipReadMem;
1487 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1488 // inalloca) across stacksave/stackrestore boundaries.
1489 if (I->mayHaveSideEffects() || isa<AllocaInst>(Val: I))
1490 Flags |= SkipSideEffect;
1491 if (!isGuaranteedToTransferExecutionToSuccessor(I))
1492 Flags |= SkipImplicitControlFlow;
1493 return Flags;
1494}
1495
1496// Returns true if it is safe to reorder an instruction across preceding
1497// instructions in a basic block.
1498static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1499 // Don't reorder a store over a load.
1500 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1501 return false;
1502
1503 // If we have seen an instruction with side effects, it's unsafe to reorder an
1504 // instruction which reads memory or itself has side effects.
1505 if ((Flags & SkipSideEffect) &&
1506 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(Val: I)))
1507 return false;
1508
1509 // Reordering across an instruction which does not necessarily transfer
1510 // control to the next instruction is speculation.
1511 if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I))
1512 return false;
1513
1514 // Hoisting of llvm.deoptimize is only legal together with the next return
1515 // instruction, which this pass is not always able to do.
1516 if (auto *CB = dyn_cast<CallBase>(Val: I))
1517 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1518 return false;
1519
1520 // It's also unsafe/illegal to hoist an instruction above its instruction
1521 // operands
1522 BasicBlock *BB = I->getParent();
1523 for (Value *Op : I->operands()) {
1524 if (auto *J = dyn_cast<Instruction>(Val: Op))
1525 if (J->getParent() == BB)
1526 return false;
1527 }
1528
1529 return true;
1530}
1531
1532static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1533
1534/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1535/// instructions \p I1 and \p I2 can and should be hoisted.
1536static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
1537 const TargetTransformInfo &TTI) {
1538 // If we're going to hoist a call, make sure that the two instructions
1539 // we're commoning/hoisting are both marked with musttail, or neither of
1540 // them is marked as such. Otherwise, we might end up in a situation where
1541 // we hoist from a block where the terminator is a `ret` to a block where
1542 // the terminator is a `br`, and `musttail` calls expect to be followed by
1543 // a return.
1544 auto *C1 = dyn_cast<CallInst>(Val: I1);
1545 auto *C2 = dyn_cast<CallInst>(Val: I2);
1546 if (C1 && C2)
1547 if (C1->isMustTailCall() != C2->isMustTailCall())
1548 return false;
1549
1550 if (!TTI.isProfitableToHoist(I: I1) || !TTI.isProfitableToHoist(I: I2))
1551 return false;
1552
1553 // If any of the two call sites has nomerge or convergent attribute, stop
1554 // hoisting.
1555 if (const auto *CB1 = dyn_cast<CallBase>(Val: I1))
1556 if (CB1->cannotMerge() || CB1->isConvergent())
1557 return false;
1558 if (const auto *CB2 = dyn_cast<CallBase>(Val: I2))
1559 if (CB2->cannotMerge() || CB2->isConvergent())
1560 return false;
1561
1562 return true;
1563}
1564
1565/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1566/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1567/// hoistCommonCodeFromSuccessors. e.g. The input:
1568/// I1 DVRs: { x, z },
1569/// OtherInsts: { I2 DVRs: { x, y, z } }
1570/// would result in hoisting only DbgVariableRecord x.
1571static void hoistLockstepIdenticalDbgVariableRecords(
1572 Instruction *TI, Instruction *I1,
1573 SmallVectorImpl<Instruction *> &OtherInsts) {
1574 if (!I1->hasDbgRecords())
1575 return;
1576 using CurrentAndEndIt =
1577 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1578 // Vector of {Current, End} iterators.
1579 SmallVector<CurrentAndEndIt> Itrs;
1580 Itrs.reserve(N: OtherInsts.size() + 1);
1581 // Helper lambdas for lock-step checks:
1582 // Return true if this Current == End.
1583 auto atEnd = [](const CurrentAndEndIt &Pair) {
1584 return Pair.first == Pair.second;
1585 };
1586 // Return true if all Current are identical.
1587 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1588 return all_of(Range: make_first_range(c: ArrayRef(Itrs).drop_front()),
1589 P: [&](DbgRecord::self_iterator I) {
1590 return Itrs[0].first->isIdenticalToWhenDefined(R: *I);
1591 });
1592 };
1593
1594 // Collect the iterators.
1595 Itrs.push_back(
1596 Elt: {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1597 for (Instruction *Other : OtherInsts) {
1598 if (!Other->hasDbgRecords())
1599 return;
1600 Itrs.push_back(
1601 Elt: {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1602 }
1603
1604 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1605 // the lock-step DbgRecord are identical, hoist all of them to TI.
1606 // This replicates the dbg.* intrinsic behaviour in
1607 // hoistCommonCodeFromSuccessors.
1608 while (none_of(Range&: Itrs, P: atEnd)) {
1609 bool HoistDVRs = allIdentical(Itrs);
1610 for (CurrentAndEndIt &Pair : Itrs) {
1611 // Increment Current iterator now as we may be about to move the
1612 // DbgRecord.
1613 DbgRecord &DR = *Pair.first++;
1614 if (HoistDVRs) {
1615 DR.removeFromParent();
1616 TI->getParent()->insertDbgRecordBefore(DR: &DR, Here: TI->getIterator());
1617 }
1618 }
1619 }
1620}
1621
1622static bool areIdenticalUpToCommutativity(const Instruction *I1,
1623 const Instruction *I2) {
1624 if (I1->isIdenticalToWhenDefined(I: I2, /*IntersectAttrs=*/true))
1625 return true;
1626
1627 if (auto *Cmp1 = dyn_cast<CmpInst>(Val: I1))
1628 if (auto *Cmp2 = dyn_cast<CmpInst>(Val: I2))
1629 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1630 Cmp1->getOperand(i_nocapture: 0) == Cmp2->getOperand(i_nocapture: 1) &&
1631 Cmp1->getOperand(i_nocapture: 1) == Cmp2->getOperand(i_nocapture: 0);
1632
1633 if (I1->isCommutative() && I1->isSameOperationAs(I: I2)) {
1634 return I1->getOperand(i: 0) == I2->getOperand(i: 1) &&
1635 I1->getOperand(i: 1) == I2->getOperand(i: 0) &&
1636 equal(LRange: drop_begin(RangeOrContainer: I1->operands(), N: 2), RRange: drop_begin(RangeOrContainer: I2->operands(), N: 2));
1637 }
1638
1639 return false;
1640}
1641
1642/// If the target supports conditional faulting,
1643/// we look for the following pattern:
1644/// \code
1645/// BB:
1646/// ...
1647/// %cond = icmp ult %x, %y
1648/// br i1 %cond, label %TrueBB, label %FalseBB
1649/// FalseBB:
1650/// store i32 1, ptr %q, align 4
1651/// ...
1652/// TrueBB:
1653/// %maskedloadstore = load i32, ptr %b, align 4
1654/// store i32 %maskedloadstore, ptr %p, align 4
1655/// ...
1656/// \endcode
1657///
1658/// and transform it into:
1659///
1660/// \code
1661/// BB:
1662/// ...
1663/// %cond = icmp ult %x, %y
1664/// %maskedloadstore = cload i32, ptr %b, %cond
1665/// cstore i32 %maskedloadstore, ptr %p, %cond
1666/// cstore i32 1, ptr %q, ~%cond
1667/// br i1 %cond, label %TrueBB, label %FalseBB
1668/// FalseBB:
1669/// ...
1670/// TrueBB:
1671/// ...
1672/// \endcode
1673///
1674/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1675/// e.g.
1676///
1677/// \code
1678/// %vcond = bitcast i1 %cond to <1 x i1>
1679/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1680/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1681/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1682/// call void @llvm.masked.store.v1i32.p0
1683/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1684/// %cond.not = xor i1 %cond, true
1685/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1686/// call void @llvm.masked.store.v1i32.p0
1687/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1688/// \endcode
1689///
1690/// So we need to turn hoisted load/store into cload/cstore.
1691///
1692/// \param BI The branch instruction.
1693/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1694/// will be speculated.
1695/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
1696static void hoistConditionalLoadsStores(
1697 BranchInst *BI,
1698 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1699 std::optional<bool> Invert, Instruction *Sel) {
1700 auto &Context = BI->getParent()->getContext();
1701 auto *VCondTy = FixedVectorType::get(ElementType: Type::getInt1Ty(C&: Context), NumElts: 1);
1702 auto *Cond = BI->getOperand(i_nocapture: 0);
1703 // Construct the condition if needed.
1704 BasicBlock *BB = BI->getParent();
1705 Value *Mask = nullptr;
1706 Value *MaskFalse = nullptr;
1707 Value *MaskTrue = nullptr;
1708 if (Invert.has_value()) {
1709 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1710 Mask = Builder.CreateBitCast(
1711 V: *Invert ? Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)) : Cond,
1712 DestTy: VCondTy);
1713 } else {
1714 IRBuilder<> Builder(BI);
1715 MaskFalse = Builder.CreateBitCast(
1716 V: Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)), DestTy: VCondTy);
1717 MaskTrue = Builder.CreateBitCast(V: Cond, DestTy: VCondTy);
1718 }
1719 auto PeekThroughBitcasts = [](Value *V) {
1720 while (auto *BitCast = dyn_cast<BitCastInst>(Val: V))
1721 V = BitCast->getOperand(i_nocapture: 0);
1722 return V;
1723 };
1724 for (auto *I : SpeculatedConditionalLoadsStores) {
1725 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1726 if (!Invert.has_value())
1727 Mask = I->getParent() == BI->getSuccessor(i: 0) ? MaskTrue : MaskFalse;
1728 // We currently assume conditional faulting load/store is supported for
1729 // scalar types only when creating new instructions. This can be easily
1730 // extended for vector types in the future.
1731 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1732 auto *Op0 = I->getOperand(i: 0);
1733 CallInst *MaskedLoadStore = nullptr;
1734 if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
1735 // Handle Load.
1736 auto *Ty = I->getType();
1737 PHINode *PN = nullptr;
1738 Value *PassThru = nullptr;
1739 if (Invert.has_value())
1740 for (User *U : I->users()) {
1741 if ((PN = dyn_cast<PHINode>(Val: U))) {
1742 PassThru = Builder.CreateBitCast(
1743 V: PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1744 DestTy: FixedVectorType::get(ElementType: Ty, NumElts: 1));
1745 } else if (auto *Ins = cast<Instruction>(Val: U);
1746 Sel && Ins->getParent() == BB) {
1747 // This happens when store or/and a speculative instruction between
1748 // load and store were hoisted to the BB. Make sure the masked load
1749 // inserted before its use.
1750 // We assume there's one of such use.
1751 Builder.SetInsertPoint(Ins);
1752 }
1753 }
1754 MaskedLoadStore = Builder.CreateMaskedLoad(
1755 Ty: FixedVectorType::get(ElementType: Ty, NumElts: 1), Ptr: Op0, Alignment: LI->getAlign(), Mask, PassThru);
1756 Value *NewLoadStore = Builder.CreateBitCast(V: MaskedLoadStore, DestTy: Ty);
1757 if (PN)
1758 PN->setIncomingValue(i: PN->getBasicBlockIndex(BB), V: NewLoadStore);
1759 I->replaceAllUsesWith(V: NewLoadStore);
1760 } else {
1761 // Handle Store.
1762 auto *StoredVal = Builder.CreateBitCast(
1763 V: PeekThroughBitcasts(Op0), DestTy: FixedVectorType::get(ElementType: Op0->getType(), NumElts: 1));
1764 MaskedLoadStore = Builder.CreateMaskedStore(
1765 Val: StoredVal, Ptr: I->getOperand(i: 1), Alignment: cast<StoreInst>(Val: I)->getAlign(), Mask);
1766 }
1767 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1768 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1769 //
1770 // !nonnull, !align : Not support pointer type, no need to keep.
1771 // !range: Load type is changed from scalar to vector, but the metadata on
1772 // vector specifies a per-element range, so the semantics stay the
1773 // same. Keep it.
1774 // !annotation: Not impact semantics. Keep it.
1775 if (const MDNode *Ranges = I->getMetadata(KindID: LLVMContext::MD_range))
1776 MaskedLoadStore->addRangeRetAttr(CR: getConstantRangeFromMetadata(RangeMD: *Ranges));
1777 I->dropUBImplyingAttrsAndUnknownMetadata(KnownIDs: {LLVMContext::MD_annotation});
1778 // FIXME: DIAssignID is not supported for masked store yet.
1779 // (Verifier::visitDIAssignIDMetadata)
1780 at::deleteAssignmentMarkers(Inst: I);
1781 I->eraseMetadataIf(Pred: [](unsigned MDKind, MDNode *Node) {
1782 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1783 });
1784 MaskedLoadStore->copyMetadata(SrcInst: *I);
1785 I->eraseFromParent();
1786 }
1787}
1788
1789static bool isSafeCheapLoadStore(const Instruction *I,
1790 const TargetTransformInfo &TTI) {
1791 // Not handle volatile or atomic.
1792 bool IsStore = false;
1793 if (auto *L = dyn_cast<LoadInst>(Val: I)) {
1794 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1795 return false;
1796 } else if (auto *S = dyn_cast<StoreInst>(Val: I)) {
1797 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1798 return false;
1799 IsStore = true;
1800 } else
1801 return false;
1802
1803 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1804 // That's why we have the alignment limitation.
1805 // FIXME: Update the prototype of the intrinsics?
1806 return TTI.hasConditionalLoadStoreForType(Ty: getLoadStoreType(I), IsStore) &&
1807 getLoadStoreAlignment(I) < Value::MaximumAlignment;
1808}
1809
1810/// Hoist any common code in the successor blocks up into the block. This
1811/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1812/// given, only perform hoisting in case all successors blocks contain matching
1813/// instructions only. In that case, all instructions can be hoisted and the
1814/// original branch will be replaced and selects for PHIs are added.
1815bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1816 bool AllInstsEqOnly) {
1817 // This does very trivial matching, with limited scanning, to find identical
1818 // instructions in the two blocks. In particular, we don't want to get into
1819 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1820 // such, we currently just scan for obviously identical instructions in an
1821 // identical order, possibly separated by the same number of non-identical
1822 // instructions.
1823 BasicBlock *BB = TI->getParent();
1824 unsigned int SuccSize = succ_size(BB);
1825 if (SuccSize < 2)
1826 return false;
1827
1828 // If either of the blocks has it's address taken, then we can't do this fold,
1829 // because the code we'd hoist would no longer run when we jump into the block
1830 // by it's address.
1831 for (auto *Succ : successors(BB))
1832 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1833 return false;
1834
1835 // The second of pair is a SkipFlags bitmask.
1836 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1837 SmallVector<SuccIterPair, 8> SuccIterPairs;
1838 for (auto *Succ : successors(BB)) {
1839 BasicBlock::iterator SuccItr = Succ->begin();
1840 if (isa<PHINode>(Val: *SuccItr))
1841 return false;
1842 SuccIterPairs.push_back(Elt: SuccIterPair(SuccItr, 0));
1843 }
1844
1845 if (AllInstsEqOnly) {
1846 // Check if all instructions in the successor blocks match. This allows
1847 // hoisting all instructions and removing the blocks we are hoisting from,
1848 // so does not add any new instructions.
1849 SmallVector<BasicBlock *> Succs = to_vector(Range: successors(BB));
1850 // Check if sizes and terminators of all successors match.
1851 bool AllSame = none_of(Range&: Succs, P: [&Succs](BasicBlock *Succ) {
1852 Instruction *Term0 = Succs[0]->getTerminator();
1853 Instruction *Term = Succ->getTerminator();
1854 return !Term->isSameOperationAs(I: Term0) ||
1855 !equal(LRange: Term->operands(), RRange: Term0->operands()) ||
1856 Succs[0]->size() != Succ->size();
1857 });
1858 if (!AllSame)
1859 return false;
1860 if (AllSame) {
1861 LockstepReverseIterator<true> LRI(Succs);
1862 while (LRI.isValid()) {
1863 Instruction *I0 = (*LRI)[0];
1864 if (any_of(Range: *LRI, P: [I0](Instruction *I) {
1865 return !areIdenticalUpToCommutativity(I1: I0, I2: I);
1866 })) {
1867 return false;
1868 }
1869 --LRI;
1870 }
1871 }
1872 // Now we know that all instructions in all successors can be hoisted. Let
1873 // the loop below handle the hoisting.
1874 }
1875
1876 // Count how many instructions were not hoisted so far. There's a limit on how
1877 // many instructions we skip, serving as a compilation time control as well as
1878 // preventing excessive increase of life ranges.
1879 unsigned NumSkipped = 0;
1880 // If we find an unreachable instruction at the beginning of a basic block, we
1881 // can still hoist instructions from the rest of the basic blocks.
1882 if (SuccIterPairs.size() > 2) {
1883 erase_if(C&: SuccIterPairs,
1884 P: [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1885 if (SuccIterPairs.size() < 2)
1886 return false;
1887 }
1888
1889 bool Changed = false;
1890
1891 for (;;) {
1892 auto *SuccIterPairBegin = SuccIterPairs.begin();
1893 auto &BB1ItrPair = *SuccIterPairBegin++;
1894 auto OtherSuccIterPairRange =
1895 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1896 auto OtherSuccIterRange = make_first_range(c&: OtherSuccIterPairRange);
1897
1898 Instruction *I1 = &*BB1ItrPair.first;
1899
1900 bool AllInstsAreIdentical = true;
1901 bool HasTerminator = I1->isTerminator();
1902 for (auto &SuccIter : OtherSuccIterRange) {
1903 Instruction *I2 = &*SuccIter;
1904 HasTerminator |= I2->isTerminator();
1905 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1906 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1907 AllInstsAreIdentical = false;
1908 }
1909
1910 SmallVector<Instruction *, 8> OtherInsts;
1911 for (auto &SuccIter : OtherSuccIterRange)
1912 OtherInsts.push_back(Elt: &*SuccIter);
1913
1914 // If we are hoisting the terminator instruction, don't move one (making a
1915 // broken BB), instead clone it, and remove BI.
1916 if (HasTerminator) {
1917 // Even if BB, which contains only one unreachable instruction, is ignored
1918 // at the beginning of the loop, we can hoist the terminator instruction.
1919 // If any instructions remain in the block, we cannot hoist terminators.
1920 if (NumSkipped || !AllInstsAreIdentical) {
1921 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1922 return Changed;
1923 }
1924
1925 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherSuccTIs&: OtherInsts) ||
1926 Changed;
1927 }
1928
1929 if (AllInstsAreIdentical) {
1930 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1931 AllInstsAreIdentical =
1932 isSafeToHoistInstr(I: I1, Flags: SkipFlagsBB1) &&
1933 all_of(Range&: OtherSuccIterPairRange, P: [=](const auto &Pair) {
1934 Instruction *I2 = &*Pair.first;
1935 unsigned SkipFlagsBB2 = Pair.second;
1936 // Even if the instructions are identical, it may not
1937 // be safe to hoist them if we have skipped over
1938 // instructions with side effects or their operands
1939 // weren't hoisted.
1940 return isSafeToHoistInstr(I: I2, Flags: SkipFlagsBB2) &&
1941 shouldHoistCommonInstructions(I1, I2, TTI);
1942 });
1943 }
1944
1945 if (AllInstsAreIdentical) {
1946 BB1ItrPair.first++;
1947 // For a normal instruction, we just move one to right before the
1948 // branch, then replace all uses of the other with the first. Finally,
1949 // we remove the now redundant second instruction.
1950 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1951 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
1952 // and leave any that were not hoisted behind (by calling moveBefore
1953 // rather than moveBeforePreserving).
1954 I1->moveBefore(InsertPos: TI->getIterator());
1955 for (auto &SuccIter : OtherSuccIterRange) {
1956 Instruction *I2 = &*SuccIter++;
1957 assert(I2 != I1);
1958 if (!I2->use_empty())
1959 I2->replaceAllUsesWith(V: I1);
1960 I1->andIRFlags(V: I2);
1961 if (auto *CB = dyn_cast<CallBase>(Val: I1)) {
1962 bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I2));
1963 assert(Success && "We should not be trying to hoist callbases "
1964 "with non-intersectable attributes");
1965 // For NDEBUG Compile.
1966 (void)Success;
1967 }
1968
1969 combineMetadataForCSE(K: I1, J: I2, DoesKMove: true);
1970 // I1 and I2 are being combined into a single instruction. Its debug
1971 // location is the merged locations of the original instructions.
1972 I1->applyMergedLocation(LocA: I1->getDebugLoc(), LocB: I2->getDebugLoc());
1973 I2->eraseFromParent();
1974 }
1975 if (!Changed)
1976 NumHoistCommonCode += SuccIterPairs.size();
1977 Changed = true;
1978 NumHoistCommonInstrs += SuccIterPairs.size();
1979 } else {
1980 if (NumSkipped >= HoistCommonSkipLimit) {
1981 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1982 return Changed;
1983 }
1984 // We are about to skip over a pair of non-identical instructions. Record
1985 // if any have characteristics that would prevent reordering instructions
1986 // across them.
1987 for (auto &SuccIterPair : SuccIterPairs) {
1988 Instruction *I = &*SuccIterPair.first++;
1989 SuccIterPair.second |= skippedInstrFlags(I);
1990 }
1991 ++NumSkipped;
1992 }
1993 }
1994}
1995
1996bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
1997 Instruction *TI, Instruction *I1,
1998 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
1999
2000 auto *BI = dyn_cast<BranchInst>(Val: TI);
2001
2002 bool Changed = false;
2003 BasicBlock *TIParent = TI->getParent();
2004 BasicBlock *BB1 = I1->getParent();
2005
2006 // Use only for an if statement.
2007 auto *I2 = *OtherSuccTIs.begin();
2008 auto *BB2 = I2->getParent();
2009 if (BI) {
2010 assert(OtherSuccTIs.size() == 1);
2011 assert(BI->getSuccessor(0) == I1->getParent());
2012 assert(BI->getSuccessor(1) == I2->getParent());
2013 }
2014
2015 // In the case of an if statement, we try to hoist an invoke.
2016 // FIXME: Can we define a safety predicate for CallBr?
2017 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2018 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2019 if (isa<InvokeInst>(Val: I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2020 return false;
2021
2022 // TODO: callbr hoisting currently disabled pending further study.
2023 if (isa<CallBrInst>(Val: I1))
2024 return false;
2025
2026 for (BasicBlock *Succ : successors(BB: BB1)) {
2027 for (PHINode &PN : Succ->phis()) {
2028 Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
2029 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2030 Value *BB2V = PN.getIncomingValueForBlock(BB: OtherSuccTI->getParent());
2031 if (BB1V == BB2V)
2032 continue;
2033
2034 // In the case of an if statement, check for
2035 // passingValueIsAlwaysUndefined here because we would rather eliminate
2036 // undefined control flow then converting it to a select.
2037 if (!BI || passingValueIsAlwaysUndefined(V: BB1V, I: &PN) ||
2038 passingValueIsAlwaysUndefined(V: BB2V, I: &PN))
2039 return false;
2040 }
2041 }
2042 }
2043
2044 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2045 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2046 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts&: OtherSuccTIs);
2047 // Clone the terminator and hoist it into the pred, without any debug info.
2048 Instruction *NT = I1->clone();
2049 NT->insertInto(ParentBB: TIParent, It: TI->getIterator());
2050 if (!NT->getType()->isVoidTy()) {
2051 I1->replaceAllUsesWith(V: NT);
2052 for (Instruction *OtherSuccTI : OtherSuccTIs)
2053 OtherSuccTI->replaceAllUsesWith(V: NT);
2054 NT->takeName(V: I1);
2055 }
2056 Changed = true;
2057 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2058
2059 // Ensure terminator gets a debug location, even an unknown one, in case
2060 // it involves inlinable calls.
2061 SmallVector<DebugLoc, 4> Locs;
2062 Locs.push_back(Elt: I1->getDebugLoc());
2063 for (auto *OtherSuccTI : OtherSuccTIs)
2064 Locs.push_back(Elt: OtherSuccTI->getDebugLoc());
2065 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2066
2067 // PHIs created below will adopt NT's merged DebugLoc.
2068 IRBuilder<NoFolder> Builder(NT);
2069
2070 // In the case of an if statement, hoisting one of the terminators from our
2071 // successor is a great thing. Unfortunately, the successors of the if/else
2072 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2073 // must agree for all PHI nodes, so we insert select instruction to compute
2074 // the final result.
2075 if (BI) {
2076 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2077 for (BasicBlock *Succ : successors(BB: BB1)) {
2078 for (PHINode &PN : Succ->phis()) {
2079 Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
2080 Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
2081 if (BB1V == BB2V)
2082 continue;
2083
2084 // These values do not agree. Insert a select instruction before NT
2085 // that determines the right value.
2086 SelectInst *&SI = InsertedSelects[std::make_pair(x&: BB1V, y&: BB2V)];
2087 if (!SI) {
2088 // Propagate fast-math-flags from phi node to its replacement select.
2089 SI = cast<SelectInst>(Val: Builder.CreateSelectFMF(
2090 C: BI->getCondition(), True: BB1V, False: BB2V,
2091 FMFSource: isa<FPMathOperator>(Val: PN) ? &PN : nullptr,
2092 Name: BB1V->getName() + "." + BB2V->getName(), MDFrom: BI));
2093 }
2094
2095 // Make the PHI node use the select for all incoming values for BB1/BB2
2096 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2097 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2098 PN.setIncomingValue(i, V: SI);
2099 }
2100 }
2101 }
2102
2103 SmallVector<DominatorTree::UpdateType, 4> Updates;
2104
2105 // Update any PHI nodes in our new successors.
2106 for (BasicBlock *Succ : successors(BB: BB1)) {
2107 addPredecessorToBlock(Succ, NewPred: TIParent, ExistPred: BB1);
2108 if (DTU)
2109 Updates.push_back(Elt: {DominatorTree::Insert, TIParent, Succ});
2110 }
2111
2112 if (DTU)
2113 for (BasicBlock *Succ : successors(I: TI))
2114 Updates.push_back(Elt: {DominatorTree::Delete, TIParent, Succ});
2115
2116 eraseTerminatorAndDCECond(TI);
2117 if (DTU)
2118 DTU->applyUpdates(Updates);
2119 return Changed;
2120}
2121
2122// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2123// into variables.
2124static bool replacingOperandWithVariableIsCheap(const Instruction *I,
2125 int OpIdx) {
2126 // Divide/Remainder by constant is typically much cheaper than by variable.
2127 if (I->isIntDivRem())
2128 return OpIdx != 1;
2129 return !isa<IntrinsicInst>(Val: I);
2130}
2131
2132// All instructions in Insts belong to different blocks that all unconditionally
2133// branch to a common successor. Analyze each instruction and return true if it
2134// would be possible to sink them into their successor, creating one common
2135// instruction instead. For every value that would be required to be provided by
2136// PHI node (because an operand varies in each input block), add to PHIOperands.
2137static bool canSinkInstructions(
2138 ArrayRef<Instruction *> Insts,
2139 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2140 // Prune out obviously bad instructions to move. Each instruction must have
2141 // the same number of uses, and we check later that the uses are consistent.
2142 std::optional<unsigned> NumUses;
2143 for (auto *I : Insts) {
2144 // These instructions may change or break semantics if moved.
2145 if (isa<PHINode>(Val: I) || I->isEHPad() || isa<AllocaInst>(Val: I) ||
2146 I->getType()->isTokenTy())
2147 return false;
2148
2149 // Do not try to sink an instruction in an infinite loop - it can cause
2150 // this algorithm to infinite loop.
2151 if (I->getParent()->getSingleSuccessor() == I->getParent())
2152 return false;
2153
2154 // Conservatively return false if I is an inline-asm instruction. Sinking
2155 // and merging inline-asm instructions can potentially create arguments
2156 // that cannot satisfy the inline-asm constraints.
2157 // If the instruction has nomerge or convergent attribute, return false.
2158 if (const auto *C = dyn_cast<CallBase>(Val: I))
2159 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2160 return false;
2161
2162 if (!NumUses)
2163 NumUses = I->getNumUses();
2164 else if (NumUses != I->getNumUses())
2165 return false;
2166 }
2167
2168 const Instruction *I0 = Insts.front();
2169 const auto I0MMRA = MMRAMetadata(*I0);
2170 for (auto *I : Insts) {
2171 if (!I->isSameOperationAs(I: I0, flags: Instruction::CompareUsingIntersectedAttrs))
2172 return false;
2173
2174 // Treat MMRAs conservatively. This pass can be quite aggressive and
2175 // could drop a lot of MMRAs otherwise.
2176 if (MMRAMetadata(*I) != I0MMRA)
2177 return false;
2178 }
2179
2180 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2181 // then the other phi operands must match the instructions from Insts. This
2182 // also has to hold true for any phi nodes that would be created as a result
2183 // of sinking. Both of these cases are represented by PhiOperands.
2184 for (const Use &U : I0->uses()) {
2185 auto It = PHIOperands.find(Val: &U);
2186 if (It == PHIOperands.end())
2187 // There may be uses in other blocks when sinking into a loop header.
2188 return false;
2189 if (!equal(LRange&: Insts, RRange&: It->second))
2190 return false;
2191 }
2192
2193 // For calls to be sinkable, they must all be indirect, or have same callee.
2194 // I.e. if we have two direct calls to different callees, we don't want to
2195 // turn that into an indirect call. Likewise, if we have an indirect call,
2196 // and a direct call, we don't actually want to have a single indirect call.
2197 if (isa<CallBase>(Val: I0)) {
2198 auto IsIndirectCall = [](const Instruction *I) {
2199 return cast<CallBase>(Val: I)->isIndirectCall();
2200 };
2201 bool HaveIndirectCalls = any_of(Range&: Insts, P: IsIndirectCall);
2202 bool AllCallsAreIndirect = all_of(Range&: Insts, P: IsIndirectCall);
2203 if (HaveIndirectCalls) {
2204 if (!AllCallsAreIndirect)
2205 return false;
2206 } else {
2207 // All callees must be identical.
2208 Value *Callee = nullptr;
2209 for (const Instruction *I : Insts) {
2210 Value *CurrCallee = cast<CallBase>(Val: I)->getCalledOperand();
2211 if (!Callee)
2212 Callee = CurrCallee;
2213 else if (Callee != CurrCallee)
2214 return false;
2215 }
2216 }
2217 }
2218
2219 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2220 Value *Op = I0->getOperand(i: OI);
2221 if (Op->getType()->isTokenTy())
2222 // Don't touch any operand of token type.
2223 return false;
2224
2225 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2226 assert(I->getNumOperands() == I0->getNumOperands());
2227 return I->getOperand(i: OI) == I0->getOperand(i: OI);
2228 };
2229 if (!all_of(Range&: Insts, P: SameAsI0)) {
2230 // SROA can't speculate lifetime markers of selects/phis, and the
2231 // backend may handle such lifetimes incorrectly as well (#104776).
2232 // Don't sink lifetimes if it would introduce a phi on the pointer
2233 // argument.
2234 if (isa<LifetimeIntrinsic>(Val: I0) && OI == 1 &&
2235 any_of(Range&: Insts, P: [](const Instruction *I) {
2236 return isa<AllocaInst>(Val: I->getOperand(i: 1)->stripPointerCasts());
2237 }))
2238 return false;
2239
2240 if ((isa<Constant>(Val: Op) && !replacingOperandWithVariableIsCheap(I: I0, OpIdx: OI)) ||
2241 !canReplaceOperandWithVariable(I: I0, OpIdx: OI))
2242 // We can't create a PHI from this GEP.
2243 return false;
2244 auto &Ops = PHIOperands[&I0->getOperandUse(i: OI)];
2245 for (auto *I : Insts)
2246 Ops.push_back(Elt: I->getOperand(i: OI));
2247 }
2248 }
2249 return true;
2250}
2251
2252// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2253// instruction of every block in Blocks to their common successor, commoning
2254// into one instruction.
2255static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
2256 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(Idx: 0);
2257
2258 // canSinkInstructions returning true guarantees that every block has at
2259 // least one non-terminator instruction.
2260 SmallVector<Instruction*,4> Insts;
2261 for (auto *BB : Blocks) {
2262 Instruction *I = BB->getTerminator();
2263 I = I->getPrevNode();
2264 Insts.push_back(Elt: I);
2265 }
2266
2267 // We don't need to do any more checking here; canSinkInstructions should
2268 // have done it all for us.
2269 SmallVector<Value*, 4> NewOperands;
2270 Instruction *I0 = Insts.front();
2271 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2272 // This check is different to that in canSinkInstructions. There, we
2273 // cared about the global view once simplifycfg (and instcombine) have
2274 // completed - it takes into account PHIs that become trivially
2275 // simplifiable. However here we need a more local view; if an operand
2276 // differs we create a PHI and rely on instcombine to clean up the very
2277 // small mess we may make.
2278 bool NeedPHI = any_of(Range&: Insts, P: [&I0, O](const Instruction *I) {
2279 return I->getOperand(i: O) != I0->getOperand(i: O);
2280 });
2281 if (!NeedPHI) {
2282 NewOperands.push_back(Elt: I0->getOperand(i: O));
2283 continue;
2284 }
2285
2286 // Create a new PHI in the successor block and populate it.
2287 auto *Op = I0->getOperand(i: O);
2288 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2289 auto *PN =
2290 PHINode::Create(Ty: Op->getType(), NumReservedValues: Insts.size(), NameStr: Op->getName() + ".sink");
2291 PN->insertBefore(InsertPos: BBEnd->begin());
2292 for (auto *I : Insts)
2293 PN->addIncoming(V: I->getOperand(i: O), BB: I->getParent());
2294 NewOperands.push_back(Elt: PN);
2295 }
2296
2297 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2298 // and move it to the start of the successor block.
2299 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2300 I0->getOperandUse(i: O).set(NewOperands[O]);
2301
2302 I0->moveBefore(BB&: *BBEnd, I: BBEnd->getFirstInsertionPt());
2303
2304 // Update metadata and IR flags, and merge debug locations.
2305 for (auto *I : Insts)
2306 if (I != I0) {
2307 // The debug location for the "common" instruction is the merged locations
2308 // of all the commoned instructions. We start with the original location
2309 // of the "common" instruction and iteratively merge each location in the
2310 // loop below.
2311 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2312 // However, as N-way merge for CallInst is rare, so we use simplified API
2313 // instead of using complex API for N-way merge.
2314 I0->applyMergedLocation(LocA: I0->getDebugLoc(), LocB: I->getDebugLoc());
2315 combineMetadataForCSE(K: I0, J: I, DoesKMove: true);
2316 I0->andIRFlags(V: I);
2317 if (auto *CB = dyn_cast<CallBase>(Val: I0)) {
2318 bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I));
2319 assert(Success && "We should not be trying to sink callbases "
2320 "with non-intersectable attributes");
2321 // For NDEBUG Compile.
2322 (void)Success;
2323 }
2324 }
2325
2326 for (User *U : make_early_inc_range(Range: I0->users())) {
2327 // canSinkLastInstruction checked that all instructions are only used by
2328 // phi nodes in a way that allows replacing the phi node with the common
2329 // instruction.
2330 auto *PN = cast<PHINode>(Val: U);
2331 PN->replaceAllUsesWith(V: I0);
2332 PN->eraseFromParent();
2333 }
2334
2335 // Finally nuke all instructions apart from the common instruction.
2336 for (auto *I : Insts) {
2337 if (I == I0)
2338 continue;
2339 // The remaining uses are debug users, replace those with the common inst.
2340 // In most (all?) cases this just introduces a use-before-def.
2341 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2342 I->replaceAllUsesWith(V: I0);
2343 I->eraseFromParent();
2344 }
2345}
2346
2347/// Check whether BB's predecessors end with unconditional branches. If it is
2348/// true, sink any common code from the predecessors to BB.
2349static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
2350 DomTreeUpdater *DTU) {
2351 // We support two situations:
2352 // (1) all incoming arcs are unconditional
2353 // (2) there are non-unconditional incoming arcs
2354 //
2355 // (2) is very common in switch defaults and
2356 // else-if patterns;
2357 //
2358 // if (a) f(1);
2359 // else if (b) f(2);
2360 //
2361 // produces:
2362 //
2363 // [if]
2364 // / \
2365 // [f(1)] [if]
2366 // | | \
2367 // | | |
2368 // | [f(2)]|
2369 // \ | /
2370 // [ end ]
2371 //
2372 // [end] has two unconditional predecessor arcs and one conditional. The
2373 // conditional refers to the implicit empty 'else' arc. This conditional
2374 // arc can also be caused by an empty default block in a switch.
2375 //
2376 // In this case, we attempt to sink code from all *unconditional* arcs.
2377 // If we can sink instructions from these arcs (determined during the scan
2378 // phase below) we insert a common successor for all unconditional arcs and
2379 // connect that to [end], to enable sinking:
2380 //
2381 // [if]
2382 // / \
2383 // [x(1)] [if]
2384 // | | \
2385 // | | \
2386 // | [x(2)] |
2387 // \ / |
2388 // [sink.split] |
2389 // \ /
2390 // [ end ]
2391 //
2392 SmallVector<BasicBlock*,4> UnconditionalPreds;
2393 bool HaveNonUnconditionalPredecessors = false;
2394 for (auto *PredBB : predecessors(BB)) {
2395 auto *PredBr = dyn_cast<BranchInst>(Val: PredBB->getTerminator());
2396 if (PredBr && PredBr->isUnconditional())
2397 UnconditionalPreds.push_back(Elt: PredBB);
2398 else
2399 HaveNonUnconditionalPredecessors = true;
2400 }
2401 if (UnconditionalPreds.size() < 2)
2402 return false;
2403
2404 // We take a two-step approach to tail sinking. First we scan from the end of
2405 // each block upwards in lockstep. If the n'th instruction from the end of each
2406 // block can be sunk, those instructions are added to ValuesToSink and we
2407 // carry on. If we can sink an instruction but need to PHI-merge some operands
2408 // (because they're not identical in each instruction) we add these to
2409 // PHIOperands.
2410 // We prepopulate PHIOperands with the phis that already exist in BB.
2411 DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
2412 for (PHINode &PN : BB->phis()) {
2413 SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
2414 for (const Use &U : PN.incoming_values())
2415 IncomingVals.insert(KV: {PN.getIncomingBlock(U), &U});
2416 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2417 for (BasicBlock *Pred : UnconditionalPreds)
2418 Ops.push_back(Elt: *IncomingVals[Pred]);
2419 }
2420
2421 int ScanIdx = 0;
2422 SmallPtrSet<Value*,4> InstructionsToSink;
2423 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2424 while (LRI.isValid() &&
2425 canSinkInstructions(Insts: *LRI, PHIOperands)) {
2426 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2427 << "\n");
2428 InstructionsToSink.insert_range(R: *LRI);
2429 ++ScanIdx;
2430 --LRI;
2431 }
2432
2433 // If no instructions can be sunk, early-return.
2434 if (ScanIdx == 0)
2435 return false;
2436
2437 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2438
2439 if (!followedByDeoptOrUnreachable) {
2440 // Check whether this is the pointer operand of a load/store.
2441 auto IsMemOperand = [](Use &U) {
2442 auto *I = cast<Instruction>(Val: U.getUser());
2443 if (isa<LoadInst>(Val: I))
2444 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2445 if (isa<StoreInst>(Val: I))
2446 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2447 return false;
2448 };
2449
2450 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2451 // actually sink before encountering instruction that is unprofitable to
2452 // sink?
2453 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2454 unsigned NumPHIInsts = 0;
2455 for (Use &U : (*LRI)[0]->operands()) {
2456 auto It = PHIOperands.find(Val: &U);
2457 if (It != PHIOperands.end() && !all_of(Range&: It->second, P: [&](Value *V) {
2458 return InstructionsToSink.contains(Ptr: V);
2459 })) {
2460 ++NumPHIInsts;
2461 // Do not separate a load/store from the gep producing the address.
2462 // The gep can likely be folded into the load/store as an addressing
2463 // mode. Additionally, a load of a gep is easier to analyze than a
2464 // load of a phi.
2465 if (IsMemOperand(U) &&
2466 any_of(Range&: It->second, P: [](Value *V) { return isa<GEPOperator>(Val: V); }))
2467 return false;
2468 // FIXME: this check is overly optimistic. We may end up not sinking
2469 // said instruction, due to the very same profitability check.
2470 // See @creating_too_many_phis in sink-common-code.ll.
2471 }
2472 }
2473 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2474 return NumPHIInsts <= 1;
2475 };
2476
2477 // We've determined that we are going to sink last ScanIdx instructions,
2478 // and recorded them in InstructionsToSink. Now, some instructions may be
2479 // unprofitable to sink. But that determination depends on the instructions
2480 // that we are going to sink.
2481
2482 // First, forward scan: find the first instruction unprofitable to sink,
2483 // recording all the ones that are profitable to sink.
2484 // FIXME: would it be better, after we detect that not all are profitable.
2485 // to either record the profitable ones, or erase the unprofitable ones?
2486 // Maybe we need to choose (at runtime) the one that will touch least
2487 // instrs?
2488 LRI.reset();
2489 int Idx = 0;
2490 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2491 while (Idx < ScanIdx) {
2492 if (!ProfitableToSinkInstruction(LRI)) {
2493 // Too many PHIs would be created.
2494 LLVM_DEBUG(
2495 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2496 break;
2497 }
2498 InstructionsProfitableToSink.insert_range(R: *LRI);
2499 --LRI;
2500 ++Idx;
2501 }
2502
2503 // If no instructions can be sunk, early-return.
2504 if (Idx == 0)
2505 return false;
2506
2507 // Did we determine that (only) some instructions are unprofitable to sink?
2508 if (Idx < ScanIdx) {
2509 // Okay, some instructions are unprofitable.
2510 ScanIdx = Idx;
2511 InstructionsToSink = InstructionsProfitableToSink;
2512
2513 // But, that may make other instructions unprofitable, too.
2514 // So, do a backward scan, do any earlier instructions become
2515 // unprofitable?
2516 assert(
2517 !ProfitableToSinkInstruction(LRI) &&
2518 "We already know that the last instruction is unprofitable to sink");
2519 ++LRI;
2520 --Idx;
2521 while (Idx >= 0) {
2522 // If we detect that an instruction becomes unprofitable to sink,
2523 // all earlier instructions won't be sunk either,
2524 // so preemptively keep InstructionsProfitableToSink in sync.
2525 // FIXME: is this the most performant approach?
2526 for (auto *I : *LRI)
2527 InstructionsProfitableToSink.erase(Ptr: I);
2528 if (!ProfitableToSinkInstruction(LRI)) {
2529 // Everything starting with this instruction won't be sunk.
2530 ScanIdx = Idx;
2531 InstructionsToSink = InstructionsProfitableToSink;
2532 }
2533 ++LRI;
2534 --Idx;
2535 }
2536 }
2537
2538 // If no instructions can be sunk, early-return.
2539 if (ScanIdx == 0)
2540 return false;
2541 }
2542
2543 bool Changed = false;
2544
2545 if (HaveNonUnconditionalPredecessors) {
2546 if (!followedByDeoptOrUnreachable) {
2547 // It is always legal to sink common instructions from unconditional
2548 // predecessors. However, if not all predecessors are unconditional,
2549 // this transformation might be pessimizing. So as a rule of thumb,
2550 // don't do it unless we'd sink at least one non-speculatable instruction.
2551 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2552 LRI.reset();
2553 int Idx = 0;
2554 bool Profitable = false;
2555 while (Idx < ScanIdx) {
2556 if (!isSafeToSpeculativelyExecute(I: (*LRI)[0])) {
2557 Profitable = true;
2558 break;
2559 }
2560 --LRI;
2561 ++Idx;
2562 }
2563 if (!Profitable)
2564 return false;
2565 }
2566
2567 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2568 // We have a conditional edge and we're going to sink some instructions.
2569 // Insert a new block postdominating all blocks we're going to sink from.
2570 if (!SplitBlockPredecessors(BB, Preds: UnconditionalPreds, Suffix: ".sink.split", DTU))
2571 // Edges couldn't be split.
2572 return false;
2573 Changed = true;
2574 }
2575
2576 // Now that we've analyzed all potential sinking candidates, perform the
2577 // actual sink. We iteratively sink the last non-terminator of the source
2578 // blocks into their common successor unless doing so would require too
2579 // many PHI instructions to be generated (currently only one PHI is allowed
2580 // per sunk instruction).
2581 //
2582 // We can use InstructionsToSink to discount values needing PHI-merging that will
2583 // actually be sunk in a later iteration. This allows us to be more
2584 // aggressive in what we sink. This does allow a false positive where we
2585 // sink presuming a later value will also be sunk, but stop half way through
2586 // and never actually sink it which means we produce more PHIs than intended.
2587 // This is unlikely in practice though.
2588 int SinkIdx = 0;
2589 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2590 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2591 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2592 << "\n");
2593
2594 // Because we've sunk every instruction in turn, the current instruction to
2595 // sink is always at index 0.
2596 LRI.reset();
2597
2598 sinkLastInstruction(Blocks: UnconditionalPreds);
2599 NumSinkCommonInstrs++;
2600 Changed = true;
2601 }
2602 if (SinkIdx != 0)
2603 ++NumSinkCommonCode;
2604 return Changed;
2605}
2606
2607namespace {
2608
2609struct CompatibleSets {
2610 using SetTy = SmallVector<InvokeInst *, 2>;
2611
2612 SmallVector<SetTy, 1> Sets;
2613
2614 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2615
2616 SetTy &getCompatibleSet(InvokeInst *II);
2617
2618 void insert(InvokeInst *II);
2619};
2620
2621CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2622 // Perform a linear scan over all the existing sets, see if the new `invoke`
2623 // is compatible with any particular set. Since we know that all the `invokes`
2624 // within a set are compatible, only check the first `invoke` in each set.
2625 // WARNING: at worst, this has quadratic complexity.
2626 for (CompatibleSets::SetTy &Set : Sets) {
2627 if (CompatibleSets::shouldBelongToSameSet(Invokes: {Set.front(), II}))
2628 return Set;
2629 }
2630
2631 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2632 return Sets.emplace_back();
2633}
2634
2635void CompatibleSets::insert(InvokeInst *II) {
2636 getCompatibleSet(II).emplace_back(Args&: II);
2637}
2638
2639bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2640 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2641
2642 // Can we theoretically merge these `invoke`s?
2643 auto IsIllegalToMerge = [](InvokeInst *II) {
2644 return II->cannotMerge() || II->isInlineAsm();
2645 };
2646 if (any_of(Range&: Invokes, P: IsIllegalToMerge))
2647 return false;
2648
2649 // Either both `invoke`s must be direct,
2650 // or both `invoke`s must be indirect.
2651 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2652 bool HaveIndirectCalls = any_of(Range&: Invokes, P: IsIndirectCall);
2653 bool AllCallsAreIndirect = all_of(Range&: Invokes, P: IsIndirectCall);
2654 if (HaveIndirectCalls) {
2655 if (!AllCallsAreIndirect)
2656 return false;
2657 } else {
2658 // All callees must be identical.
2659 Value *Callee = nullptr;
2660 for (InvokeInst *II : Invokes) {
2661 Value *CurrCallee = II->getCalledOperand();
2662 assert(CurrCallee && "There is always a called operand.");
2663 if (!Callee)
2664 Callee = CurrCallee;
2665 else if (Callee != CurrCallee)
2666 return false;
2667 }
2668 }
2669
2670 // Either both `invoke`s must not have a normal destination,
2671 // or both `invoke`s must have a normal destination,
2672 auto HasNormalDest = [](InvokeInst *II) {
2673 return !isa<UnreachableInst>(Val: II->getNormalDest()->getFirstNonPHIOrDbg());
2674 };
2675 if (any_of(Range&: Invokes, P: HasNormalDest)) {
2676 // Do not merge `invoke` that does not have a normal destination with one
2677 // that does have a normal destination, even though doing so would be legal.
2678 if (!all_of(Range&: Invokes, P: HasNormalDest))
2679 return false;
2680
2681 // All normal destinations must be identical.
2682 BasicBlock *NormalBB = nullptr;
2683 for (InvokeInst *II : Invokes) {
2684 BasicBlock *CurrNormalBB = II->getNormalDest();
2685 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2686 if (!NormalBB)
2687 NormalBB = CurrNormalBB;
2688 else if (NormalBB != CurrNormalBB)
2689 return false;
2690 }
2691
2692 // In the normal destination, the incoming values for these two `invoke`s
2693 // must be compatible.
2694 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2695 if (!incomingValuesAreCompatible(
2696 BB: NormalBB, IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()},
2697 EquivalenceSet: &EquivalenceSet))
2698 return false;
2699 }
2700
2701#ifndef NDEBUG
2702 // All unwind destinations must be identical.
2703 // We know that because we have started from said unwind destination.
2704 BasicBlock *UnwindBB = nullptr;
2705 for (InvokeInst *II : Invokes) {
2706 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2707 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2708 if (!UnwindBB)
2709 UnwindBB = CurrUnwindBB;
2710 else
2711 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2712 }
2713#endif
2714
2715 // In the unwind destination, the incoming values for these two `invoke`s
2716 // must be compatible.
2717 if (!incomingValuesAreCompatible(
2718 BB: Invokes.front()->getUnwindDest(),
2719 IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2720 return false;
2721
2722 // Ignoring arguments, these `invoke`s must be identical,
2723 // including operand bundles.
2724 const InvokeInst *II0 = Invokes.front();
2725 for (auto *II : Invokes.drop_front())
2726 if (!II->isSameOperationAs(I: II0, flags: Instruction::CompareUsingIntersectedAttrs))
2727 return false;
2728
2729 // Can we theoretically form the data operands for the merged `invoke`?
2730 auto IsIllegalToMergeArguments = [](auto Ops) {
2731 Use &U0 = std::get<0>(Ops);
2732 Use &U1 = std::get<1>(Ops);
2733 if (U0 == U1)
2734 return false;
2735 return U0->getType()->isTokenTy() ||
2736 !canReplaceOperandWithVariable(I: cast<Instruction>(Val: U0.getUser()),
2737 OpIdx: U0.getOperandNo());
2738 };
2739 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2740 if (any_of(Range: zip(t: Invokes[0]->data_ops(), u: Invokes[1]->data_ops()),
2741 P: IsIllegalToMergeArguments))
2742 return false;
2743
2744 return true;
2745}
2746
2747} // namespace
2748
2749// Merge all invokes in the provided set, all of which are compatible
2750// as per the `CompatibleSets::shouldBelongToSameSet()`.
2751static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
2752 DomTreeUpdater *DTU) {
2753 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2754
2755 SmallVector<DominatorTree::UpdateType, 8> Updates;
2756 if (DTU)
2757 Updates.reserve(N: 2 + 3 * Invokes.size());
2758
2759 bool HasNormalDest =
2760 !isa<UnreachableInst>(Val: Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2761
2762 // Clone one of the invokes into a new basic block.
2763 // Since they are all compatible, it doesn't matter which invoke is cloned.
2764 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2765 InvokeInst *II0 = Invokes.front();
2766 BasicBlock *II0BB = II0->getParent();
2767 BasicBlock *InsertBeforeBlock =
2768 II0->getParent()->getIterator()->getNextNode();
2769 Function *Func = II0BB->getParent();
2770 LLVMContext &Ctx = II0->getContext();
2771
2772 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2773 Context&: Ctx, Name: II0BB->getName() + ".invoke", Parent: Func, InsertBefore: InsertBeforeBlock);
2774
2775 auto *MergedInvoke = cast<InvokeInst>(Val: II0->clone());
2776 // NOTE: all invokes have the same attributes, so no handling needed.
2777 MergedInvoke->insertInto(ParentBB: MergedInvokeBB, It: MergedInvokeBB->end());
2778
2779 if (!HasNormalDest) {
2780 // This set does not have a normal destination,
2781 // so just form a new block with unreachable terminator.
2782 BasicBlock *MergedNormalDest = BasicBlock::Create(
2783 Context&: Ctx, Name: II0BB->getName() + ".cont", Parent: Func, InsertBefore: InsertBeforeBlock);
2784 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2785 UI->setDebugLoc(DebugLoc::getTemporary());
2786 MergedInvoke->setNormalDest(MergedNormalDest);
2787 }
2788
2789 // The unwind destination, however, remainds identical for all invokes here.
2790
2791 return MergedInvoke;
2792 }();
2793
2794 if (DTU) {
2795 // Predecessor blocks that contained these invokes will now branch to
2796 // the new block that contains the merged invoke, ...
2797 for (InvokeInst *II : Invokes)
2798 Updates.push_back(
2799 Elt: {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2800
2801 // ... which has the new `unreachable` block as normal destination,
2802 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2803 for (BasicBlock *SuccBBOfMergedInvoke : successors(I: MergedInvoke))
2804 Updates.push_back(Elt: {DominatorTree::Insert, MergedInvoke->getParent(),
2805 SuccBBOfMergedInvoke});
2806
2807 // Since predecessor blocks now unconditionally branch to a new block,
2808 // they no longer branch to their original successors.
2809 for (InvokeInst *II : Invokes)
2810 for (BasicBlock *SuccOfPredBB : successors(BB: II->getParent()))
2811 Updates.push_back(
2812 Elt: {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2813 }
2814
2815 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2816
2817 // Form the merged operands for the merged invoke.
2818 for (Use &U : MergedInvoke->operands()) {
2819 // Only PHI together the indirect callees and data operands.
2820 if (MergedInvoke->isCallee(U: &U)) {
2821 if (!IsIndirectCall)
2822 continue;
2823 } else if (!MergedInvoke->isDataOperand(U: &U))
2824 continue;
2825
2826 // Don't create trivial PHI's with all-identical incoming values.
2827 bool NeedPHI = any_of(Range&: Invokes, P: [&U](InvokeInst *II) {
2828 return II->getOperand(i_nocapture: U.getOperandNo()) != U.get();
2829 });
2830 if (!NeedPHI)
2831 continue;
2832
2833 // Form a PHI out of all the data ops under this index.
2834 PHINode *PN = PHINode::Create(
2835 Ty: U->getType(), /*NumReservedValues=*/Invokes.size(), NameStr: "", InsertBefore: MergedInvoke->getIterator());
2836 for (InvokeInst *II : Invokes)
2837 PN->addIncoming(V: II->getOperand(i_nocapture: U.getOperandNo()), BB: II->getParent());
2838
2839 U.set(PN);
2840 }
2841
2842 // We've ensured that each PHI node has compatible (identical) incoming values
2843 // when coming from each of the `invoke`s in the current merge set,
2844 // so update the PHI nodes accordingly.
2845 for (BasicBlock *Succ : successors(I: MergedInvoke))
2846 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2847 /*ExistPred=*/Invokes.front()->getParent());
2848
2849 // And finally, replace the original `invoke`s with an unconditional branch
2850 // to the block with the merged `invoke`. Also, give that merged `invoke`
2851 // the merged debugloc of all the original `invoke`s.
2852 DILocation *MergedDebugLoc = nullptr;
2853 for (InvokeInst *II : Invokes) {
2854 // Compute the debug location common to all the original `invoke`s.
2855 if (!MergedDebugLoc)
2856 MergedDebugLoc = II->getDebugLoc();
2857 else
2858 MergedDebugLoc =
2859 DebugLoc::getMergedLocation(LocA: MergedDebugLoc, LocB: II->getDebugLoc());
2860
2861 // And replace the old `invoke` with an unconditionally branch
2862 // to the block with the merged `invoke`.
2863 for (BasicBlock *OrigSuccBB : successors(BB: II->getParent()))
2864 OrigSuccBB->removePredecessor(Pred: II->getParent());
2865 auto *BI = BranchInst::Create(IfTrue: MergedInvoke->getParent(), InsertBefore: II->getParent());
2866 // The unconditional branch is part of the replacement for the original
2867 // invoke, so should use its DebugLoc.
2868 BI->setDebugLoc(II->getDebugLoc());
2869 bool Success = MergedInvoke->tryIntersectAttributes(Other: II);
2870 assert(Success && "Merged invokes with incompatible attributes");
2871 // For NDEBUG Compile
2872 (void)Success;
2873 II->replaceAllUsesWith(V: MergedInvoke);
2874 II->eraseFromParent();
2875 ++NumInvokesMerged;
2876 }
2877 MergedInvoke->setDebugLoc(MergedDebugLoc);
2878 ++NumInvokeSetsFormed;
2879
2880 if (DTU)
2881 DTU->applyUpdates(Updates);
2882}
2883
2884/// If this block is a `landingpad` exception handling block, categorize all
2885/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2886/// being "mergeable" together, and then merge invokes in each set together.
2887///
2888/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2889/// [...] [...]
2890/// | |
2891/// [invoke0] [invoke1]
2892/// / \ / \
2893/// [cont0] [landingpad] [cont1]
2894/// to:
2895/// [...] [...]
2896/// \ /
2897/// [invoke]
2898/// / \
2899/// [cont] [landingpad]
2900///
2901/// But of course we can only do that if the invokes share the `landingpad`,
2902/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2903/// and the invoked functions are "compatible".
2904static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
2905 if (!EnableMergeCompatibleInvokes)
2906 return false;
2907
2908 bool Changed = false;
2909
2910 // FIXME: generalize to all exception handling blocks?
2911 if (!BB->isLandingPad())
2912 return Changed;
2913
2914 CompatibleSets Grouper;
2915
2916 // Record all the predecessors of this `landingpad`. As per verifier,
2917 // the only allowed predecessor is the unwind edge of an `invoke`.
2918 // We want to group "compatible" `invokes` into the same set to be merged.
2919 for (BasicBlock *PredBB : predecessors(BB))
2920 Grouper.insert(II: cast<InvokeInst>(Val: PredBB->getTerminator()));
2921
2922 // And now, merge `invoke`s that were grouped togeter.
2923 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2924 if (Invokes.size() < 2)
2925 continue;
2926 Changed = true;
2927 mergeCompatibleInvokesImpl(Invokes, DTU);
2928 }
2929
2930 return Changed;
2931}
2932
2933namespace {
2934/// Track ephemeral values, which should be ignored for cost-modelling
2935/// purposes. Requires walking instructions in reverse order.
2936class EphemeralValueTracker {
2937 SmallPtrSet<const Instruction *, 32> EphValues;
2938
2939 bool isEphemeral(const Instruction *I) {
2940 if (isa<AssumeInst>(Val: I))
2941 return true;
2942 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2943 all_of(Range: I->users(), P: [&](const User *U) {
2944 return EphValues.count(Ptr: cast<Instruction>(Val: U));
2945 });
2946 }
2947
2948public:
2949 bool track(const Instruction *I) {
2950 if (isEphemeral(I)) {
2951 EphValues.insert(Ptr: I);
2952 return true;
2953 }
2954 return false;
2955 }
2956
2957 bool contains(const Instruction *I) const { return EphValues.contains(Ptr: I); }
2958};
2959} // namespace
2960
2961/// Determine if we can hoist sink a sole store instruction out of a
2962/// conditional block.
2963///
2964/// We are looking for code like the following:
2965/// BrBB:
2966/// store i32 %add, i32* %arrayidx2
2967/// ... // No other stores or function calls (we could be calling a memory
2968/// ... // function).
2969/// %cmp = icmp ult %x, %y
2970/// br i1 %cmp, label %EndBB, label %ThenBB
2971/// ThenBB:
2972/// store i32 %add5, i32* %arrayidx2
2973/// br label EndBB
2974/// EndBB:
2975/// ...
2976/// We are going to transform this into:
2977/// BrBB:
2978/// store i32 %add, i32* %arrayidx2
2979/// ... //
2980/// %cmp = icmp ult %x, %y
2981/// %add.add5 = select i1 %cmp, i32 %add, %add5
2982/// store i32 %add.add5, i32* %arrayidx2
2983/// ...
2984///
2985/// \return The pointer to the value of the previous store if the store can be
2986/// hoisted into the predecessor block. 0 otherwise.
2987static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
2988 BasicBlock *StoreBB, BasicBlock *EndBB) {
2989 StoreInst *StoreToHoist = dyn_cast<StoreInst>(Val: I);
2990 if (!StoreToHoist)
2991 return nullptr;
2992
2993 // Volatile or atomic.
2994 if (!StoreToHoist->isSimple())
2995 return nullptr;
2996
2997 Value *StorePtr = StoreToHoist->getPointerOperand();
2998 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2999
3000 // Look for a store to the same pointer in BrBB.
3001 unsigned MaxNumInstToLookAt = 9;
3002 // Skip pseudo probe intrinsic calls which are not really killing any memory
3003 // accesses.
3004 for (Instruction &CurI : reverse(C: BrBB->instructionsWithoutDebug(SkipPseudoOp: true))) {
3005 if (!MaxNumInstToLookAt)
3006 break;
3007 --MaxNumInstToLookAt;
3008
3009 // Could be calling an instruction that affects memory like free().
3010 if (CurI.mayWriteToMemory() && !isa<StoreInst>(Val: CurI))
3011 return nullptr;
3012
3013 if (auto *SI = dyn_cast<StoreInst>(Val: &CurI)) {
3014 // Found the previous store to same location and type. Make sure it is
3015 // simple, to avoid introducing a spurious non-atomic write after an
3016 // atomic write.
3017 if (SI->getPointerOperand() == StorePtr &&
3018 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3019 SI->getAlign() >= StoreToHoist->getAlign())
3020 // Found the previous store, return its value operand.
3021 return SI->getValueOperand();
3022 return nullptr; // Unknown store.
3023 }
3024
3025 if (auto *LI = dyn_cast<LoadInst>(Val: &CurI)) {
3026 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3027 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3028 Value *Obj = getUnderlyingObject(V: StorePtr);
3029 bool ExplicitlyDereferenceableOnly;
3030 if (isWritableObject(Object: Obj, ExplicitlyDereferenceableOnly) &&
3031 capturesNothing(
3032 CC: PointerMayBeCaptured(V: Obj, /*ReturnCaptures=*/false,
3033 Mask: CaptureComponents::Provenance)) &&
3034 (!ExplicitlyDereferenceableOnly ||
3035 isDereferenceablePointer(V: StorePtr, Ty: StoreTy,
3036 DL: LI->getDataLayout()))) {
3037 // Found a previous load, return it.
3038 return LI;
3039 }
3040 }
3041 // The load didn't work out, but we may still find a store.
3042 }
3043 }
3044
3045 return nullptr;
3046}
3047
3048/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3049/// converted to selects.
3050static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
3051 BasicBlock *EndBB,
3052 unsigned &SpeculatedInstructions,
3053 InstructionCost &Cost,
3054 const TargetTransformInfo &TTI) {
3055 TargetTransformInfo::TargetCostKind CostKind =
3056 BB->getParent()->hasMinSize()
3057 ? TargetTransformInfo::TCK_CodeSize
3058 : TargetTransformInfo::TCK_SizeAndLatency;
3059
3060 bool HaveRewritablePHIs = false;
3061 for (PHINode &PN : EndBB->phis()) {
3062 Value *OrigV = PN.getIncomingValueForBlock(BB);
3063 Value *ThenV = PN.getIncomingValueForBlock(BB: ThenBB);
3064
3065 // FIXME: Try to remove some of the duplication with
3066 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3067 if (ThenV == OrigV)
3068 continue;
3069
3070 Cost += TTI.getCmpSelInstrCost(Opcode: Instruction::Select, ValTy: PN.getType(),
3071 CondTy: CmpInst::makeCmpResultType(opnd_type: PN.getType()),
3072 VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
3073
3074 // Don't convert to selects if we could remove undefined behavior instead.
3075 if (passingValueIsAlwaysUndefined(V: OrigV, I: &PN) ||
3076 passingValueIsAlwaysUndefined(V: ThenV, I: &PN))
3077 return false;
3078
3079 HaveRewritablePHIs = true;
3080 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(Val: OrigV);
3081 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(Val: ThenV);
3082 if (!OrigCE && !ThenCE)
3083 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3084
3085 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(I: OrigCE, TTI) : 0;
3086 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(I: ThenCE, TTI) : 0;
3087 InstructionCost MaxCost =
3088 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3089 if (OrigCost + ThenCost > MaxCost)
3090 return false;
3091
3092 // Account for the cost of an unfolded ConstantExpr which could end up
3093 // getting expanded into Instructions.
3094 // FIXME: This doesn't account for how many operations are combined in the
3095 // constant expression.
3096 ++SpeculatedInstructions;
3097 if (SpeculatedInstructions > 1)
3098 return false;
3099 }
3100
3101 return HaveRewritablePHIs;
3102}
3103
3104static bool isProfitableToSpeculate(const BranchInst *BI,
3105 std::optional<bool> Invert,
3106 const TargetTransformInfo &TTI) {
3107 // If the branch is non-unpredictable, and is predicted to *not* branch to
3108 // the `then` block, then avoid speculating it.
3109 if (BI->getMetadata(KindID: LLVMContext::MD_unpredictable))
3110 return true;
3111
3112 uint64_t TWeight, FWeight;
3113 if (!extractBranchWeights(I: *BI, TrueVal&: TWeight, FalseVal&: FWeight) || (TWeight + FWeight) == 0)
3114 return true;
3115
3116 if (!Invert.has_value())
3117 return false;
3118
3119 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3120 BranchProbability BIEndProb =
3121 BranchProbability::getBranchProbability(Numerator: EndWeight, Denominator: TWeight + FWeight);
3122 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3123 return BIEndProb < Likely;
3124}
3125
3126/// Speculate a conditional basic block flattening the CFG.
3127///
3128/// Note that this is a very risky transform currently. Speculating
3129/// instructions like this is most often not desirable. Instead, there is an MI
3130/// pass which can do it with full awareness of the resource constraints.
3131/// However, some cases are "obvious" and we should do directly. An example of
3132/// this is speculating a single, reasonably cheap instruction.
3133///
3134/// There is only one distinct advantage to flattening the CFG at the IR level:
3135/// it makes very common but simplistic optimizations such as are common in
3136/// instcombine and the DAG combiner more powerful by removing CFG edges and
3137/// modeling their effects with easier to reason about SSA value graphs.
3138///
3139///
3140/// An illustration of this transform is turning this IR:
3141/// \code
3142/// BB:
3143/// %cmp = icmp ult %x, %y
3144/// br i1 %cmp, label %EndBB, label %ThenBB
3145/// ThenBB:
3146/// %sub = sub %x, %y
3147/// br label %EndBB
3148/// EndBB:
3149/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3150/// ...
3151/// \endcode
3152///
3153/// Into this IR:
3154/// \code
3155/// BB:
3156/// %cmp = icmp ult %x, %y
3157/// %sub = sub %x, %y
3158/// %cond = select i1 %cmp, 0, %sub
3159/// ...
3160/// \endcode
3161///
3162/// \returns true if the conditional block is removed.
///
/// \param BI the conditional branch whose parent block receives the hoisted
/// instructions. One of its successors must be \p ThenBB and the other must
/// be successor 0 of ThenBB's terminator (both are asserted below).
/// \param ThenBB the conditionally executed block whose non-terminator
/// instructions are moved in front of \p BI.
3163bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3164 BasicBlock *ThenBB) {
3165 if (!Options.SpeculateBlocks)
3166 return false;
3167
3168 // Be conservative for now. FP select instruction can often be expensive.
3169 Value *BrCond = BI->getCondition();
3170 if (isa<FCmpInst>(Val: BrCond))
3171 return false;
3172
3173 BasicBlock *BB = BI->getParent();
3174 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(Idx: 0);
 // Upper bound for the select cost that is checked once the scan of ThenBB
 // has finished.
3175 InstructionCost Budget =
3176 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3177
3178 // If ThenBB is actually on the false edge of the conditional branch, remember
3179 // to swap the select operands later.
3180 bool Invert = false;
3181 if (ThenBB != BI->getSuccessor(i: 0)) {
3182 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3183 Invert = true;
3184 }
3185 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3186
3187 if (!isProfitableToSpeculate(BI, Invert, TTI))
3188 return false;
3189
3190 // Keep a count of how many times instructions are used within ThenBB when
3191 // they are candidates for sinking into ThenBB. Specifically:
3192 // - They are defined in BB, and
3193 // - They have no side effects, and
3194 // - All of their uses are in ThenBB.
3195 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3196
3197 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3198
3199 unsigned SpeculatedInstructions = 0;
3200 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3201 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3202 Value *SpeculatedStoreValue = nullptr;
3203 StoreInst *SpeculatedStore = nullptr;
3204 EphemeralValueTracker EphTracker;
 // Walk ThenBB from the bottom up, leaving out its terminator.
3205 for (Instruction &I : reverse(C: drop_end(RangeOrContainer&: *ThenBB))) {
3206 // Skip pseudo probes. The consequence is we lose track of the branch
3207 // probability for ThenBB, which is fine since the optimization here takes
3208 // place regardless of the branch probability.
3209 if (isa<PseudoProbeInst>(Val: I)) {
3210 // The probe should be deleted so that it will not be over-counted when
3211 // the samples collected on the non-conditional path are counted towards
3212 // the conditional path. We leave it for the counts inference algorithm to
3213 // figure out a proper count for an unknown probe.
3214 SpeculatedPseudoProbes.push_back(Elt: &I);
3215 continue;
3216 }
3217
3218 // Ignore ephemeral values, they will be dropped by the transform.
3219 if (EphTracker.track(I: &I))
3220 continue;
3221
3222 // Only speculatively execute a single instruction (not counting the
3223 // terminator) for now.
3224 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3225 isSafeCheapLoadStore(I: &I, TTI) &&
3226 SpeculatedConditionalLoadsStores.size() <
3227 HoistLoadsStoresWithCondFaultingThreshold;
3228 // Not count load/store into cost if target supports conditional faulting
3229 // b/c it's cheap to speculate it.
3230 if (IsSafeCheapLoadStore)
3231 SpeculatedConditionalLoadsStores.push_back(Elt: &I);
3232 else
3233 ++SpeculatedInstructions;
3234
 // Checked on every iteration, so the scan stops as soon as a second
 // counted instruction shows up.
3235 if (SpeculatedInstructions > 1)
3236 return false;
3237
3238 // Don't hoist the instruction if it's unsafe or expensive.
 // Note the assignment inside the condition: SpeculatedStoreValue is set to
 // the result of isSafeToSpeculateStore, which is only consulted while
 // HoistCondStores is set and no store value has been recorded yet.
3239 if (!IsSafeCheapLoadStore &&
3240 !isSafeToSpeculativelyExecute(I: &I, CtxI: BI, AC: Options.AC) &&
3241 !(HoistCondStores && !SpeculatedStoreValue &&
3242 (SpeculatedStoreValue =
3243 isSafeToSpeculateStore(I: &I, BrBB: BB, StoreBB: ThenBB, EndBB))))
3244 return false;
3245 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3246 computeSpeculationCost(I: &I, TTI) >
3247 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
3248 return false;
3249
3250 // Store the store speculation candidate.
3251 if (!SpeculatedStore && SpeculatedStoreValue)
3252 SpeculatedStore = cast<StoreInst>(Val: &I);
3253
3254 // Do not hoist the instruction if any of its operands are defined but not
3255 // used in BB. The transformation will prevent the operand from
3256 // being sunk into the use block.
3257 for (Use &Op : I.operands()) {
3258 Instruction *OpI = dyn_cast<Instruction>(Val&: Op);
3259 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3260 continue; // Not a candidate for sinking.
3261
3262 ++SinkCandidateUseCounts[OpI];
3263 }
3264 }
3265
3266 // Consider any sink candidates which are only used in ThenBB as costs for
3267 // speculation. Note, while we iterate over a DenseMap here, we are summing
3268 // and so iteration order isn't significant.
3269 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3270 if (Inst->hasNUses(N: Count)) {
3271 ++SpeculatedInstructions;
3272 if (SpeculatedInstructions > 1)
3273 return false;
3274 }
3275
3276 // Check that we can insert the selects and that it's not too expensive to do
3277 // so.
 // Convert starts out true when a store or a conditional load/store was
 // collected above; otherwise it depends on the result of
 // validateAndCostRequiredSelects. Cost is handed to that helper by name
 // (presumably filled in by it -- confirm against its definition) and is then
 // compared against Budget.
3278 bool Convert =
3279 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3280 InstructionCost Cost = 0;
3281 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3282 SpeculatedInstructions, Cost, TTI);
3283 if (!Convert || Cost > Budget)
3284 return false;
3285
3286 // If we get here, we can hoist the instruction and if-convert.
3287 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3288
3289 Instruction *Sel = nullptr;
3290 // Insert a select of the value of the speculated store.
3291 if (SpeculatedStoreValue) {
3292 IRBuilder<NoFolder> Builder(BI);
 // OrigV and TrueV both start as the store's current value operand; FalseV
 // is the value returned by isSafeToSpeculateStore. OrigV is kept so that
 // the debug records further down can be redirected to the select.
3293 Value *OrigV = SpeculatedStore->getValueOperand();
3294 Value *TrueV = SpeculatedStore->getValueOperand();
3295 Value *FalseV = SpeculatedStoreValue;
3296 if (Invert)
3297 std::swap(a&: TrueV, b&: FalseV);
3298 Value *S = Builder.CreateSelect(
3299 C: BrCond, True: TrueV, False: FalseV, Name: "spec.store.select", MDFrom: BI);
3300 Sel = cast<Instruction>(Val: S);
3301 SpeculatedStore->setOperand(i_nocapture: 0, Val_nocapture: S);
3302 SpeculatedStore->applyMergedLocation(LocA: BI->getDebugLoc(),
3303 LocB: SpeculatedStore->getDebugLoc());
3304 // The value stored is still conditional, but the store itself is now
3305 // unconditionally executed, so we must be sure that any linked dbg.assign
3306 // intrinsics are tracking the new stored value (the result of the
3307 // select). If we don't, and the store were to be removed by another pass
3308 // (e.g. DSE), then we'd eventually end up emitting a location describing
3309 // the conditional value, unconditionally.
3310 //
3311 // === Before this transformation ===
3312 // pred:
3313 // store %one, %x.dest, !DIAssignID !1
3314 // dbg.assign %one, "x", ..., !1, ...
3315 // br %cond if.then
3316 //
3317 // if.then:
3318 // store %two, %x.dest, !DIAssignID !2
3319 // dbg.assign %two, "x", ..., !2, ...
3320 //
3321 // === After this transformation ===
3322 // pred:
3323 // store %one, %x.dest, !DIAssignID !1
3324 // dbg.assign %one, "x", ..., !1
3325 /// ...
3326 // %merge = select %cond, %two, %one
3327 // store %merge, %x.dest, !DIAssignID !2
3328 // dbg.assign %merge, "x", ..., !2
3329 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3330 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3331 DbgAssign->replaceVariableLocationOp(OrigV, S);
3332 };
3333 for_each(Range: at::getAssignmentMarkers(Inst: SpeculatedStore), F: replaceVariable);
3334 for_each(Range: at::getDVRAssignmentMarkers(Inst: SpeculatedStore), F: replaceVariable);
3335 }
3336
3337 // Metadata can be dependent on the condition we are hoisting above.
3338 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3339 // to avoid making it appear as if the condition is a constant, which would
3340 // be misleading while debugging.
3341 // Similarly strip attributes that maybe dependent on condition we are
3342 // hoisting above.
 // This loop covers the whole of ThenBB, terminator included. Only the
 // speculated store keeps the merged debug location assigned above.
3343 for (auto &I : make_early_inc_range(Range&: *ThenBB)) {
3344 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3345 I.setDebugLoc(DebugLoc::getDropped());
3346 }
3347 I.dropUBImplyingAttrsAndMetadata();
3348
3349 // Drop ephemeral values.
3350 if (EphTracker.contains(I: &I)) {
3351 I.replaceAllUsesWith(V: PoisonValue::get(T: I.getType()));
3352 I.eraseFromParent();
3353 }
3354 }
3355
3356 // Hoist the instructions.
3357 // Drop DbgVariableRecords attached to these instructions.
3358 for (auto &It : *ThenBB)
3359 for (DbgRecord &DR : make_early_inc_range(Range: It.getDbgRecordRange()))
3360 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3361 // equivalent).
3362 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(Val: &DR);
3363 !DVR || !DVR->isDbgAssign())
3364 It.dropOneDbgRecord(I: &DR);
 // Move everything except ThenBB's terminator to just in front of BI.
3365 BB->splice(ToIt: BI->getIterator(), FromBB: ThenBB, FromBeginIt: ThenBB->begin(),
3366 FromEndIt: std::prev(x: ThenBB->end()));
3367
3368 if (!SpeculatedConditionalLoadsStores.empty())
3369 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3370 Sel);
3371
3372 // Insert selects and rewrite the PHI operands.
3373 IRBuilder<NoFolder> Builder(BI);
3374 for (PHINode &PN : EndBB->phis()) {
3375 unsigned OrigI = PN.getBasicBlockIndex(BB);
3376 unsigned ThenI = PN.getBasicBlockIndex(BB: ThenBB);
3377 Value *OrigV = PN.getIncomingValue(i: OrigI);
3378 Value *ThenV = PN.getIncomingValue(i: ThenI);
3379
3380 // Skip PHIs which are trivial.
3381 if (OrigV == ThenV)
3382 continue;
3383
3384 // Create a select whose true value is the speculatively executed value and
3385 // false value is the pre-existing value. Swap them if the branch
3386 // destinations were inverted.
3387 Value *TrueV = ThenV, *FalseV = OrigV;
3388 if (Invert)
3389 std::swap(a&: TrueV, b&: FalseV);
3390 Value *V = Builder.CreateSelect(C: BrCond, True: TrueV, False: FalseV, Name: "spec.select", MDFrom: BI);
 // Both incoming entries receive the select: neither BI nor ThenBB's
 // terminator is rewritten in this function, so both edges into EndBB
 // still exist at this point.
3391 PN.setIncomingValue(i: OrigI, V);
3392 PN.setIncomingValue(i: ThenI, V);
3393 }
3394
3395 // Remove speculated pseudo probes.
3396 for (Instruction *I : SpeculatedPseudoProbes)
3397 I->eraseFromParent();
3398
3399 ++NumSpeculations;
3400 return true;
3401}
3402
3403/// Return true if we can thread a branch across this block.
3404static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
3405 int Size = 0;
3406 EphemeralValueTracker EphTracker;
3407
3408 // Walk the loop in reverse so that we can identify ephemeral values properly
3409 // (values only feeding assumes).
3410 for (Instruction &I : reverse(C: BB->instructionsWithoutDebug(SkipPseudoOp: false))) {
3411 // Can't fold blocks that contain noduplicate or convergent calls.
3412 if (CallInst *CI = dyn_cast<CallInst>(Val: &I))
3413 if (CI->cannotDuplicate() || CI->isConvergent())
3414 return false;
3415
3416 // Ignore ephemeral values which are deleted during codegen.
3417 // We will delete Phis while threading, so Phis should not be accounted in
3418 // block's size.
3419 if (!EphTracker.track(I: &I) && !isa<PHINode>(Val: I)) {
3420 if (Size++ > MaxSmallBlockSize)
3421 return false; // Don't clone large BB's.
3422 }
3423
3424 // We can only support instructions that do not define values that are
3425 // live outside of the current basic block.
3426 for (User *U : I.users()) {
3427 Instruction *UI = cast<Instruction>(Val: U);
3428 if (UI->getParent() != BB || isa<PHINode>(Val: UI))
3429 return false;
3430 }
3431
3432 // Looks ok, continue checking.
3433 }
3434
3435 return true;
3436}
3437
3438static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
3439 BasicBlock *To) {
3440 // Don't look past the block defining the value, we might get the value from
3441 // a previous loop iteration.
3442 auto *I = dyn_cast<Instruction>(Val: V);
3443 if (I && I->getParent() == To)
3444 return nullptr;
3445
3446 // We know the value if the From block branches on it.
3447 auto *BI = dyn_cast<BranchInst>(Val: From->getTerminator());
3448 if (BI && BI->isConditional() && BI->getCondition() == V &&
3449 BI->getSuccessor(i: 0) != BI->getSuccessor(i: 1))
3450 return BI->getSuccessor(i: 0) == To ? ConstantInt::getTrue(Context&: BI->getContext())
3451 : ConstantInt::getFalse(Context&: BI->getContext());
3452
3453 return nullptr;
3454}
3455
3456/// If we have a conditional branch on something for which we know the constant
3457/// value in predecessors (e.g. a phi node in the current block), thread edges
3458/// from the predecessor to their ultimate destination.
///
/// \param BI the conditional branch to thread across; its parent block is the
/// block being threaded through.
/// \param DTU if non-null, receives the dominator tree updates for the
/// redirected edge.
/// \param DL used when simplifying the cloned instructions.
/// \param AC if non-null, cloned assumes are registered with it.
/// \returns true after folding a single-entry PHI condition, false when
/// nothing was threaded, and std::nullopt after threading one group of
/// predecessors (the caller is expected to call again).
3459static std::optional<bool>
3460foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
3461 const DataLayout &DL,
3462 AssumptionCache *AC) {
 // Maps each known constant value of the condition to the predecessors in
 // which it has that value.
3463 SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
3464 BasicBlock *BB = BI->getParent();
3465 Value *Cond = BI->getCondition();
3466 PHINode *PN = dyn_cast<PHINode>(Val: Cond);
3467 if (PN && PN->getParent() == BB) {
3468 // Degenerate case of a single entry PHI.
3469 if (PN->getNumIncomingValues() == 1) {
3470 FoldSingleEntryPHINodes(BB: PN->getParent());
3471 return true;
3472 }
3473
 // The condition is a PHI in this block: every constant incoming value is
 // known for its incoming block.
3474 for (Use &U : PN->incoming_values())
3475 if (auto *CB = dyn_cast<ConstantInt>(Val&: U))
3476 KnownValues[CB].insert(X: PN->getIncomingBlock(U));
3477 } else {
 // Otherwise ask getKnownValueOnEdge for each incoming edge.
3478 for (BasicBlock *Pred : predecessors(BB)) {
3479 if (ConstantInt *CB = getKnownValueOnEdge(V: Cond, From: Pred, To: BB))
3480 KnownValues[CB].insert(X: Pred);
3481 }
3482 }
3483
3484 if (KnownValues.empty())
3485 return false;
3486
3487 // Now we know that this block has multiple preds and two succs.
3488 // Check that the block is small enough and values defined in the block are
3489 // not used outside of it.
3490 if (!blockIsSimpleEnoughToThreadThrough(BB))
3491 return false;
3492
 // At most one entry is threaded per call: the loop body ends in a return,
 // and only the skipped cases reach the next iteration.
3493 for (const auto &Pair : KnownValues) {
3494 // Okay, we now know that all edges from PredBB should be revectored to
3495 // branch to RealDest.
3496 ConstantInt *CB = Pair.first;
3497 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
 // A true condition selects successor 0, a false one successor 1.
3498 BasicBlock *RealDest = BI->getSuccessor(i: !CB->getZExtValue());
3499
3500 if (RealDest == BB)
3501 continue; // Skip self loops.
3502
3503 // Skip if the predecessor's terminator is an indirect branch.
3504 if (any_of(Range&: PredBBs, P: [](BasicBlock *PredBB) {
3505 return isa<IndirectBrInst>(Val: PredBB->getTerminator());
3506 }))
3507 continue;
3508
3509 LLVM_DEBUG({
3510 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3511 << " has value " << *Pair.first << " in predecessors:\n";
3512 for (const BasicBlock *PredBB : Pair.second)
3513 dbgs() << " " << PredBB->getName() << "\n";
3514 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3515 });
3516
3517 // Split the predecessors we are threading into a new edge block. We'll
3518 // clone the instructions into this block, and then redirect it to RealDest.
3519 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, Preds: PredBBs, Suffix: ".critedge", DTU);
3520
3521 // TODO: These just exist to reduce test diff, we can drop them if we like.
3522 EdgeBB->setName(RealDest->getName() + ".critedge");
3523 EdgeBB->moveBefore(MovePos: RealDest);
3524
3525 // Update PHI nodes.
3526 addPredecessorToBlock(Succ: RealDest, NewPred: EdgeBB, ExistPred: BB);
3527
3528 // BB may have instructions that are being threaded over. Clone these
3529 // instructions into EdgeBB. We know that there will be no uses of the
3530 // cloned instructions outside of EdgeBB.
3531 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3532 ValueToValueMapTy TranslateMap; // Track translated values.
 // Within the clone the branch condition is the known constant.
3533 TranslateMap[Cond] = CB;
3534
3535 // RemoveDIs: track instructions that we optimise away while folding, so
3536 // that we can copy DbgVariableRecords from them later.
3537 BasicBlock::iterator SrcDbgCursor = BB->begin();
3538 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
 // PHIs are not cloned; they are mapped to their incoming value for
 // EdgeBB.
3539 if (PHINode *PN = dyn_cast<PHINode>(Val&: BBI)) {
3540 TranslateMap[PN] = PN->getIncomingValueForBlock(BB: EdgeBB);
3541 continue;
3542 }
3543 // Clone the instruction.
3544 Instruction *N = BBI->clone();
3545 // Insert the new instruction into its new home.
3546 N->insertInto(ParentBB: EdgeBB, It: InsertPt);
3547
3548 if (BBI->hasName())
3549 N->setName(BBI->getName() + ".c");
3550
3551 // Update operands due to translation.
3552 // Key Instructions: Remap all the atom groups.
3553 if (const DebugLoc &DL = BBI->getDebugLoc())
3554 mapAtomInstance(DL, VMap&: TranslateMap);
3555 RemapInstruction(I: N, VM&: TranslateMap,
3556 Flags: RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
3557
3558 // Check for trivial simplification.
 // A simplified clone is erased unless it may have side effects; later
 // clones are mapped to the simplified value either way.
3559 if (Value *V = simplifyInstruction(I: N, Q: {DL, nullptr, nullptr, AC})) {
3560 if (!BBI->use_empty())
3561 TranslateMap[&*BBI] = V;
3562 if (!N->mayHaveSideEffects()) {
3563 N->eraseFromParent(); // Instruction folded away, don't need actual
3564 // inst
3565 N = nullptr;
3566 }
3567 } else {
3568 if (!BBI->use_empty())
3569 TranslateMap[&*BBI] = N;
3570 }
3571 if (N) {
3572 // Copy all debug-info attached to instructions from the last we
3573 // successfully clone, up to this instruction (they might have been
3574 // folded away).
3575 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3576 N->cloneDebugInfoFrom(From: &*SrcDbgCursor);
3577 SrcDbgCursor = std::next(x: BBI);
3578 // Clone debug-info on this instruction too.
3579 N->cloneDebugInfoFrom(From: &*BBI);
3580
3581 // Register the new instruction with the assumption cache if necessary.
3582 if (auto *Assume = dyn_cast<AssumeInst>(Val: N))
3583 if (AC)
3584 AC->registerAssumption(CI: Assume);
3585 }
3586 }
3587
 // Debug info on any remaining source instructions, and on BI itself, is
 // cloned onto the instruction at the insertion point.
3588 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3589 InsertPt->cloneDebugInfoFrom(From: &*SrcDbgCursor);
3590 InsertPt->cloneDebugInfoFrom(From: BI);
3591
 // Redirect EdgeBB from BB to RealDest.
3592 BB->removePredecessor(Pred: EdgeBB);
3593 BranchInst *EdgeBI = cast<BranchInst>(Val: EdgeBB->getTerminator());
3594 EdgeBI->setSuccessor(idx: 0, NewSucc: RealDest);
3595 EdgeBI->setDebugLoc(BI->getDebugLoc());
3596
3597 if (DTU) {
3598 SmallVector<DominatorTree::UpdateType, 2> Updates;
3599 Updates.push_back(Elt: {DominatorTree::Delete, EdgeBB, BB});
3600 Updates.push_back(Elt: {DominatorTree::Insert, EdgeBB, RealDest});
3601 DTU->applyUpdates(Updates);
3602 }
3603
3604 // For simplicity, we created a separate basic block for the edge. Merge
3605 // it back into the predecessor if possible. This not only avoids
3606 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3607 // bypass the check for trivial cycles above.
3608 MergeBlockIntoPredecessor(BB: EdgeBB, DTU);
3609
3610 // Signal repeat, simplifying any other constants.
3611 return std::nullopt;
3612 }
3613
 // Every known value was skipped; nothing was changed by the loop.
3614 return false;
3615}
3616
3617static bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
3618 DomTreeUpdater *DTU,
3619 const DataLayout &DL,
3620 AssumptionCache *AC) {
3621 std::optional<bool> Result;
3622 bool EverChanged = false;
3623 do {
3624 // Note that None means "we changed things, but recurse further."
3625 Result = foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3626 EverChanged |= Result == std::nullopt || *Result;
3627 } while (Result == std::nullopt);
3628 return EverChanged;
3629}
3630
3631/// Given a BB that starts with the specified two-entry PHI node,
3632/// see if we can eliminate it.
///
/// \param PN a two-entry PHI node; its parent block is the merge point.
/// \param TTI supplies the predictable-branch threshold and the branch
/// mispredict penalty, and is forwarded to dominatesMergePoint.
/// \param DTU if non-null, receives the dominator tree updates once the
/// controlling branch has been replaced.
/// \param AC forwarded to dominatesMergePoint.
/// \param DL used when simplifying the PHI nodes.
/// \param SpeculateUnpredictables when set and the controlling branch carries
/// !unpredictable metadata, the budget is raised by the mispredict penalty.
/// \returns true if anything was changed (PHIs simplified away or the whole
/// if-structure flattened into selects), false otherwise.
3633static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
3634 DomTreeUpdater *DTU, AssumptionCache *AC,
3635 const DataLayout &DL,
3636 bool SpeculateUnpredictables) {
3637 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3638 // statement", which has a very simple dominance structure. Basically, we
3639 // are trying to find the condition that is being branched on, which
3640 // subsequently causes this merge to happen. We really want control
3641 // dependence information for this check, but simplifycfg can't keep it up
3642 // to date, and this catches most of the cases we care about anyway.
3643 BasicBlock *BB = PN->getParent();
3644
3645 BasicBlock *IfTrue, *IfFalse;
3646 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3647 if (!DomBI)
3648 return false;
3649 Value *IfCond = DomBI->getCondition();
3650 // Don't bother if the branch will be constant folded trivially.
3651 if (isa<ConstantInt>(Val: IfCond))
3652 return false;
3653
3654 BasicBlock *DomBlock = DomBI->getParent();
 // IfBlocks collects the incoming blocks of the PHI that end in an
 // unconditional branch; these are the blocks whose contents get hoisted.
3655 SmallVector<BasicBlock *, 2> IfBlocks;
3656 llvm::copy_if(
3657 Range: PN->blocks(), Out: std::back_inserter(x&: IfBlocks), P: [](BasicBlock *IfBlock) {
3658 return cast<BranchInst>(Val: IfBlock->getTerminator())->isUnconditional();
3659 });
3660 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3661 "Will have either one or two blocks to speculate.");
3662
3663 // If the branch is non-unpredictable, see if we either predictably jump to
3664 // the merge bb (if we have only a single 'then' block), or if we predictably
3665 // jump to one specific 'then' block (if we have two of them).
3666 // It isn't beneficial to speculatively execute the code
3667 // from the block that we know is predictably not entered.
3668 bool IsUnpredictable = DomBI->getMetadata(KindID: LLVMContext::MD_unpredictable);
3669 if (!IsUnpredictable) {
3670 uint64_t TWeight, FWeight;
3671 if (extractBranchWeights(I: *DomBI, TrueVal&: TWeight, FalseVal&: FWeight) &&
3672 (TWeight + FWeight) != 0) {
3673 BranchProbability BITrueProb =
3674 BranchProbability::getBranchProbability(Numerator: TWeight, Denominator: TWeight + FWeight);
3675 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3676 BranchProbability BIFalseProb = BITrueProb.getCompl();
3677 if (IfBlocks.size() == 1) {
3678 BranchProbability BIBBProb =
3679 DomBI->getSuccessor(i: 0) == BB ? BITrueProb : BIFalseProb;
3680 if (BIBBProb >= Likely)
3681 return false;
3682 } else {
3683 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3684 return false;
3685 }
3686 }
3687 }
3688
3689 // Don't try to fold an unreachable block. For example, the phi node itself
3690 // can't be the candidate if-condition for a select that we want to form.
3691 if (auto *IfCondPhiInst = dyn_cast<PHINode>(Val: IfCond))
3692 if (IfCondPhiInst->getParent() == BB)
3693 return false;
3694
3695 // Okay, we found that we can merge this two-entry phi node into a select.
3696 // Doing so would require us to fold *all* two entry phi nodes in this block.
3697 // At some point this becomes non-profitable (particularly if the target
3698 // doesn't support cmov's). Only do this transformation if there are two or
3699 // fewer PHI nodes in this block.
 // NOTE(review): as written, the check fires only when a PHI is reached with
 // NumPhis already above 2, i.e. on the fourth PHI, so blocks with three PHIs
 // pass. Confirm whether the comment or the bound is the intended one.
3700 unsigned NumPhis = 0;
3701 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(Val: I); ++NumPhis, ++I)
3702 if (NumPhis > 2)
3703 return false;
3704
3705 // Loop over the PHI's seeing if we can promote them all to select
3706 // instructions. While we are at it, keep track of the instructions
3707 // that need to be moved to the dominating block.
3708 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3709 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3710 InstructionCost Cost = 0;
3711 InstructionCost Budget =
3712 TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3713 if (SpeculateUnpredictables && IsUnpredictable)
3714 Budget += TTI.getBranchMispredictPenalty();
3715
 // From here on PHIs may already have been erased, so the bail-outs return
 // Changed instead of false.
3716 bool Changed = false;
3717 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(Val: II);) {
 // The iterator is advanced before PN can be erased below.
3718 PHINode *PN = cast<PHINode>(Val: II++);
3719 if (Value *V = simplifyInstruction(I: PN, Q: {DL, PN})) {
3720 PN->replaceAllUsesWith(V);
3721 PN->eraseFromParent();
3722 Changed = true;
3723 continue;
3724 }
3725
3726 if (!dominatesMergePoint(V: PN->getIncomingValue(i: 0), BB, InsertPt: DomBI,
3727 AggressiveInsts, Cost, Budget, TTI, AC,
3728 ZeroCostInstructions) ||
3729 !dominatesMergePoint(V: PN->getIncomingValue(i: 1), BB, InsertPt: DomBI,
3730 AggressiveInsts, Cost, Budget, TTI, AC,
3731 ZeroCostInstructions))
3732 return Changed;
3733 }
3734
3735 // If we folded the first phi, PN dangles at this point. Refresh it. If
3736 // we ran out of PHIs then we simplified them all.
3737 PN = dyn_cast<PHINode>(Val: BB->begin());
3738 if (!PN)
3739 return true;
3740
3741 // Return true if at least one of these is a 'not', and another is either
3742 // a 'not' too, or a constant.
3743 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3744 if (!match(V: V0, P: m_Not(V: m_Value())))
3745 std::swap(a&: V0, b&: V1);
3746 auto Invertible = m_CombineOr(L: m_Not(V: m_Value()), R: m_AnyIntegralConstant());
3747 return match(V: V0, P: m_Not(V: m_Value())) && match(V: V1, P: Invertible);
3748 };
3749
3750 // Don't fold i1 branches on PHIs which contain binary operators or
3751 // (possibly inverted) select form of or/ands, unless one of
3752 // the incoming values is a 'not' and another one is freely invertible.
3753 // These can often be turned into switches and other things.
3754 auto IsBinOpOrAnd = [](Value *V) {
3755 return match(
3756 V, P: m_CombineOr(L: m_BinOp(), R: m_c_Select(L: m_ImmConstant(), R: m_Value())));
3757 };
3758 if (PN->getType()->isIntegerTy(Bitwidth: 1) &&
3759 (IsBinOpOrAnd(PN->getIncomingValue(i: 0)) ||
3760 IsBinOpOrAnd(PN->getIncomingValue(i: 1)) || IsBinOpOrAnd(IfCond)) &&
3761 !CanHoistNotFromBothValues(PN->getIncomingValue(i: 0),
3762 PN->getIncomingValue(i: 1)))
3763 return Changed;
3764
3765 // If all PHI nodes are promotable, check to make sure that all instructions
3766 // in the predecessor blocks can be promoted as well. If not, we won't be able
3767 // to get rid of the control flow, so it's not worth promoting to select
3768 // instructions.
3769 for (BasicBlock *IfBlock : IfBlocks)
3770 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3771 if (!AggressiveInsts.count(Ptr: &*I) && !I->isDebugOrPseudoInst()) {
3772 // This is not an aggressive instruction that we can promote.
3773 // Because of this, we won't be able to get rid of the control flow, so
3774 // the xform is not worth it.
3775 return Changed;
3776 }
3777
3778 // If either of the blocks has its address taken, we can't do this fold.
3779 if (any_of(Range&: IfBlocks,
3780 P: [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3781 return Changed;
3782
3783 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3784 if (IsUnpredictable) dbgs() << " (unpredictable)";
3785 dbgs() << " T: " << IfTrue->getName()
3786 << " F: " << IfFalse->getName() << "\n");
3787
3788 // If we can still promote the PHI nodes after this gauntlet of tests,
3789 // do all of the PHI's now.
3790
3791 // Move all 'aggressive' instructions, which are defined in the
3792 // conditional parts of the if's up to the dominating block.
3793 for (BasicBlock *IfBlock : IfBlocks)
3794 hoistAllInstructionsInto(DomBlock, InsertPt: DomBI, BB: IfBlock);
3795
 // The selects are created immediately before the dominating branch.
3796 IRBuilder<NoFolder> Builder(DomBI);
3797 // Propagate fast-math-flags from phi nodes to replacement selects.
3798 while (PHINode *PN = dyn_cast<PHINode>(Val: BB->begin())) {
3799 // Change the PHI node into a select instruction.
3800 Value *TrueVal = PN->getIncomingValueForBlock(BB: IfTrue);
3801 Value *FalseVal = PN->getIncomingValueForBlock(BB: IfFalse);
3802
3803 Value *Sel = Builder.CreateSelectFMF(C: IfCond, True: TrueVal, False: FalseVal,
3804 FMFSource: isa<FPMathOperator>(Val: PN) ? PN : nullptr,
3805 Name: "", MDFrom: DomBI);
3806 PN->replaceAllUsesWith(V: Sel);
3807 Sel->takeName(V: PN);
3808 PN->eraseFromParent();
3809 }
3810
3811 // At this point, all IfBlocks are empty, so our if statement
3812 // has been flattened. Change DomBlock to jump directly to our new block to
3813 // avoid other simplifycfg's kicking in on the diamond.
3814 Builder.CreateBr(Dest: BB);
3815
 // The updates are gathered while DomBI still exists (successors(DomBlock) is
 // read here) and are applied only after DomBI has been erased.
3816 SmallVector<DominatorTree::UpdateType, 3> Updates;
3817 if (DTU) {
3818 Updates.push_back(Elt: {DominatorTree::Insert, DomBlock, BB});
3819 for (auto *Successor : successors(BB: DomBlock))
3820 Updates.push_back(Elt: {DominatorTree::Delete, DomBlock, Successor});
3821 }
3822
3823 DomBI->eraseFromParent();
3824 if (DTU)
3825 DTU->applyUpdates(Updates);
3826
3827 return true;
3828}
3829
3830static Value *createLogicalOp(IRBuilderBase &Builder,
3831 Instruction::BinaryOps Opc, Value *LHS,
3832 Value *RHS, const Twine &Name = "") {
3833 // Try to relax logical op to binary op.
3834 if (impliesPoison(ValAssumedPoison: RHS, V: LHS))
3835 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3836 if (Opc == Instruction::And)
3837 return Builder.CreateLogicalAnd(Cond1: LHS, Cond2: RHS, Name);
3838 if (Opc == Instruction::Or)
3839 return Builder.CreateLogicalOr(Cond1: LHS, Cond2: RHS, Name);
3840 llvm_unreachable("Invalid logical opcode");
3841}
3842
3843/// Return true if either PBI or BI has branch weight available, and store
3844/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3845/// not have branch weight, use 1:1 as its weight.
3846static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
3847 uint64_t &PredTrueWeight,
3848 uint64_t &PredFalseWeight,
3849 uint64_t &SuccTrueWeight,
3850 uint64_t &SuccFalseWeight) {
3851 bool PredHasWeights =
3852 extractBranchWeights(I: *PBI, TrueVal&: PredTrueWeight, FalseVal&: PredFalseWeight);
3853 bool SuccHasWeights =
3854 extractBranchWeights(I: *BI, TrueVal&: SuccTrueWeight, FalseVal&: SuccFalseWeight);
3855 if (PredHasWeights || SuccHasWeights) {
3856 if (!PredHasWeights)
3857 PredTrueWeight = PredFalseWeight = 1;
3858 if (!SuccHasWeights)
3859 SuccTrueWeight = SuccFalseWeight = 1;
3860 return true;
3861 } else {
3862 return false;
3863 }
3864}
3865
3866/// Determine if the two branches share a common destination and deduce a glue
3867/// that joins the branches' conditions to arrive at the common destination if
3868/// that would be profitable.
3869static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3870shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
3871 const TargetTransformInfo *TTI) {
3872 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3873 "Both blocks must end with a conditional branches.");
3874 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3875 "PredBB must be a predecessor of BB.");
3876
3877 // We have the potential to fold the conditions together, but if the
3878 // predecessor branch is predictable, we may not want to merge them.
3879 uint64_t PTWeight, PFWeight;
3880 BranchProbability PBITrueProb, Likely;
3881 if (TTI && !PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
3882 extractBranchWeights(I: *PBI, TrueVal&: PTWeight, FalseVal&: PFWeight) &&
3883 (PTWeight + PFWeight) != 0) {
3884 PBITrueProb =
3885 BranchProbability::getBranchProbability(Numerator: PTWeight, Denominator: PTWeight + PFWeight);
3886 Likely = TTI->getPredictableBranchThreshold();
3887 }
3888
3889 if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
3890 // Speculate the 2nd condition unless the 1st is probably true.
3891 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3892 return {{BI->getSuccessor(i: 0), Instruction::Or, false}};
3893 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
3894 // Speculate the 2nd condition unless the 1st is probably false.
3895 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3896 return {{BI->getSuccessor(i: 1), Instruction::And, false}};
3897 } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
3898 // Speculate the 2nd condition unless the 1st is probably true.
3899 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3900 return {{BI->getSuccessor(i: 1), Instruction::And, true}};
3901 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
3902 // Speculate the 2nd condition unless the 1st is probably false.
3903 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3904 return {{BI->getSuccessor(i: 0), Instruction::Or, true}};
3905 }
3906 return std::nullopt;
3907}
3908
3909static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
3910 DomTreeUpdater *DTU,
3911 MemorySSAUpdater *MSSAU,
3912 const TargetTransformInfo *TTI) {
3913 BasicBlock *BB = BI->getParent();
3914 BasicBlock *PredBlock = PBI->getParent();
3915
3916 // Determine if the two branches share a common destination.
3917 BasicBlock *CommonSucc;
3918 Instruction::BinaryOps Opc;
3919 bool InvertPredCond;
3920 std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) =
3921 *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
3922
3923 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3924
3925 IRBuilder<> Builder(PBI);
3926 // The builder is used to create instructions to eliminate the branch in BB.
3927 // If BB's terminator has !annotation metadata, add it to the new
3928 // instructions.
3929 Builder.CollectMetadataToCopy(Src: BB->getTerminator(),
3930 MetadataKinds: {LLVMContext::MD_annotation});
3931
3932 // If we need to invert the condition in the pred block to match, do so now.
3933 if (InvertPredCond) {
3934 InvertBranch(PBI, Builder);
3935 }
3936
3937 BasicBlock *UniqueSucc =
3938 PBI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 0) : BI->getSuccessor(i: 1);
3939
3940 // Before cloning instructions, notify the successor basic block that it
3941 // is about to have a new predecessor. This will update PHI nodes,
3942 // which will allow us to update live-out uses of bonus instructions.
3943 addPredecessorToBlock(Succ: UniqueSucc, NewPred: PredBlock, ExistPred: BB, MSSAU);
3944
3945 // Try to update branch weights.
3946 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3947 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3948 SuccTrueWeight, SuccFalseWeight)) {
3949 SmallVector<uint64_t, 8> NewWeights;
3950
3951 if (PBI->getSuccessor(i: 0) == BB) {
3952 // PBI: br i1 %x, BB, FalseDest
3953 // BI: br i1 %y, UniqueSucc, FalseDest
3954 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3955 NewWeights.push_back(Elt: PredTrueWeight * SuccTrueWeight);
3956 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3957 // TrueWeight for PBI * FalseWeight for BI.
3958 // We assume that total weights of a BranchInst can fit into 32 bits.
3959 // Therefore, we will not have overflow using 64-bit arithmetic.
3960 NewWeights.push_back(Elt: PredFalseWeight *
3961 (SuccFalseWeight + SuccTrueWeight) +
3962 PredTrueWeight * SuccFalseWeight);
3963 } else {
3964 // PBI: br i1 %x, TrueDest, BB
3965 // BI: br i1 %y, TrueDest, UniqueSucc
3966 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3967 // FalseWeight for PBI * TrueWeight for BI.
3968 NewWeights.push_back(Elt: PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
3969 PredFalseWeight * SuccTrueWeight);
3970 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3971 NewWeights.push_back(Elt: PredFalseWeight * SuccFalseWeight);
3972 }
3973
3974 // Halve the weights if any of them cannot fit in an uint32_t
3975 fitWeights(Weights: NewWeights);
3976
3977 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3978 setBranchWeights(I: PBI, TrueWeight: MDWeights[0], FalseWeight: MDWeights[1], /*IsExpected=*/false);
3979
3980 // TODO: If BB is reachable from all paths through PredBlock, then we
3981 // could replace PBI's branch probabilities with BI's.
3982 } else
3983 PBI->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);
3984
3985 // Now, update the CFG.
3986 PBI->setSuccessor(idx: PBI->getSuccessor(i: 0) != BB, NewSucc: UniqueSucc);
3987
3988 if (DTU)
3989 DTU->applyUpdates(Updates: {{DominatorTree::Insert, PredBlock, UniqueSucc},
3990 {DominatorTree::Delete, PredBlock, BB}});
3991
3992 // If BI was a loop latch, it may have had associated loop metadata.
3993 // We need to copy it to the new latch, that is, PBI.
3994 if (MDNode *LoopMD = BI->getMetadata(KindID: LLVMContext::MD_loop))
3995 PBI->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD);
3996
3997 ValueToValueMapTy VMap; // maps original values to cloned values
3998 cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
3999
4000 Module *M = BB->getModule();
4001
4002 PredBlock->getTerminator()->cloneDebugInfoFrom(From: BB->getTerminator());
4003 for (DbgVariableRecord &DVR :
4004 filterDbgVars(R: PredBlock->getTerminator()->getDbgRecordRange())) {
4005 RemapDbgRecord(M, DR: &DVR, VM&: VMap,
4006 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
4007 }
4008
4009 // Now that the Cond was cloned into the predecessor basic block,
4010 // or/and the two conditions together.
4011 Value *BICond = VMap[BI->getCondition()];
4012 PBI->setCondition(
4013 createLogicalOp(Builder, Opc, LHS: PBI->getCondition(), RHS: BICond, Name: "or.cond"));
4014
4015 ++NumFoldBranchToCommonDest;
4016 return true;
4017}
4018
4019/// Return if an instruction's type or any of its operands' types are a vector
4020/// type.
4021static bool isVectorOp(Instruction &I) {
4022 return I.getType()->isVectorTy() || any_of(Range: I.operands(), P: [](Use &U) {
4023 return U->getType()->isVectorTy();
4024 });
4025}
4026
4027/// If this basic block is simple enough, and if a predecessor branches to us
4028/// and one of our successors, fold the block into the predecessor and use
4029/// logical operations to pick the right destination.
4030bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
4031 MemorySSAUpdater *MSSAU,
4032 const TargetTransformInfo *TTI,
4033 unsigned BonusInstThreshold) {
4034 // If this block ends with an unconditional branch,
4035 // let speculativelyExecuteBB() deal with it.
4036 if (!BI->isConditional())
4037 return false;
4038
4039 BasicBlock *BB = BI->getParent();
4040 TargetTransformInfo::TargetCostKind CostKind =
4041 BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
4042 : TargetTransformInfo::TCK_SizeAndLatency;
4043
4044 Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());
4045
4046 if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Val: Cond) ||
4047 Cond->getParent() != BB || !Cond->hasOneUse())
4048 return false;
4049
4050 // Finally, don't infinitely unroll conditional loops.
4051 if (is_contained(Range: successors(BB), Element: BB))
4052 return false;
4053
4054 // With which predecessors will we want to deal with?
4055 SmallVector<BasicBlock *, 8> Preds;
4056 for (BasicBlock *PredBlock : predecessors(BB)) {
4057 BranchInst *PBI = dyn_cast<BranchInst>(Val: PredBlock->getTerminator());
4058
4059 // Check that we have two conditional branches. If there is a PHI node in
4060 // the common successor, verify that the same value flows in from both
4061 // blocks.
4062 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(SI1: BI, SI2: PBI))
4063 continue;
4064
4065 // Determine if the two branches share a common destination.
4066 BasicBlock *CommonSucc;
4067 Instruction::BinaryOps Opc;
4068 bool InvertPredCond;
4069 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4070 std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) = *Recipe;
4071 else
4072 continue;
4073
4074 // Check the cost of inserting the necessary logic before performing the
4075 // transformation.
4076 if (TTI) {
4077 Type *Ty = BI->getCondition()->getType();
4078 InstructionCost Cost = TTI->getArithmeticInstrCost(Opcode: Opc, Ty, CostKind);
4079 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4080 !isa<CmpInst>(Val: PBI->getCondition())))
4081 Cost += TTI->getArithmeticInstrCost(Opcode: Instruction::Xor, Ty, CostKind);
4082
4083 if (Cost > BranchFoldThreshold)
4084 continue;
4085 }
4086
4087 // Ok, we do want to deal with this predecessor. Record it.
4088 Preds.emplace_back(Args&: PredBlock);
4089 }
4090
4091 // If there aren't any predecessors into which we can fold,
4092 // don't bother checking the cost.
4093 if (Preds.empty())
4094 return false;
4095
4096 // Only allow this transformation if computing the condition doesn't involve
4097 // too many instructions and these involved instructions can be executed
4098 // unconditionally. We denote all involved instructions except the condition
4099 // as "bonus instructions", and only allow this transformation when the
4100 // number of the bonus instructions we'll need to create when cloning into
4101 // each predecessor does not exceed a certain threshold.
4102 unsigned NumBonusInsts = 0;
4103 bool SawVectorOp = false;
4104 const unsigned PredCount = Preds.size();
4105 for (Instruction &I : *BB) {
4106 // Don't check the branch condition comparison itself.
4107 if (&I == Cond)
4108 continue;
4109 // Ignore the terminator.
4110 if (isa<BranchInst>(Val: I))
4111 continue;
4112 // I must be safe to execute unconditionally.
4113 if (!isSafeToSpeculativelyExecute(I: &I))
4114 return false;
4115 SawVectorOp |= isVectorOp(I);
4116
4117 // Account for the cost of duplicating this instruction into each
4118 // predecessor. Ignore free instructions.
4119 if (!TTI || TTI->getInstructionCost(U: &I, CostKind) !=
4120 TargetTransformInfo::TCC_Free) {
4121 NumBonusInsts += PredCount;
4122
4123 // Early exits once we reach the limit.
4124 if (NumBonusInsts >
4125 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4126 return false;
4127 }
4128
4129 auto IsBCSSAUse = [BB, &I](Use &U) {
4130 auto *UI = cast<Instruction>(Val: U.getUser());
4131 if (auto *PN = dyn_cast<PHINode>(Val: UI))
4132 return PN->getIncomingBlock(U) == BB;
4133 return UI->getParent() == BB && I.comesBefore(Other: UI);
4134 };
4135
4136 // Does this instruction require rewriting of uses?
4137 if (!all_of(Range: I.uses(), P: IsBCSSAUse))
4138 return false;
4139 }
4140 if (NumBonusInsts >
4141 BonusInstThreshold *
4142 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4143 return false;
4144
4145 // Ok, we have the budget. Perform the transformation.
4146 for (BasicBlock *PredBlock : Preds) {
4147 auto *PBI = cast<BranchInst>(Val: PredBlock->getTerminator());
4148 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4149 }
4150 return false;
4151}
4152
4153// If there is only one store in BB1 and BB2, return it, otherwise return
4154// nullptr.
4155static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
4156 StoreInst *S = nullptr;
4157 for (auto *BB : {BB1, BB2}) {
4158 if (!BB)
4159 continue;
4160 for (auto &I : *BB)
4161 if (auto *SI = dyn_cast<StoreInst>(Val: &I)) {
4162 if (S)
4163 // Multiple stores seen.
4164 return nullptr;
4165 else
4166 S = SI;
4167 }
4168 }
4169 return S;
4170}
4171
4172static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
4173 Value *AlternativeV = nullptr) {
4174 // PHI is going to be a PHI node that allows the value V that is defined in
4175 // BB to be referenced in BB's only successor.
4176 //
4177 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4178 // doesn't matter to us what the other operand is (it'll never get used). We
4179 // could just create a new PHI with an undef incoming value, but that could
4180 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4181 // other PHI. So here we directly look for some PHI in BB's successor with V
4182 // as an incoming operand. If we find one, we use it, else we create a new
4183 // one.
4184 //
4185 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4186 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4187 // where OtherBB is the single other predecessor of BB's only successor.
4188 PHINode *PHI = nullptr;
4189 BasicBlock *Succ = BB->getSingleSuccessor();
4190
4191 for (auto I = Succ->begin(); isa<PHINode>(Val: I); ++I)
4192 if (cast<PHINode>(Val&: I)->getIncomingValueForBlock(BB) == V) {
4193 PHI = cast<PHINode>(Val&: I);
4194 if (!AlternativeV)
4195 break;
4196
4197 assert(Succ->hasNPredecessors(2));
4198 auto PredI = pred_begin(BB: Succ);
4199 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4200 if (PHI->getIncomingValueForBlock(BB: OtherPredBB) == AlternativeV)
4201 break;
4202 PHI = nullptr;
4203 }
4204 if (PHI)
4205 return PHI;
4206
4207 // If V is not an instruction defined in BB, just return it.
4208 if (!AlternativeV &&
4209 (!isa<Instruction>(Val: V) || cast<Instruction>(Val: V)->getParent() != BB))
4210 return V;
4211
4212 PHI = PHINode::Create(Ty: V->getType(), NumReservedValues: 2, NameStr: "simplifycfg.merge");
4213 PHI->insertBefore(InsertPos: Succ->begin());
4214 PHI->addIncoming(V, BB);
4215 for (BasicBlock *PredBB : predecessors(BB: Succ))
4216 if (PredBB != BB)
4217 PHI->addIncoming(
4218 V: AlternativeV ? AlternativeV : PoisonValue::get(T: V->getType()), BB: PredBB);
4219 return PHI;
4220}
4221
4222static bool mergeConditionalStoreToAddress(
4223 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4224 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4225 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4226 // For every pointer, there must be exactly two stores, one coming from
4227 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4228 // store (to any address) in PTB,PFB or QTB,QFB.
4229 // FIXME: We could relax this restriction with a bit more work and performance
4230 // testing.
4231 StoreInst *PStore = findUniqueStoreInBlocks(BB1: PTB, BB2: PFB);
4232 StoreInst *QStore = findUniqueStoreInBlocks(BB1: QTB, BB2: QFB);
4233 if (!PStore || !QStore)
4234 return false;
4235
4236 // Now check the stores are compatible.
4237 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4238 PStore->getValueOperand()->getType() !=
4239 QStore->getValueOperand()->getType())
4240 return false;
4241
4242 // Check that sinking the store won't cause program behavior changes. Sinking
4243 // the store out of the Q blocks won't change any behavior as we're sinking
4244 // from a block to its unconditional successor. But we're moving a store from
4245 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4246 // So we need to check that there are no aliasing loads or stores in
4247 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4248 // operations between PStore and the end of its parent block.
4249 //
4250 // The ideal way to do this is to query AliasAnalysis, but we don't
4251 // preserve AA currently so that is dangerous. Be super safe and just
4252 // check there are no other memory operations at all.
4253 for (auto &I : *QFB->getSinglePredecessor())
4254 if (I.mayReadOrWriteMemory())
4255 return false;
4256 for (auto &I : *QFB)
4257 if (&I != QStore && I.mayReadOrWriteMemory())
4258 return false;
4259 if (QTB)
4260 for (auto &I : *QTB)
4261 if (&I != QStore && I.mayReadOrWriteMemory())
4262 return false;
4263 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4264 I != E; ++I)
4265 if (&*I != PStore && I->mayReadOrWriteMemory())
4266 return false;
4267
4268 // If we're not in aggressive mode, we only optimize if we have some
4269 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4270 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4271 if (!BB)
4272 return true;
4273 // Heuristic: if the block can be if-converted/phi-folded and the
4274 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4275 // thread this store.
4276 InstructionCost Cost = 0;
4277 InstructionCost Budget =
4278 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
4279 for (auto &I : BB->instructionsWithoutDebug(SkipPseudoOp: false)) {
4280 // Consider terminator instruction to be free.
4281 if (I.isTerminator())
4282 continue;
4283 // If this is one the stores that we want to speculate out of this BB,
4284 // then don't count it's cost, consider it to be free.
4285 if (auto *S = dyn_cast<StoreInst>(Val: &I))
4286 if (llvm::find(Range&: FreeStores, Val: S))
4287 continue;
4288 // Else, we have a white-list of instructions that we are ak speculating.
4289 if (!isa<BinaryOperator>(Val: I) && !isa<GetElementPtrInst>(Val: I))
4290 return false; // Not in white-list - not worthwhile folding.
4291 // And finally, if this is a non-free instruction that we are okay
4292 // speculating, ensure that we consider the speculation budget.
4293 Cost +=
4294 TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
4295 if (Cost > Budget)
4296 return false; // Eagerly refuse to fold as soon as we're out of budget.
4297 }
4298 assert(Cost <= Budget &&
4299 "When we run out of budget we will eagerly return from within the "
4300 "per-instruction loop.");
4301 return true;
4302 };
4303
4304 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4305 if (!MergeCondStoresAggressively &&
4306 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4307 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4308 return false;
4309
4310 // If PostBB has more than two predecessors, we need to split it so we can
4311 // sink the store.
4312 if (std::next(x: pred_begin(BB: PostBB), n: 2) != pred_end(BB: PostBB)) {
4313 // We know that QFB's only successor is PostBB. And QFB has a single
4314 // predecessor. If QTB exists, then its only successor is also PostBB.
4315 // If QTB does not exist, then QFB's only predecessor has a conditional
4316 // branch to QFB and PostBB.
4317 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4318 BasicBlock *NewBB =
4319 SplitBlockPredecessors(BB: PostBB, Preds: {QFB, TruePred}, Suffix: "condstore.split", DTU);
4320 if (!NewBB)
4321 return false;
4322 PostBB = NewBB;
4323 }
4324
4325 // OK, we're going to sink the stores to PostBB. The store has to be
4326 // conditional though, so first create the predicate.
4327 Value *PCond = cast<BranchInst>(Val: PFB->getSinglePredecessor()->getTerminator())
4328 ->getCondition();
4329 Value *QCond = cast<BranchInst>(Val: QFB->getSinglePredecessor()->getTerminator())
4330 ->getCondition();
4331
4332 Value *PPHI = ensureValueAvailableInSuccessor(V: PStore->getValueOperand(),
4333 BB: PStore->getParent());
4334 Value *QPHI = ensureValueAvailableInSuccessor(V: QStore->getValueOperand(),
4335 BB: QStore->getParent(), AlternativeV: PPHI);
4336
4337 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4338 IRBuilder<> QB(PostBB, PostBBFirst);
4339 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4340
4341 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(V: PCond);
4342 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(V: QCond);
4343
4344 if (InvertPCond)
4345 PPred = QB.CreateNot(V: PPred);
4346 if (InvertQCond)
4347 QPred = QB.CreateNot(V: QPred);
4348 Value *CombinedPred = QB.CreateOr(LHS: PPred, RHS: QPred);
4349
4350 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4351 auto *T = SplitBlockAndInsertIfThen(Cond: CombinedPred, SplitBefore: InsertPt,
4352 /*Unreachable=*/false,
4353 /*BranchWeights=*/nullptr, DTU);
4354
4355 QB.SetInsertPoint(T);
4356 StoreInst *SI = cast<StoreInst>(Val: QB.CreateStore(Val: QPHI, Ptr: Address));
4357 SI->setAAMetadata(PStore->getAAMetadata().merge(Other: QStore->getAAMetadata()));
4358 // Choose the minimum alignment. If we could prove both stores execute, we
4359 // could use biggest one. In this case, though, we only know that one of the
4360 // stores executes. And we don't know it's safe to take the alignment from a
4361 // store that doesn't execute.
4362 SI->setAlignment(std::min(a: PStore->getAlign(), b: QStore->getAlign()));
4363
4364 QStore->eraseFromParent();
4365 PStore->eraseFromParent();
4366
4367 return true;
4368}
4369
4370static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
4371 DomTreeUpdater *DTU, const DataLayout &DL,
4372 const TargetTransformInfo &TTI) {
4373 // The intention here is to find diamonds or triangles (see below) where each
4374 // conditional block contains a store to the same address. Both of these
4375 // stores are conditional, so they can't be unconditionally sunk. But it may
4376 // be profitable to speculatively sink the stores into one merged store at the
4377 // end, and predicate the merged store on the union of the two conditions of
4378 // PBI and QBI.
4379 //
4380 // This can reduce the number of stores executed if both of the conditions are
4381 // true, and can allow the blocks to become small enough to be if-converted.
4382 // This optimization will also chain, so that ladders of test-and-set
4383 // sequences can be if-converted away.
4384 //
4385 // We only deal with simple diamonds or triangles:
4386 //
4387 // PBI or PBI or a combination of the two
4388 // / \ | \
4389 // PTB PFB | PFB
4390 // \ / | /
4391 // QBI QBI
4392 // / \ | \
4393 // QTB QFB | QFB
4394 // \ / | /
4395 // PostBB PostBB
4396 //
4397 // We model triangles as a type of diamond with a nullptr "true" block.
4398 // Triangles are canonicalized so that the fallthrough edge is represented by
4399 // a true condition, as in the diagram above.
4400 BasicBlock *PTB = PBI->getSuccessor(i: 0);
4401 BasicBlock *PFB = PBI->getSuccessor(i: 1);
4402 BasicBlock *QTB = QBI->getSuccessor(i: 0);
4403 BasicBlock *QFB = QBI->getSuccessor(i: 1);
4404 BasicBlock *PostBB = QFB->getSingleSuccessor();
4405
4406 // Make sure we have a good guess for PostBB. If QTB's only successor is
4407 // QFB, then QFB is a better PostBB.
4408 if (QTB->getSingleSuccessor() == QFB)
4409 PostBB = QFB;
4410
4411 // If we couldn't find a good PostBB, stop.
4412 if (!PostBB)
4413 return false;
4414
4415 bool InvertPCond = false, InvertQCond = false;
4416 // Canonicalize fallthroughs to the true branches.
4417 if (PFB == QBI->getParent()) {
4418 std::swap(a&: PFB, b&: PTB);
4419 InvertPCond = true;
4420 }
4421 if (QFB == PostBB) {
4422 std::swap(a&: QFB, b&: QTB);
4423 InvertQCond = true;
4424 }
4425
4426 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4427 // and QFB may not. Model fallthroughs as a nullptr block.
4428 if (PTB == QBI->getParent())
4429 PTB = nullptr;
4430 if (QTB == PostBB)
4431 QTB = nullptr;
4432
4433 // Legality bailouts. We must have at least the non-fallthrough blocks and
4434 // the post-dominating block, and the non-fallthroughs must only have one
4435 // predecessor.
4436 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4437 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4438 };
4439 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4440 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4441 return false;
4442 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4443 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4444 return false;
4445 if (!QBI->getParent()->hasNUses(N: 2))
4446 return false;
4447
4448 // OK, this is a sequence of two diamonds or triangles.
4449 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4450 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4451 for (auto *BB : {PTB, PFB}) {
4452 if (!BB)
4453 continue;
4454 for (auto &I : *BB)
4455 if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
4456 PStoreAddresses.insert(Ptr: SI->getPointerOperand());
4457 }
4458 for (auto *BB : {QTB, QFB}) {
4459 if (!BB)
4460 continue;
4461 for (auto &I : *BB)
4462 if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
4463 QStoreAddresses.insert(Ptr: SI->getPointerOperand());
4464 }
4465
4466 set_intersect(S1&: PStoreAddresses, S2: QStoreAddresses);
4467 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4468 // clear what it contains.
4469 auto &CommonAddresses = PStoreAddresses;
4470
4471 bool Changed = false;
4472 for (auto *Address : CommonAddresses)
4473 Changed |=
4474 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4475 InvertPCond, InvertQCond, DTU, DL, TTI);
4476 return Changed;
4477}
4478
4479/// If the previous block ended with a widenable branch, determine if reusing
4480/// the target block is profitable and legal. This will have the effect of
4481/// "widening" PBI, but doesn't require us to reason about hosting safety.
4482static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
4483 DomTreeUpdater *DTU) {
4484 // TODO: This can be generalized in two important ways:
4485 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4486 // values from the PBI edge.
4487 // 2) We can sink side effecting instructions into BI's fallthrough
4488 // successor provided they doesn't contribute to computation of
4489 // BI's condition.
4490 BasicBlock *IfTrueBB = PBI->getSuccessor(i: 0);
4491 BasicBlock *IfFalseBB = PBI->getSuccessor(i: 1);
4492 if (!isWidenableBranch(U: PBI) || IfTrueBB != BI->getParent() ||
4493 !BI->getParent()->getSinglePredecessor())
4494 return false;
4495 if (!IfFalseBB->phis().empty())
4496 return false; // TODO
4497 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4498 // may undo the transform done here.
4499 // TODO: There might be a more fine-grained solution to this.
4500 if (!llvm::succ_empty(BB: IfFalseBB))
4501 return false;
4502 // Use lambda to lazily compute expensive condition after cheap ones.
4503 auto NoSideEffects = [](BasicBlock &BB) {
4504 return llvm::none_of(Range&: BB, P: [](const Instruction &I) {
4505 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4506 });
4507 };
4508 if (BI->getSuccessor(i: 1) != IfFalseBB && // no inf looping
4509 BI->getSuccessor(i: 1)->getTerminatingDeoptimizeCall() && // profitability
4510 NoSideEffects(*BI->getParent())) {
4511 auto *OldSuccessor = BI->getSuccessor(i: 1);
4512 OldSuccessor->removePredecessor(Pred: BI->getParent());
4513 BI->setSuccessor(idx: 1, NewSucc: IfFalseBB);
4514 if (DTU)
4515 DTU->applyUpdates(
4516 Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4517 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4518 return true;
4519 }
4520 if (BI->getSuccessor(i: 0) != IfFalseBB && // no inf looping
4521 BI->getSuccessor(i: 0)->getTerminatingDeoptimizeCall() && // profitability
4522 NoSideEffects(*BI->getParent())) {
4523 auto *OldSuccessor = BI->getSuccessor(i: 0);
4524 OldSuccessor->removePredecessor(Pred: BI->getParent());
4525 BI->setSuccessor(idx: 0, NewSucc: IfFalseBB);
4526 if (DTU)
4527 DTU->applyUpdates(
4528 Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4529 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4530 return true;
4531 }
4532 return false;
4533}
4534
4535/// If we have a conditional branch as a predecessor of another block,
4536/// this function tries to simplify it. We know
4537/// that PBI and BI are both conditional branches, and BI is in one of the
4538/// successor blocks of PBI - PBI branches to BI.
4539static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
4540 DomTreeUpdater *DTU,
4541 const DataLayout &DL,
4542 const TargetTransformInfo &TTI) {
4543 assert(PBI->isConditional() && BI->isConditional());
4544 BasicBlock *BB = BI->getParent();
4545
4546 // If this block ends with a branch instruction, and if there is a
4547 // predecessor that ends on a branch of the same condition, make
4548 // this conditional branch redundant.
4549 if (PBI->getCondition() == BI->getCondition() &&
4550 PBI->getSuccessor(i: 0) != PBI->getSuccessor(i: 1)) {
4551 // Okay, the outcome of this conditional branch is statically
4552 // knowable. If this block had a single pred, handle specially, otherwise
4553 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4554 if (BB->getSinglePredecessor()) {
4555 // Turn this into a branch on constant.
4556 bool CondIsTrue = PBI->getSuccessor(i: 0) == BB;
4557 BI->setCondition(
4558 ConstantInt::get(Ty: Type::getInt1Ty(C&: BB->getContext()), V: CondIsTrue));
4559 return true; // Nuke the branch on constant.
4560 }
4561 }
4562
4563 // If the previous block ended with a widenable branch, determine if reusing
4564 // the target block is profitable and legal. This will have the effect of
4565 // "widening" PBI, but doesn't require us to reason about hosting safety.
4566 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4567 return true;
4568
4569 // If both branches are conditional and both contain stores to the same
4570 // address, remove the stores from the conditionals and create a conditional
4571 // merged store at the end.
4572 if (MergeCondStores && mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
4573 return true;
4574
4575 // If this is a conditional branch in an empty block, and if any
4576 // predecessors are a conditional branch to one of our destinations,
4577 // fold the conditions into logical ops and one cond br.
4578
4579 // Ignore dbg intrinsics.
4580 if (&*BB->instructionsWithoutDebug(SkipPseudoOp: false).begin() != BI)
4581 return false;
4582
4583 int PBIOp, BIOp;
4584 if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
4585 PBIOp = 0;
4586 BIOp = 0;
4587 } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
4588 PBIOp = 0;
4589 BIOp = 1;
4590 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
4591 PBIOp = 1;
4592 BIOp = 0;
4593 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
4594 PBIOp = 1;
4595 BIOp = 1;
4596 } else {
4597 return false;
4598 }
4599
4600 // Check to make sure that the other destination of this branch
4601 // isn't BB itself. If so, this is an infinite loop that will
4602 // keep getting unwound.
4603 if (PBI->getSuccessor(i: PBIOp) == BB)
4604 return false;
4605
4606 // If predecessor's branch probability to BB is too low don't merge branches.
4607 SmallVector<uint32_t, 2> PredWeights;
4608 if (!PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
4609 extractBranchWeights(I: *PBI, Weights&: PredWeights) &&
4610 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4611
4612 BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
4613 Numerator: PredWeights[PBIOp],
4614 Denominator: static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4615
4616 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4617 if (CommonDestProb >= Likely)
4618 return false;
4619 }
4620
4621 // Do not perform this transformation if it would require
4622 // insertion of a large number of select instructions. For targets
4623 // without predication/cmovs, this is a big pessimization.
4624
4625 BasicBlock *CommonDest = PBI->getSuccessor(i: PBIOp);
4626 BasicBlock *RemovedDest = PBI->getSuccessor(i: PBIOp ^ 1);
4627 unsigned NumPhis = 0;
4628 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(Val: II);
4629 ++II, ++NumPhis) {
4630 if (NumPhis > 2) // Disable this xform.
4631 return false;
4632 }
4633
4634 // Finally, if everything is ok, fold the branches to logical ops.
4635 BasicBlock *OtherDest = BI->getSuccessor(i: BIOp ^ 1);
4636
4637 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4638 << "AND: " << *BI->getParent());
4639
4640 SmallVector<DominatorTree::UpdateType, 5> Updates;
4641
4642 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4643 // branch in it, where one edge (OtherDest) goes back to itself but the other
4644 // exits. We don't *know* that the program avoids the infinite loop
4645 // (even though that seems likely). If we do this xform naively, we'll end up
4646 // recursively unpeeling the loop. Since we know that (after the xform is
4647 // done) that the block *is* infinite if reached, we just make it an obviously
4648 // infinite loop with no cond branch.
4649 if (OtherDest == BB) {
4650 // Insert it at the end of the function, because it's either code,
4651 // or it won't matter if it's hot. :)
4652 BasicBlock *InfLoopBlock =
4653 BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
4654 BranchInst::Create(IfTrue: InfLoopBlock, InsertBefore: InfLoopBlock);
4655 if (DTU)
4656 Updates.push_back(Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4657 OtherDest = InfLoopBlock;
4658 }
4659
4660 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4661
4662 // BI may have other predecessors. Because of this, we leave
4663 // it alone, but modify PBI.
4664
4665 // Make sure we get to CommonDest on True&True directions.
4666 Value *PBICond = PBI->getCondition();
4667 IRBuilder<NoFolder> Builder(PBI);
4668 if (PBIOp)
4669 PBICond = Builder.CreateNot(V: PBICond, Name: PBICond->getName() + ".not");
4670
4671 Value *BICond = BI->getCondition();
4672 if (BIOp)
4673 BICond = Builder.CreateNot(V: BICond, Name: BICond->getName() + ".not");
4674
4675 // Merge the conditions.
4676 Value *Cond =
4677 createLogicalOp(Builder, Opc: Instruction::Or, LHS: PBICond, RHS: BICond, Name: "brmerge");
4678
4679 // Modify PBI to branch on the new condition to the new dests.
4680 PBI->setCondition(Cond);
4681 PBI->setSuccessor(idx: 0, NewSucc: CommonDest);
4682 PBI->setSuccessor(idx: 1, NewSucc: OtherDest);
4683
4684 if (DTU) {
4685 Updates.push_back(Elt: {DominatorTree::Insert, PBI->getParent(), OtherDest});
4686 Updates.push_back(Elt: {DominatorTree::Delete, PBI->getParent(), RemovedDest});
4687
4688 DTU->applyUpdates(Updates);
4689 }
4690
4691 // Update branch weight for PBI.
4692 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4693 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4694 bool HasWeights =
4695 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4696 SuccTrueWeight, SuccFalseWeight);
4697 if (HasWeights) {
4698 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4699 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4700 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4701 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4702 // The weight to CommonDest should be PredCommon * SuccTotal +
4703 // PredOther * SuccCommon.
4704 // The weight to OtherDest should be PredOther * SuccOther.
4705 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4706 PredOther * SuccCommon,
4707 PredOther * SuccOther};
4708 // Halve the weights if any of them cannot fit in an uint32_t
4709 fitWeights(Weights: NewWeights);
4710
4711 setBranchWeights(I: PBI, TrueWeight: NewWeights[0], FalseWeight: NewWeights[1], /*IsExpected=*/false);
4712 }
4713
4714 // OtherDest may have phi nodes. If so, add an entry from PBI's
4715 // block that are identical to the entries for BI's block.
4716 addPredecessorToBlock(Succ: OtherDest, NewPred: PBI->getParent(), ExistPred: BB);
4717
4718 // We know that the CommonDest already had an edge from PBI to
4719 // it. If it has PHIs though, the PHIs may have different
4720 // entries for BB and PBI's BB. If so, insert a select to make
4721 // them agree.
4722 for (PHINode &PN : CommonDest->phis()) {
4723 Value *BIV = PN.getIncomingValueForBlock(BB);
4724 unsigned PBBIdx = PN.getBasicBlockIndex(BB: PBI->getParent());
4725 Value *PBIV = PN.getIncomingValue(i: PBBIdx);
4726 if (BIV != PBIV) {
4727 // Insert a select in PBI to pick the right value.
4728 SelectInst *NV = cast<SelectInst>(
4729 Val: Builder.CreateSelect(C: PBICond, True: PBIV, False: BIV, Name: PBIV->getName() + ".mux"));
4730 PN.setIncomingValue(i: PBBIdx, V: NV);
4731 // Although the select has the same condition as PBI, the original branch
4732 // weights for PBI do not apply to the new select because the select's
4733 // 'logical' edges are incoming edges of the phi that is eliminated, not
4734 // the outgoing edges of PBI.
4735 if (HasWeights) {
4736 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4737 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4738 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4739 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4740 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4741 // The weight to PredOtherDest should be PredOther * SuccCommon.
4742 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4743 PredOther * SuccCommon};
4744
4745 fitWeights(Weights: NewWeights);
4746
4747 setBranchWeights(I: NV, TrueWeight: NewWeights[0], FalseWeight: NewWeights[1],
4748 /*IsExpected=*/false);
4749 }
4750 }
4751 }
4752
4753 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4754 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4755
4756 // This basic block is probably dead. We know it has at least
4757 // one fewer predecessor.
4758 return true;
4759}
4760
4761// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4762// true or to FalseBB if Cond is false.
4763// Takes care of updating the successors and removing the old terminator.
4764// Also makes sure not to introduce new successors by assuming that edges to
4765// non-successor TrueBBs and FalseBBs aren't reachable.
4766bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4767 Value *Cond, BasicBlock *TrueBB,
4768 BasicBlock *FalseBB,
4769 uint32_t TrueWeight,
4770 uint32_t FalseWeight) {
4771 auto *BB = OldTerm->getParent();
4772 // Remove any superfluous successor edges from the CFG.
4773 // First, figure out which successors to preserve.
4774 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4775 // successor.
4776 BasicBlock *KeepEdge1 = TrueBB;
4777 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4778
4779 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4780
4781 // Then remove the rest.
4782 for (BasicBlock *Succ : successors(I: OldTerm)) {
4783 // Make sure only to keep exactly one copy of each edge.
4784 if (Succ == KeepEdge1)
4785 KeepEdge1 = nullptr;
4786 else if (Succ == KeepEdge2)
4787 KeepEdge2 = nullptr;
4788 else {
4789 Succ->removePredecessor(Pred: BB,
4790 /*KeepOneInputPHIs=*/true);
4791
4792 if (Succ != TrueBB && Succ != FalseBB)
4793 RemovedSuccessors.insert(X: Succ);
4794 }
4795 }
4796
4797 IRBuilder<> Builder(OldTerm);
4798 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4799
4800 // Insert an appropriate new terminator.
4801 if (!KeepEdge1 && !KeepEdge2) {
4802 if (TrueBB == FalseBB) {
4803 // We were only looking for one successor, and it was present.
4804 // Create an unconditional branch to it.
4805 Builder.CreateBr(Dest: TrueBB);
4806 } else {
4807 // We found both of the successors we were looking for.
4808 // Create a conditional branch sharing the condition of the select.
4809 BranchInst *NewBI = Builder.CreateCondBr(Cond, True: TrueBB, False: FalseBB);
4810 if (TrueWeight != FalseWeight)
4811 setBranchWeights(I: NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
4812 }
4813 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4814 // Neither of the selected blocks were successors, so this
4815 // terminator must be unreachable.
4816 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4817 } else {
4818 // One of the selected values was a successor, but the other wasn't.
4819 // Insert an unconditional branch to the one that was found;
4820 // the edge to the one that wasn't must be unreachable.
4821 if (!KeepEdge1) {
4822 // Only TrueBB was found.
4823 Builder.CreateBr(Dest: TrueBB);
4824 } else {
4825 // Only FalseBB was found.
4826 Builder.CreateBr(Dest: FalseBB);
4827 }
4828 }
4829
4830 eraseTerminatorAndDCECond(TI: OldTerm);
4831
4832 if (DTU) {
4833 SmallVector<DominatorTree::UpdateType, 2> Updates;
4834 Updates.reserve(N: RemovedSuccessors.size());
4835 for (auto *RemovedSuccessor : RemovedSuccessors)
4836 Updates.push_back(Elt: {DominatorTree::Delete, BB, RemovedSuccessor});
4837 DTU->applyUpdates(Updates);
4838 }
4839
4840 return true;
4841}
4842
4843// Replaces
4844// (switch (select cond, X, Y)) on constant X, Y
4845// with a branch - conditional if X and Y lead to distinct BBs,
4846// unconditional otherwise.
4847bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4848 SelectInst *Select) {
4849 // Check for constant integer values in the select.
4850 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Val: Select->getTrueValue());
4851 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Val: Select->getFalseValue());
4852 if (!TrueVal || !FalseVal)
4853 return false;
4854
4855 // Find the relevant condition and destinations.
4856 Value *Condition = Select->getCondition();
4857 BasicBlock *TrueBB = SI->findCaseValue(C: TrueVal)->getCaseSuccessor();
4858 BasicBlock *FalseBB = SI->findCaseValue(C: FalseVal)->getCaseSuccessor();
4859
4860 // Get weight for TrueBB and FalseBB.
4861 uint32_t TrueWeight = 0, FalseWeight = 0;
4862 SmallVector<uint64_t, 8> Weights;
4863 bool HasWeights = hasBranchWeightMD(I: *SI);
4864 if (HasWeights) {
4865 getBranchWeights(TI: SI, Weights);
4866 if (Weights.size() == 1 + SI->getNumCases()) {
4867 TrueWeight =
4868 (uint32_t)Weights[SI->findCaseValue(C: TrueVal)->getSuccessorIndex()];
4869 FalseWeight =
4870 (uint32_t)Weights[SI->findCaseValue(C: FalseVal)->getSuccessorIndex()];
4871 }
4872 }
4873
4874 // Perform the actual simplification.
4875 return simplifyTerminatorOnSelect(OldTerm: SI, Cond: Condition, TrueBB, FalseBB, TrueWeight,
4876 FalseWeight);
4877}
4878
4879// Replaces
4880// (indirectbr (select cond, blockaddress(@fn, BlockA),
4881// blockaddress(@fn, BlockB)))
4882// with
4883// (br cond, BlockA, BlockB).
4884bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4885 SelectInst *SI) {
4886 // Check that both operands of the select are block addresses.
4887 BlockAddress *TBA = dyn_cast<BlockAddress>(Val: SI->getTrueValue());
4888 BlockAddress *FBA = dyn_cast<BlockAddress>(Val: SI->getFalseValue());
4889 if (!TBA || !FBA)
4890 return false;
4891
4892 // Extract the actual blocks.
4893 BasicBlock *TrueBB = TBA->getBasicBlock();
4894 BasicBlock *FalseBB = FBA->getBasicBlock();
4895
4896 // Perform the actual simplification.
4897 return simplifyTerminatorOnSelect(OldTerm: IBI, Cond: SI->getCondition(), TrueBB, FalseBB, TrueWeight: 0,
4898 FalseWeight: 0);
4899}
4900
4901/// This is called when we find an icmp instruction
4902/// (a seteq/setne with a constant) as the only instruction in a
4903/// block that ends with an uncond branch. We are looking for a very specific
4904/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4905/// this case, we merge the first two "or's of icmp" into a switch, but then the
4906/// default value goes to an uncond block with a seteq in it, we get something
4907/// like:
4908///
4909/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4910/// DEFAULT:
4911/// %tmp = icmp eq i8 %A, 92
4912/// br label %end
4913/// end:
4914/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4915///
4916/// We prefer to split the edge to 'end' so that there is a true/false entry to
4917/// the PHI, merging the third icmp into the switch.
4918bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4919 ICmpInst *ICI, IRBuilder<> &Builder) {
4920 BasicBlock *BB = ICI->getParent();
4921
4922 // If the block has any PHIs in it or the icmp has multiple uses, it is too
4923 // complex.
4924 if (isa<PHINode>(Val: BB->begin()) || !ICI->hasOneUse())
4925 return false;
4926
4927 Value *V = ICI->getOperand(i_nocapture: 0);
4928 ConstantInt *Cst = cast<ConstantInt>(Val: ICI->getOperand(i_nocapture: 1));
4929
4930 // The pattern we're looking for is where our only predecessor is a switch on
4931 // 'V' and this block is the default case for the switch. In this case we can
4932 // fold the compared value into the switch to simplify things.
4933 BasicBlock *Pred = BB->getSinglePredecessor();
4934 if (!Pred || !isa<SwitchInst>(Val: Pred->getTerminator()))
4935 return false;
4936
4937 SwitchInst *SI = cast<SwitchInst>(Val: Pred->getTerminator());
4938 if (SI->getCondition() != V)
4939 return false;
4940
4941 // If BB is reachable on a non-default case, then we simply know the value of
4942 // V in this block. Substitute it and constant fold the icmp instruction
4943 // away.
4944 if (SI->getDefaultDest() != BB) {
4945 ConstantInt *VVal = SI->findCaseDest(BB);
4946 assert(VVal && "Should have a unique destination value");
4947 ICI->setOperand(i_nocapture: 0, Val_nocapture: VVal);
4948
4949 if (Value *V = simplifyInstruction(I: ICI, Q: {DL, ICI})) {
4950 ICI->replaceAllUsesWith(V);
4951 ICI->eraseFromParent();
4952 }
4953 // BB is now empty, so it is likely to simplify away.
4954 return requestResimplify();
4955 }
4956
4957 // Ok, the block is reachable from the default dest. If the constant we're
4958 // comparing exists in one of the other edges, then we can constant fold ICI
4959 // and zap it.
4960 if (SI->findCaseValue(C: Cst) != SI->case_default()) {
4961 Value *V;
4962 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4963 V = ConstantInt::getFalse(Context&: BB->getContext());
4964 else
4965 V = ConstantInt::getTrue(Context&: BB->getContext());
4966
4967 ICI->replaceAllUsesWith(V);
4968 ICI->eraseFromParent();
4969 // BB is now empty, so it is likely to simplify away.
4970 return requestResimplify();
4971 }
4972
4973 // The use of the icmp has to be in the 'end' block, by the only PHI node in
4974 // the block.
4975 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(Idx: 0);
4976 PHINode *PHIUse = dyn_cast<PHINode>(Val: ICI->user_back());
4977 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
4978 isa<PHINode>(Val: ++BasicBlock::iterator(PHIUse)))
4979 return false;
4980
4981 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
4982 // true in the PHI.
4983 Constant *DefaultCst = ConstantInt::getTrue(Context&: BB->getContext());
4984 Constant *NewCst = ConstantInt::getFalse(Context&: BB->getContext());
4985
4986 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4987 std::swap(a&: DefaultCst, b&: NewCst);
4988
4989 // Replace ICI (which is used by the PHI for the default value) with true or
4990 // false depending on if it is EQ or NE.
4991 ICI->replaceAllUsesWith(V: DefaultCst);
4992 ICI->eraseFromParent();
4993
4994 SmallVector<DominatorTree::UpdateType, 2> Updates;
4995
4996 // Okay, the switch goes to this block on a default value. Add an edge from
4997 // the switch to the merge point on the compared value.
4998 BasicBlock *NewBB =
4999 BasicBlock::Create(Context&: BB->getContext(), Name: "switch.edge", Parent: BB->getParent(), InsertBefore: BB);
5000 {
5001 SwitchInstProfUpdateWrapper SIW(*SI);
5002 auto W0 = SIW.getSuccessorWeight(idx: 0);
5003 SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
5004 if (W0) {
5005 NewW = ((uint64_t(*W0) + 1) >> 1);
5006 SIW.setSuccessorWeight(idx: 0, W: *NewW);
5007 }
5008 SIW.addCase(OnVal: Cst, Dest: NewBB, W: NewW);
5009 if (DTU)
5010 Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewBB});
5011 }
5012
5013 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5014 Builder.SetInsertPoint(NewBB);
5015 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5016 Builder.CreateBr(Dest: SuccBlock);
5017 PHIUse->addIncoming(V: NewCst, BB: NewBB);
5018 if (DTU) {
5019 Updates.push_back(Elt: {DominatorTree::Insert, NewBB, SuccBlock});
5020 DTU->applyUpdates(Updates);
5021 }
5022 return true;
5023}
5024
5025/// The specified branch is a conditional branch.
5026/// Check to see if it is branching on an or/and chain of icmp instructions, and
5027/// fold it into a switch instruction if so.
5028bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5029 IRBuilder<> &Builder,
5030 const DataLayout &DL) {
5031 Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());
5032 if (!Cond)
5033 return false;
5034
5035 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5036 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5037 // 'setne's and'ed together, collect them.
5038
5039 // Try to gather values from a chain of and/or to be turned into a switch
5040 ConstantComparesGatherer ConstantCompare(Cond, DL);
5041 // Unpack the result
5042 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5043 Value *CompVal = ConstantCompare.CompValue;
5044 unsigned UsedICmps = ConstantCompare.UsedICmps;
5045 Value *ExtraCase = ConstantCompare.Extra;
5046
5047 // If we didn't have a multiply compared value, fail.
5048 if (!CompVal)
5049 return false;
5050
5051 // Avoid turning single icmps into a switch.
5052 if (UsedICmps <= 1)
5053 return false;
5054
5055 bool TrueWhenEqual = match(V: Cond, P: m_LogicalOr(L: m_Value(), R: m_Value()));
5056
5057 // There might be duplicate constants in the list, which the switch
5058 // instruction can't handle, remove them now.
5059 array_pod_sort(Start: Values.begin(), End: Values.end(), Compare: constantIntSortPredicate);
5060 Values.erase(CS: llvm::unique(R&: Values), CE: Values.end());
5061
5062 // If Extra was used, we require at least two switch values to do the
5063 // transformation. A switch with one value is just a conditional branch.
5064 if (ExtraCase && Values.size() < 2)
5065 return false;
5066
5067 // TODO: Preserve branch weight metadata, similarly to how
5068 // foldValueComparisonIntoPredecessors preserves it.
5069
5070 // Figure out which block is which destination.
5071 BasicBlock *DefaultBB = BI->getSuccessor(i: 1);
5072 BasicBlock *EdgeBB = BI->getSuccessor(i: 0);
5073 if (!TrueWhenEqual)
5074 std::swap(a&: DefaultBB, b&: EdgeBB);
5075
5076 BasicBlock *BB = BI->getParent();
5077
5078 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5079 << " cases into SWITCH. BB is:\n"
5080 << *BB);
5081
5082 SmallVector<DominatorTree::UpdateType, 2> Updates;
5083
5084 // If there are any extra values that couldn't be folded into the switch
5085 // then we evaluate them with an explicit branch first. Split the block
5086 // right before the condbr to handle it.
5087 if (ExtraCase) {
5088 BasicBlock *NewBB = SplitBlock(Old: BB, SplitPt: BI, DTU, /*LI=*/nullptr,
5089 /*MSSAU=*/nullptr, BBName: "switch.early.test");
5090
5091 // Remove the uncond branch added to the old block.
5092 Instruction *OldTI = BB->getTerminator();
5093 Builder.SetInsertPoint(OldTI);
5094
5095 // There can be an unintended UB if extra values are Poison. Before the
5096 // transformation, extra values may not be evaluated according to the
5097 // condition, and it will not raise UB. But after transformation, we are
5098 // evaluating extra values before checking the condition, and it will raise
5099 // UB. It can be solved by adding freeze instruction to extra values.
5100 AssumptionCache *AC = Options.AC;
5101
5102 if (!isGuaranteedNotToBeUndefOrPoison(V: ExtraCase, AC, CtxI: BI, DT: nullptr))
5103 ExtraCase = Builder.CreateFreeze(V: ExtraCase);
5104
5105 if (TrueWhenEqual)
5106 Builder.CreateCondBr(Cond: ExtraCase, True: EdgeBB, False: NewBB);
5107 else
5108 Builder.CreateCondBr(Cond: ExtraCase, True: NewBB, False: EdgeBB);
5109
5110 OldTI->eraseFromParent();
5111
5112 if (DTU)
5113 Updates.push_back(Elt: {DominatorTree::Insert, BB, EdgeBB});
5114
5115 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5116 // for the edge we just added.
5117 addPredecessorToBlock(Succ: EdgeBB, NewPred: BB, ExistPred: NewBB);
5118
5119 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5120 << "\nEXTRABB = " << *BB);
5121 BB = NewBB;
5122 }
5123
5124 Builder.SetInsertPoint(BI);
5125 // Convert pointer to int before we switch.
5126 if (CompVal->getType()->isPointerTy()) {
5127 CompVal = Builder.CreatePtrToInt(
5128 V: CompVal, DestTy: DL.getIntPtrType(CompVal->getType()), Name: "magicptr");
5129 }
5130
5131 // Create the new switch instruction now.
5132 SwitchInst *New = Builder.CreateSwitch(V: CompVal, Dest: DefaultBB, NumCases: Values.size());
5133
5134 // Add all of the 'cases' to the switch instruction.
5135 for (ConstantInt *Val : Values)
5136 New->addCase(OnVal: Val, Dest: EdgeBB);
5137
5138 // We added edges from PI to the EdgeBB. As such, if there were any
5139 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5140 // the number of edges added.
5141 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(Val: BBI); ++BBI) {
5142 PHINode *PN = cast<PHINode>(Val&: BBI);
5143 Value *InVal = PN->getIncomingValueForBlock(BB);
5144 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5145 PN->addIncoming(V: InVal, BB);
5146 }
5147
5148 // Erase the old branch instruction.
5149 eraseTerminatorAndDCECond(TI: BI);
5150 if (DTU)
5151 DTU->applyUpdates(Updates);
5152
5153 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5154 return true;
5155}
5156
5157bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5158 if (isa<PHINode>(Val: RI->getValue()))
5159 return simplifyCommonResume(RI);
5160 else if (isa<LandingPadInst>(Val: RI->getParent()->getFirstNonPHIIt()) &&
5161 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5162 // The resume must unwind the exception that caused control to branch here.
5163 return simplifySingleResume(RI);
5164
5165 return false;
5166}
5167
5168// Check if cleanup block is empty
5169static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
5170 for (Instruction &I : R) {
5171 auto *II = dyn_cast<IntrinsicInst>(Val: &I);
5172 if (!II)
5173 return false;
5174
5175 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5176 switch (IntrinsicID) {
5177 case Intrinsic::dbg_declare:
5178 case Intrinsic::dbg_value:
5179 case Intrinsic::dbg_label:
5180 case Intrinsic::lifetime_end:
5181 break;
5182 default:
5183 return false;
5184 }
5185 }
5186 return true;
5187}
5188
5189// Simplify resume that is shared by several landing pads (phi of landing pad).
5190bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5191 BasicBlock *BB = RI->getParent();
5192
5193 // Check that there are no other instructions except for debug and lifetime
5194 // intrinsics between the phi's and resume instruction.
5195 if (!isCleanupBlockEmpty(R: make_range(x: RI->getParent()->getFirstNonPHIIt(),
5196 y: BB->getTerminator()->getIterator())))
5197 return false;
5198
5199 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5200 auto *PhiLPInst = cast<PHINode>(Val: RI->getValue());
5201
5202 // Check incoming blocks to see if any of them are trivial.
5203 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5204 Idx++) {
5205 auto *IncomingBB = PhiLPInst->getIncomingBlock(i: Idx);
5206 auto *IncomingValue = PhiLPInst->getIncomingValue(i: Idx);
5207
5208 // If the block has other successors, we can not delete it because
5209 // it has other dependents.
5210 if (IncomingBB->getUniqueSuccessor() != BB)
5211 continue;
5212
5213 auto *LandingPad = dyn_cast<LandingPadInst>(Val: IncomingBB->getFirstNonPHIIt());
5214 // Not the landing pad that caused the control to branch here.
5215 if (IncomingValue != LandingPad)
5216 continue;
5217
5218 if (isCleanupBlockEmpty(
5219 R: make_range(x: LandingPad->getNextNode(), y: IncomingBB->getTerminator())))
5220 TrivialUnwindBlocks.insert(X: IncomingBB);
5221 }
5222
5223 // If no trivial unwind blocks, don't do any simplifications.
5224 if (TrivialUnwindBlocks.empty())
5225 return false;
5226
5227 // Turn all invokes that unwind here into calls.
5228 for (auto *TrivialBB : TrivialUnwindBlocks) {
5229 // Blocks that will be simplified should be removed from the phi node.
5230 // Note there could be multiple edges to the resume block, and we need
5231 // to remove them all.
5232 while (PhiLPInst->getBasicBlockIndex(BB: TrivialBB) != -1)
5233 BB->removePredecessor(Pred: TrivialBB, KeepOneInputPHIs: true);
5234
5235 for (BasicBlock *Pred :
5236 llvm::make_early_inc_range(Range: predecessors(BB: TrivialBB))) {
5237 removeUnwindEdge(BB: Pred, DTU);
5238 ++NumInvokes;
5239 }
5240
5241 // In each SimplifyCFG run, only the current processed block can be erased.
5242 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5243 // of erasing TrivialBB, we only remove the branch to the common resume
5244 // block so that we can later erase the resume block since it has no
5245 // predecessors.
5246 TrivialBB->getTerminator()->eraseFromParent();
5247 new UnreachableInst(RI->getContext(), TrivialBB);
5248 if (DTU)
5249 DTU->applyUpdates(Updates: {{DominatorTree::Delete, TrivialBB, BB}});
5250 }
5251
5252 // Delete the resume block if all its predecessors have been removed.
5253 if (pred_empty(BB))
5254 DeleteDeadBlock(BB, DTU);
5255
5256 return !TrivialUnwindBlocks.empty();
5257}
5258
5259// Simplify resume that is only used by a single (non-phi) landing pad.
5260bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5261 BasicBlock *BB = RI->getParent();
5262 auto *LPInst = cast<LandingPadInst>(Val: BB->getFirstNonPHIIt());
5263 assert(RI->getValue() == LPInst &&
5264 "Resume must unwind the exception that caused control to here");
5265
5266 // Check that there are no other instructions except for debug intrinsics.
5267 if (!isCleanupBlockEmpty(
5268 R: make_range<Instruction *>(x: LPInst->getNextNode(), y: RI)))
5269 return false;
5270
5271 // Turn all invokes that unwind here into calls and delete the basic block.
5272 for (BasicBlock *Pred : llvm::make_early_inc_range(Range: predecessors(BB))) {
5273 removeUnwindEdge(BB: Pred, DTU);
5274 ++NumInvokes;
5275 }
5276
5277 // The landingpad is now unreachable. Zap it.
5278 DeleteDeadBlock(BB, DTU);
5279 return true;
5280}
5281
5282static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
5283 // If this is a trivial cleanup pad that executes no instructions, it can be
5284 // eliminated. If the cleanup pad continues to the caller, any predecessor
5285 // that is an EH pad will be updated to continue to the caller and any
5286 // predecessor that terminates with an invoke instruction will have its invoke
5287 // instruction converted to a call instruction. If the cleanup pad being
5288 // simplified does not continue to the caller, each predecessor will be
5289 // updated to continue to the unwind destination of the cleanup pad being
5290 // simplified.
5291 BasicBlock *BB = RI->getParent();
5292 CleanupPadInst *CPInst = RI->getCleanupPad();
5293 if (CPInst->getParent() != BB)
5294 // This isn't an empty cleanup.
5295 return false;
5296
5297 // We cannot kill the pad if it has multiple uses. This typically arises
5298 // from unreachable basic blocks.
5299 if (!CPInst->hasOneUse())
5300 return false;
5301
5302 // Check that there are no other instructions except for benign intrinsics.
5303 if (!isCleanupBlockEmpty(
5304 R: make_range<Instruction *>(x: CPInst->getNextNode(), y: RI)))
5305 return false;
5306
5307 // If the cleanup return we are simplifying unwinds to the caller, this will
5308 // set UnwindDest to nullptr.
5309 BasicBlock *UnwindDest = RI->getUnwindDest();
5310
5311 // We're about to remove BB from the control flow. Before we do, sink any
5312 // PHINodes into the unwind destination. Doing this before changing the
5313 // control flow avoids some potentially slow checks, since we can currently
5314 // be certain that UnwindDest and BB have no common predecessors (since they
5315 // are both EH pads).
5316 if (UnwindDest) {
5317 // First, go through the PHI nodes in UnwindDest and update any nodes that
5318 // reference the block we are removing
5319 for (PHINode &DestPN : UnwindDest->phis()) {
5320 int Idx = DestPN.getBasicBlockIndex(BB);
5321 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5322 assert(Idx != -1);
5323 // This PHI node has an incoming value that corresponds to a control
5324 // path through the cleanup pad we are removing. If the incoming
5325 // value is in the cleanup pad, it must be a PHINode (because we
5326 // verified above that the block is otherwise empty). Otherwise, the
5327 // value is either a constant or a value that dominates the cleanup
5328 // pad being removed.
5329 //
5330 // Because BB and UnwindDest are both EH pads, all of their
5331 // predecessors must unwind to these blocks, and since no instruction
5332 // can have multiple unwind destinations, there will be no overlap in
5333 // incoming blocks between SrcPN and DestPN.
5334 Value *SrcVal = DestPN.getIncomingValue(i: Idx);
5335 PHINode *SrcPN = dyn_cast<PHINode>(Val: SrcVal);
5336
5337 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5338 for (auto *Pred : predecessors(BB)) {
5339 Value *Incoming =
5340 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(BB: Pred) : SrcVal;
5341 DestPN.addIncoming(V: Incoming, BB: Pred);
5342 }
5343 }
5344
5345 // Sink any remaining PHI nodes directly into UnwindDest.
5346 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5347 for (PHINode &PN : make_early_inc_range(Range: BB->phis())) {
5348 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5349 // If the PHI node has no uses or all of its uses are in this basic
5350 // block (meaning they are debug or lifetime intrinsics), just leave
5351 // it. It will be erased when we erase BB below.
5352 continue;
5353
5354 // Otherwise, sink this PHI node into UnwindDest.
5355 // Any predecessors to UnwindDest which are not already represented
5356 // must be back edges which inherit the value from the path through
5357 // BB. In this case, the PHI value must reference itself.
5358 for (auto *pred : predecessors(BB: UnwindDest))
5359 if (pred != BB)
5360 PN.addIncoming(V: &PN, BB: pred);
5361 PN.moveBefore(InsertPos: InsertPt);
5362 // Also, add a dummy incoming value for the original BB itself,
5363 // so that the PHI is well-formed until we drop said predecessor.
5364 PN.addIncoming(V: PoisonValue::get(T: PN.getType()), BB);
5365 }
5366 }
5367
5368 std::vector<DominatorTree::UpdateType> Updates;
5369
5370 // We use make_early_inc_range here because we will remove all predecessors.
5371 for (BasicBlock *PredBB : llvm::make_early_inc_range(Range: predecessors(BB))) {
5372 if (UnwindDest == nullptr) {
5373 if (DTU) {
5374 DTU->applyUpdates(Updates);
5375 Updates.clear();
5376 }
5377 removeUnwindEdge(BB: PredBB, DTU);
5378 ++NumInvokes;
5379 } else {
5380 BB->removePredecessor(Pred: PredBB);
5381 Instruction *TI = PredBB->getTerminator();
5382 TI->replaceUsesOfWith(From: BB, To: UnwindDest);
5383 if (DTU) {
5384 Updates.push_back(x: {DominatorTree::Insert, PredBB, UnwindDest});
5385 Updates.push_back(x: {DominatorTree::Delete, PredBB, BB});
5386 }
5387 }
5388 }
5389
5390 if (DTU)
5391 DTU->applyUpdates(Updates);
5392
5393 DeleteDeadBlock(BB, DTU);
5394
5395 return true;
5396}
5397
5398// Try to merge two cleanuppads together.
5399static bool mergeCleanupPad(CleanupReturnInst *RI) {
5400 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5401 // with.
5402 BasicBlock *UnwindDest = RI->getUnwindDest();
5403 if (!UnwindDest)
5404 return false;
5405
5406 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5407 // be safe to merge without code duplication.
5408 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5409 return false;
5410
5411 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5412 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(Val: &UnwindDest->front());
5413 if (!SuccessorCleanupPad)
5414 return false;
5415
5416 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5417 // Replace any uses of the successor cleanupad with the predecessor pad
5418 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5419 // funclet bundle operands.
5420 SuccessorCleanupPad->replaceAllUsesWith(V: PredecessorCleanupPad);
5421 // Remove the old cleanuppad.
5422 SuccessorCleanupPad->eraseFromParent();
5423 // Now, we simply replace the cleanupret with a branch to the unwind
5424 // destination.
5425 BranchInst::Create(IfTrue: UnwindDest, InsertBefore: RI->getParent());
5426 RI->eraseFromParent();
5427
5428 return true;
5429}
5430
5431bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5432 // It is possible to transiantly have an undef cleanuppad operand because we
5433 // have deleted some, but not all, dead blocks.
5434 // Eventually, this block will be deleted.
5435 if (isa<UndefValue>(Val: RI->getOperand(i_nocapture: 0)))
5436 return false;
5437
5438 if (mergeCleanupPad(RI))
5439 return true;
5440
5441 if (removeEmptyCleanup(RI, DTU))
5442 return true;
5443
5444 return false;
5445}
5446
5447// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5448bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5449 BasicBlock *BB = UI->getParent();
5450
5451 bool Changed = false;
5452
5453 // Ensure that any debug-info records that used to occur after the Unreachable
5454 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5455 // the block.
5456 BB->flushTerminatorDbgRecords();
5457
5458 // Debug-info records on the unreachable inst itself should be deleted, as
5459 // below we delete everything past the final executable instruction.
5460 UI->dropDbgRecords();
5461
5462 // If there are any instructions immediately before the unreachable that can
5463 // be removed, do so.
5464 while (UI->getIterator() != BB->begin()) {
5465 BasicBlock::iterator BBI = UI->getIterator();
5466 --BBI;
5467
5468 if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBI))
5469 break; // Can not drop any more instructions. We're done here.
5470 // Otherwise, this instruction can be freely erased,
5471 // even if it is not side-effect free.
5472
5473 // Note that deleting EH's here is in fact okay, although it involves a bit
5474 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5475 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5476 // and we can therefore guarantee this block will be erased.
5477
5478 // If we're deleting this, we're deleting any subsequent debug info, so
5479 // delete DbgRecords.
5480 BBI->dropDbgRecords();
5481
5482 // Delete this instruction (any uses are guaranteed to be dead)
5483 BBI->replaceAllUsesWith(V: PoisonValue::get(T: BBI->getType()));
5484 BBI->eraseFromParent();
5485 Changed = true;
5486 }
5487
5488 // If the unreachable instruction is the first in the block, take a gander
5489 // at all of the predecessors of this instruction, and simplify them.
5490 if (&BB->front() != UI)
5491 return Changed;
5492
5493 std::vector<DominatorTree::UpdateType> Updates;
5494
5495 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5496 for (BasicBlock *Predecessor : Preds) {
5497 Instruction *TI = Predecessor->getTerminator();
5498 IRBuilder<> Builder(TI);
5499 if (auto *BI = dyn_cast<BranchInst>(Val: TI)) {
5500 // We could either have a proper unconditional branch,
5501 // or a degenerate conditional branch with matching destinations.
5502 if (all_of(Range: BI->successors(),
5503 P: [BB](auto *Successor) { return Successor == BB; })) {
5504 new UnreachableInst(TI->getContext(), TI->getIterator());
5505 TI->eraseFromParent();
5506 Changed = true;
5507 } else {
5508 assert(BI->isConditional() && "Can't get here with an uncond branch.");
5509 Value* Cond = BI->getCondition();
5510 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5511 "The destinations are guaranteed to be different here.");
5512 CallInst *Assumption;
5513 if (BI->getSuccessor(i: 0) == BB) {
5514 Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
5515 Builder.CreateBr(Dest: BI->getSuccessor(i: 1));
5516 } else {
5517 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5518 Assumption = Builder.CreateAssumption(Cond);
5519 Builder.CreateBr(Dest: BI->getSuccessor(i: 0));
5520 }
5521 if (Options.AC)
5522 Options.AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
5523
5524 eraseTerminatorAndDCECond(TI: BI);
5525 Changed = true;
5526 }
5527 if (DTU)
5528 Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5529 } else if (auto *SI = dyn_cast<SwitchInst>(Val: TI)) {
5530 SwitchInstProfUpdateWrapper SU(*SI);
5531 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5532 if (i->getCaseSuccessor() != BB) {
5533 ++i;
5534 continue;
5535 }
5536 BB->removePredecessor(Pred: SU->getParent());
5537 i = SU.removeCase(I: i);
5538 e = SU->case_end();
5539 Changed = true;
5540 }
5541 // Note that the default destination can't be removed!
5542 if (DTU && SI->getDefaultDest() != BB)
5543 Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5544 } else if (auto *II = dyn_cast<InvokeInst>(Val: TI)) {
5545 if (II->getUnwindDest() == BB) {
5546 if (DTU) {
5547 DTU->applyUpdates(Updates);
5548 Updates.clear();
5549 }
5550 auto *CI = cast<CallInst>(Val: removeUnwindEdge(BB: TI->getParent(), DTU));
5551 if (!CI->doesNotThrow())
5552 CI->setDoesNotThrow();
5553 Changed = true;
5554 }
5555 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: TI)) {
5556 if (CSI->getUnwindDest() == BB) {
5557 if (DTU) {
5558 DTU->applyUpdates(Updates);
5559 Updates.clear();
5560 }
5561 removeUnwindEdge(BB: TI->getParent(), DTU);
5562 Changed = true;
5563 continue;
5564 }
5565
5566 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5567 E = CSI->handler_end();
5568 I != E; ++I) {
5569 if (*I == BB) {
5570 CSI->removeHandler(HI: I);
5571 --I;
5572 --E;
5573 Changed = true;
5574 }
5575 }
5576 if (DTU)
5577 Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5578 if (CSI->getNumHandlers() == 0) {
5579 if (CSI->hasUnwindDest()) {
5580 // Redirect all predecessors of the block containing CatchSwitchInst
5581 // to instead branch to the CatchSwitchInst's unwind destination.
5582 if (DTU) {
5583 for (auto *PredecessorOfPredecessor : predecessors(BB: Predecessor)) {
5584 Updates.push_back(x: {DominatorTree::Insert,
5585 PredecessorOfPredecessor,
5586 CSI->getUnwindDest()});
5587 Updates.push_back(x: {DominatorTree::Delete,
5588 PredecessorOfPredecessor, Predecessor});
5589 }
5590 }
5591 Predecessor->replaceAllUsesWith(V: CSI->getUnwindDest());
5592 } else {
5593 // Rewrite all preds to unwind to caller (or from invoke to call).
5594 if (DTU) {
5595 DTU->applyUpdates(Updates);
5596 Updates.clear();
5597 }
5598 SmallVector<BasicBlock *, 8> EHPreds(predecessors(BB: Predecessor));
5599 for (BasicBlock *EHPred : EHPreds)
5600 removeUnwindEdge(BB: EHPred, DTU);
5601 }
5602 // The catchswitch is no longer reachable.
5603 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5604 CSI->eraseFromParent();
5605 Changed = true;
5606 }
5607 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: TI)) {
5608 (void)CRI;
5609 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5610 "Expected to always have an unwind to BB.");
5611 if (DTU)
5612 Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5613 new UnreachableInst(TI->getContext(), TI->getIterator());
5614 TI->eraseFromParent();
5615 Changed = true;
5616 }
5617 }
5618
5619 if (DTU)
5620 DTU->applyUpdates(Updates);
5621
5622 // If this block is now dead, remove it.
5623 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5624 DeleteDeadBlock(BB, DTU);
5625 return true;
5626 }
5627
5628 return Changed;
5629}
5630
5631static bool casesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
5632 assert(Cases.size() >= 1);
5633
5634 array_pod_sort(Start: Cases.begin(), End: Cases.end(), Compare: constantIntSortPredicate);
5635 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5636 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5637 return false;
5638 }
5639 return true;
5640}
5641
5642static void createUnreachableSwitchDefault(SwitchInst *Switch,
5643 DomTreeUpdater *DTU,
5644 bool RemoveOrigDefaultBlock = true) {
5645 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5646 auto *BB = Switch->getParent();
5647 auto *OrigDefaultBlock = Switch->getDefaultDest();
5648 if (RemoveOrigDefaultBlock)
5649 OrigDefaultBlock->removePredecessor(Pred: BB);
5650 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5651 Context&: BB->getContext(), Name: BB->getName() + ".unreachabledefault", Parent: BB->getParent(),
5652 InsertBefore: OrigDefaultBlock);
5653 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5654 UI->setDebugLoc(DebugLoc::getTemporary());
5655 Switch->setDefaultDest(&*NewDefaultBlock);
5656 if (DTU) {
5657 SmallVector<DominatorTree::UpdateType, 2> Updates;
5658 Updates.push_back(Elt: {DominatorTree::Insert, BB, &*NewDefaultBlock});
5659 if (RemoveOrigDefaultBlock &&
5660 !is_contained(Range: successors(BB), Element: OrigDefaultBlock))
5661 Updates.push_back(Elt: {DominatorTree::Delete, BB, &*OrigDefaultBlock});
5662 DTU->applyUpdates(Updates);
5663 }
5664}
5665
5666/// Turn a switch into an integer range comparison and branch.
5667/// Switches with more than 2 destinations are ignored.
5668/// Switches with 1 destination are also ignored.
5669bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5670 IRBuilder<> &Builder) {
5671 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5672
5673 bool HasDefault = !SI->defaultDestUnreachable();
5674
5675 auto *BB = SI->getParent();
5676
5677 // Partition the cases into two sets with different destinations.
5678 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5679 BasicBlock *DestB = nullptr;
5680 SmallVector<ConstantInt *, 16> CasesA;
5681 SmallVector<ConstantInt *, 16> CasesB;
5682
5683 for (auto Case : SI->cases()) {
5684 BasicBlock *Dest = Case.getCaseSuccessor();
5685 if (!DestA)
5686 DestA = Dest;
5687 if (Dest == DestA) {
5688 CasesA.push_back(Elt: Case.getCaseValue());
5689 continue;
5690 }
5691 if (!DestB)
5692 DestB = Dest;
5693 if (Dest == DestB) {
5694 CasesB.push_back(Elt: Case.getCaseValue());
5695 continue;
5696 }
5697 return false; // More than two destinations.
5698 }
5699 if (!DestB)
5700 return false; // All destinations are the same and the default is unreachable
5701
5702 assert(DestA && DestB &&
5703 "Single-destination switch should have been folded.");
5704 assert(DestA != DestB);
5705 assert(DestB != SI->getDefaultDest());
5706 assert(!CasesB.empty() && "There must be non-default cases.");
5707 assert(!CasesA.empty() || HasDefault);
5708
5709 // Figure out if one of the sets of cases form a contiguous range.
5710 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5711 BasicBlock *ContiguousDest = nullptr;
5712 BasicBlock *OtherDest = nullptr;
5713 if (!CasesA.empty() && casesAreContiguous(Cases&: CasesA)) {
5714 ContiguousCases = &CasesA;
5715 ContiguousDest = DestA;
5716 OtherDest = DestB;
5717 } else if (casesAreContiguous(Cases&: CasesB)) {
5718 ContiguousCases = &CasesB;
5719 ContiguousDest = DestB;
5720 OtherDest = DestA;
5721 } else
5722 return false;
5723
5724 // Start building the compare and branch.
5725
5726 Constant *Offset = ConstantExpr::getNeg(C: ContiguousCases->back());
5727 Constant *NumCases =
5728 ConstantInt::get(Ty: Offset->getType(), V: ContiguousCases->size());
5729
5730 Value *Sub = SI->getCondition();
5731 if (!Offset->isNullValue())
5732 Sub = Builder.CreateAdd(LHS: Sub, RHS: Offset, Name: Sub->getName() + ".off");
5733
5734 Value *Cmp;
5735 // If NumCases overflowed, then all possible values jump to the successor.
5736 if (NumCases->isNullValue() && !ContiguousCases->empty())
5737 Cmp = ConstantInt::getTrue(Context&: SI->getContext());
5738 else
5739 Cmp = Builder.CreateICmpULT(LHS: Sub, RHS: NumCases, Name: "switch");
5740 BranchInst *NewBI = Builder.CreateCondBr(Cond: Cmp, True: ContiguousDest, False: OtherDest);
5741
5742 // Update weight for the newly-created conditional branch.
5743 if (hasBranchWeightMD(I: *SI)) {
5744 SmallVector<uint64_t, 8> Weights;
5745 getBranchWeights(TI: SI, Weights);
5746 if (Weights.size() == 1 + SI->getNumCases()) {
5747 uint64_t TrueWeight = 0;
5748 uint64_t FalseWeight = 0;
5749 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5750 if (SI->getSuccessor(idx: I) == ContiguousDest)
5751 TrueWeight += Weights[I];
5752 else
5753 FalseWeight += Weights[I];
5754 }
5755 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5756 TrueWeight /= 2;
5757 FalseWeight /= 2;
5758 }
5759 setBranchWeights(I: NewBI, TrueWeight, FalseWeight, /*IsExpected=*/false);
5760 }
5761 }
5762
5763 // Prune obsolete incoming values off the successors' PHI nodes.
5764 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(Val: BBI); ++BBI) {
5765 unsigned PreviousEdges = ContiguousCases->size();
5766 if (ContiguousDest == SI->getDefaultDest())
5767 ++PreviousEdges;
5768 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5769 cast<PHINode>(Val&: BBI)->removeIncomingValue(BB: SI->getParent());
5770 }
5771 for (auto BBI = OtherDest->begin(); isa<PHINode>(Val: BBI); ++BBI) {
5772 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5773 if (OtherDest == SI->getDefaultDest())
5774 ++PreviousEdges;
5775 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5776 cast<PHINode>(Val&: BBI)->removeIncomingValue(BB: SI->getParent());
5777 }
5778
5779 // Clean up the default block - it may have phis or other instructions before
5780 // the unreachable terminator.
5781 if (!HasDefault)
5782 createUnreachableSwitchDefault(Switch: SI, DTU);
5783
5784 auto *UnreachableDefault = SI->getDefaultDest();
5785
5786 // Drop the switch.
5787 SI->eraseFromParent();
5788
5789 if (!HasDefault && DTU)
5790 DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, UnreachableDefault}});
5791
5792 return true;
5793}
5794
5795/// Compute masked bits for the condition of a switch
5796/// and use it to remove dead cases.
5797static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
5798 AssumptionCache *AC,
5799 const DataLayout &DL) {
5800 Value *Cond = SI->getCondition();
5801 KnownBits Known = computeKnownBits(V: Cond, DL, AC, CxtI: SI);
5802
5803 // We can also eliminate cases by determining that their values are outside of
5804 // the limited range of the condition based on how many significant (non-sign)
5805 // bits are in the condition value.
5806 unsigned MaxSignificantBitsInCond =
5807 ComputeMaxSignificantBits(Op: Cond, DL, AC, CxtI: SI);
5808
5809 // Gather dead cases.
5810 SmallVector<ConstantInt *, 8> DeadCases;
5811 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5812 SmallVector<BasicBlock *, 8> UniqueSuccessors;
5813 for (const auto &Case : SI->cases()) {
5814 auto *Successor = Case.getCaseSuccessor();
5815 if (DTU) {
5816 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Key: Successor);
5817 if (Inserted)
5818 UniqueSuccessors.push_back(Elt: Successor);
5819 ++It->second;
5820 }
5821 const APInt &CaseVal = Case.getCaseValue()->getValue();
5822 if (Known.Zero.intersects(RHS: CaseVal) || !Known.One.isSubsetOf(RHS: CaseVal) ||
5823 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5824 DeadCases.push_back(Elt: Case.getCaseValue());
5825 if (DTU)
5826 --NumPerSuccessorCases[Successor];
5827 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5828 << " is dead.\n");
5829 }
5830 }
5831
5832 // If we can prove that the cases must cover all possible values, the
5833 // default destination becomes dead and we can remove it. If we know some
5834 // of the bits in the value, we can use that to more precisely compute the
5835 // number of possible unique case values.
5836 bool HasDefault = !SI->defaultDestUnreachable();
5837 const unsigned NumUnknownBits =
5838 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5839 assert(NumUnknownBits <= Known.getBitWidth());
5840 if (HasDefault && DeadCases.empty() &&
5841 NumUnknownBits < 64 /* avoid overflow */) {
5842 uint64_t AllNumCases = 1ULL << NumUnknownBits;
5843 if (SI->getNumCases() == AllNumCases) {
5844 createUnreachableSwitchDefault(Switch: SI, DTU);
5845 return true;
5846 }
5847 // When only one case value is missing, replace default with that case.
5848 // Eliminating the default branch will provide more opportunities for
5849 // optimization, such as lookup tables.
5850 if (SI->getNumCases() == AllNumCases - 1) {
5851 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
5852 IntegerType *CondTy = cast<IntegerType>(Val: Cond->getType());
5853 if (CondTy->getIntegerBitWidth() > 64 ||
5854 !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
5855 return false;
5856
5857 uint64_t MissingCaseVal = 0;
5858 for (const auto &Case : SI->cases())
5859 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
5860 auto *MissingCase =
5861 cast<ConstantInt>(Val: ConstantInt::get(Ty: Cond->getType(), V: MissingCaseVal));
5862 SwitchInstProfUpdateWrapper SIW(*SI);
5863 SIW.addCase(OnVal: MissingCase, Dest: SI->getDefaultDest(), W: SIW.getSuccessorWeight(idx: 0));
5864 createUnreachableSwitchDefault(Switch: SI, DTU, /*RemoveOrigDefaultBlock*/ false);
5865 SIW.setSuccessorWeight(idx: 0, W: 0);
5866 return true;
5867 }
5868 }
5869
5870 if (DeadCases.empty())
5871 return false;
5872
5873 SwitchInstProfUpdateWrapper SIW(*SI);
5874 for (ConstantInt *DeadCase : DeadCases) {
5875 SwitchInst::CaseIt CaseI = SI->findCaseValue(C: DeadCase);
5876 assert(CaseI != SI->case_default() &&
5877 "Case was not found. Probably mistake in DeadCases forming.");
5878 // Prune unused values from PHI nodes.
5879 CaseI->getCaseSuccessor()->removePredecessor(Pred: SI->getParent());
5880 SIW.removeCase(I: CaseI);
5881 }
5882
5883 if (DTU) {
5884 std::vector<DominatorTree::UpdateType> Updates;
5885 for (auto *Successor : UniqueSuccessors)
5886 if (NumPerSuccessorCases[Successor] == 0)
5887 Updates.push_back(x: {DominatorTree::Delete, SI->getParent(), Successor});
5888 DTU->applyUpdates(Updates);
5889 }
5890
5891 return true;
5892}
5893
5894/// If BB would be eligible for simplification by
5895/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
5896/// by an unconditional branch), look at the phi node for BB in the successor
5897/// block and see if the incoming value is equal to CaseValue. If so, return
5898/// the phi node, and set PhiIndex to BB's index in the phi node.
5899static PHINode *findPHIForConditionForwarding(ConstantInt *CaseValue,
5900 BasicBlock *BB, int *PhiIndex) {
5901 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
5902 return nullptr; // BB must be empty to be a candidate for simplification.
5903 if (!BB->getSinglePredecessor())
5904 return nullptr; // BB must be dominated by the switch.
5905
5906 BranchInst *Branch = dyn_cast<BranchInst>(Val: BB->getTerminator());
5907 if (!Branch || !Branch->isUnconditional())
5908 return nullptr; // Terminator must be unconditional branch.
5909
5910 BasicBlock *Succ = Branch->getSuccessor(i: 0);
5911
5912 for (PHINode &PHI : Succ->phis()) {
5913 int Idx = PHI.getBasicBlockIndex(BB);
5914 assert(Idx >= 0 && "PHI has no entry for predecessor?");
5915
5916 Value *InValue = PHI.getIncomingValue(i: Idx);
5917 if (InValue != CaseValue)
5918 continue;
5919
5920 *PhiIndex = Idx;
5921 return &PHI;
5922 }
5923
5924 return nullptr;
5925}
5926
5927/// Try to forward the condition of a switch instruction to a phi node
5928/// dominated by the switch, if that would mean that some of the destination
5929/// blocks of the switch can be folded away. Return true if a change is made.
5930static bool forwardSwitchConditionToPHI(SwitchInst *SI) {
5931 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
5932
5933 ForwardingNodesMap ForwardingNodes;
5934 BasicBlock *SwitchBlock = SI->getParent();
5935 bool Changed = false;
5936 for (const auto &Case : SI->cases()) {
5937 ConstantInt *CaseValue = Case.getCaseValue();
5938 BasicBlock *CaseDest = Case.getCaseSuccessor();
5939
5940 // Replace phi operands in successor blocks that are using the constant case
5941 // value rather than the switch condition variable:
5942 // switchbb:
5943 // switch i32 %x, label %default [
5944 // i32 17, label %succ
5945 // ...
5946 // succ:
5947 // %r = phi i32 ... [ 17, %switchbb ] ...
5948 // -->
5949 // %r = phi i32 ... [ %x, %switchbb ] ...
5950
5951 for (PHINode &Phi : CaseDest->phis()) {
5952 // This only works if there is exactly 1 incoming edge from the switch to
5953 // a phi. If there is >1, that means multiple cases of the switch map to 1
5954 // value in the phi, and that phi value is not the switch condition. Thus,
5955 // this transform would not make sense (the phi would be invalid because
5956 // a phi can't have different incoming values from the same block).
5957 int SwitchBBIdx = Phi.getBasicBlockIndex(BB: SwitchBlock);
5958 if (Phi.getIncomingValue(i: SwitchBBIdx) == CaseValue &&
5959 count(Range: Phi.blocks(), Element: SwitchBlock) == 1) {
5960 Phi.setIncomingValue(i: SwitchBBIdx, V: SI->getCondition());
5961 Changed = true;
5962 }
5963 }
5964
5965 // Collect phi nodes that are indirectly using this switch's case constants.
5966 int PhiIdx;
5967 if (auto *Phi = findPHIForConditionForwarding(CaseValue, BB: CaseDest, PhiIndex: &PhiIdx))
5968 ForwardingNodes[Phi].push_back(Elt: PhiIdx);
5969 }
5970
5971 for (auto &ForwardingNode : ForwardingNodes) {
5972 PHINode *Phi = ForwardingNode.first;
5973 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
5974 // Check if it helps to fold PHI.
5975 if (Indexes.size() < 2 && !llvm::is_contained(Range: Phi->incoming_values(), Element: SI->getCondition()))
5976 continue;
5977
5978 for (int Index : Indexes)
5979 Phi->setIncomingValue(i: Index, V: SI->getCondition());
5980 Changed = true;
5981 }
5982
5983 return Changed;
5984}
5985
5986/// Return true if the backend will be able to handle
5987/// initializing an array of constants like C.
5988static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
5989 if (C->isThreadDependent())
5990 return false;
5991 if (C->isDLLImportDependent())
5992 return false;
5993
5994 if (!isa<ConstantFP>(Val: C) && !isa<ConstantInt>(Val: C) &&
5995 !isa<ConstantPointerNull>(Val: C) && !isa<GlobalValue>(Val: C) &&
5996 !isa<UndefValue>(Val: C) && !isa<ConstantExpr>(Val: C))
5997 return false;
5998
5999 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) {
6000 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6001 // materializing the array of constants.
6002 Constant *StrippedC = cast<Constant>(Val: CE->stripInBoundsConstantOffsets());
6003 if (StrippedC == C || !validLookupTableConstant(C: StrippedC, TTI))
6004 return false;
6005 }
6006
6007 if (!TTI.shouldBuildLookupTablesForConstant(C))
6008 return false;
6009
6010 return true;
6011}
6012
6013/// If V is a Constant, return it. Otherwise, try to look up
6014/// its constant value in ConstantPool, returning 0 if it's not there.
6015static Constant *
6016lookupConstant(Value *V,
6017 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
6018 if (Constant *C = dyn_cast<Constant>(Val: V))
6019 return C;
6020 return ConstantPool.lookup(Val: V);
6021}
6022
6023/// Try to fold instruction I into a constant. This works for
6024/// simple instructions such as binary operations where both operands are
6025/// constant or can be replaced by constants from the ConstantPool. Returns the
6026/// resulting constant on success, 0 otherwise.
6027static Constant *
6028constantFold(Instruction *I, const DataLayout &DL,
6029 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
6030 if (SelectInst *Select = dyn_cast<SelectInst>(Val: I)) {
6031 Constant *A = lookupConstant(V: Select->getCondition(), ConstantPool);
6032 if (!A)
6033 return nullptr;
6034 if (A->isAllOnesValue())
6035 return lookupConstant(V: Select->getTrueValue(), ConstantPool);
6036 if (A->isNullValue())
6037 return lookupConstant(V: Select->getFalseValue(), ConstantPool);
6038 return nullptr;
6039 }
6040
6041 SmallVector<Constant *, 4> COps;
6042 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6043 if (Constant *A = lookupConstant(V: I->getOperand(i: N), ConstantPool))
6044 COps.push_back(Elt: A);
6045 else
6046 return nullptr;
6047 }
6048
6049 return ConstantFoldInstOperands(I, Ops: COps, DL);
6050}
6051
6052/// Try to determine the resulting constant values in phi nodes
6053/// at the common destination basic block, *CommonDest, for one of the case
6054/// destionations CaseDest corresponding to value CaseVal (0 for the default
6055/// case), of a switch instruction SI.
6056static bool
6057getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
6058 BasicBlock **CommonDest,
6059 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6060 const DataLayout &DL, const TargetTransformInfo &TTI) {
6061 // The block from which we enter the common destination.
6062 BasicBlock *Pred = SI->getParent();
6063
6064 // If CaseDest is empty except for some side-effect free instructions through
6065 // which we can constant-propagate the CaseVal, continue to its successor.
6066 SmallDenseMap<Value *, Constant *> ConstantPool;
6067 ConstantPool.insert(KV: std::make_pair(x: SI->getCondition(), y&: CaseVal));
6068 for (Instruction &I : CaseDest->instructionsWithoutDebug(SkipPseudoOp: false)) {
6069 if (I.isTerminator()) {
6070 // If the terminator is a simple branch, continue to the next block.
6071 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6072 return false;
6073 Pred = CaseDest;
6074 CaseDest = I.getSuccessor(Idx: 0);
6075 } else if (Constant *C = constantFold(I: &I, DL, ConstantPool)) {
6076 // Instruction is side-effect free and constant.
6077
6078 // If the instruction has uses outside this block or a phi node slot for
6079 // the block, it is not safe to bypass the instruction since it would then
6080 // no longer dominate all its uses.
6081 for (auto &Use : I.uses()) {
6082 User *User = Use.getUser();
6083 if (Instruction *I = dyn_cast<Instruction>(Val: User))
6084 if (I->getParent() == CaseDest)
6085 continue;
6086 if (PHINode *Phi = dyn_cast<PHINode>(Val: User))
6087 if (Phi->getIncomingBlock(U: Use) == CaseDest)
6088 continue;
6089 return false;
6090 }
6091
6092 ConstantPool.insert(KV: std::make_pair(x: &I, y&: C));
6093 } else {
6094 break;
6095 }
6096 }
6097
6098 // If we did not have a CommonDest before, use the current one.
6099 if (!*CommonDest)
6100 *CommonDest = CaseDest;
6101 // If the destination isn't the common one, abort.
6102 if (CaseDest != *CommonDest)
6103 return false;
6104
6105 // Get the values for this case from phi nodes in the destination block.
6106 for (PHINode &PHI : (*CommonDest)->phis()) {
6107 int Idx = PHI.getBasicBlockIndex(BB: Pred);
6108 if (Idx == -1)
6109 continue;
6110
6111 Constant *ConstVal =
6112 lookupConstant(V: PHI.getIncomingValue(i: Idx), ConstantPool);
6113 if (!ConstVal)
6114 return false;
6115
6116 // Be conservative about which kinds of constants we support.
6117 if (!validLookupTableConstant(C: ConstVal, TTI))
6118 return false;
6119
6120 Res.push_back(Elt: std::make_pair(x: &PHI, y&: ConstVal));
6121 }
6122
6123 return Res.size() > 0;
6124}
6125
6126// Helper function used to add CaseVal to the list of cases that generate
6127// Result. Returns the updated number of cases that generate this result.
6128static size_t mapCaseToResult(ConstantInt *CaseVal,
6129 SwitchCaseResultVectorTy &UniqueResults,
6130 Constant *Result) {
6131 for (auto &I : UniqueResults) {
6132 if (I.first == Result) {
6133 I.second.push_back(Elt: CaseVal);
6134 return I.second.size();
6135 }
6136 }
6137 UniqueResults.push_back(
6138 Elt: std::make_pair(x&: Result, y: SmallVector<ConstantInt *, 4>(1, CaseVal)));
6139 return 1;
6140}
6141
6142// Helper function that initializes a map containing
6143// results for the PHI node of the common destination block for a switch
6144// instruction. Returns false if multiple PHI nodes have been found or if
6145// there is not a common destination block for the switch.
6146static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
6147 BasicBlock *&CommonDest,
6148 SwitchCaseResultVectorTy &UniqueResults,
6149 Constant *&DefaultResult,
6150 const DataLayout &DL,
6151 const TargetTransformInfo &TTI,
6152 uintptr_t MaxUniqueResults) {
6153 for (const auto &I : SI->cases()) {
6154 ConstantInt *CaseVal = I.getCaseValue();
6155
6156 // Resulting value at phi nodes for this case value.
6157 SwitchCaseResultsTy Results;
6158 if (!getCaseResults(SI, CaseVal, CaseDest: I.getCaseSuccessor(), CommonDest: &CommonDest, Res&: Results,
6159 DL, TTI))
6160 return false;
6161
6162 // Only one value per case is permitted.
6163 if (Results.size() > 1)
6164 return false;
6165
6166 // Add the case->result mapping to UniqueResults.
6167 const size_t NumCasesForResult =
6168 mapCaseToResult(CaseVal, UniqueResults, Result: Results.begin()->second);
6169
6170 // Early out if there are too many cases for this result.
6171 if (NumCasesForResult > MaxSwitchCasesPerResult)
6172 return false;
6173
6174 // Early out if there are too many unique results.
6175 if (UniqueResults.size() > MaxUniqueResults)
6176 return false;
6177
6178 // Check the PHI consistency.
6179 if (!PHI)
6180 PHI = Results[0].first;
6181 else if (PHI != Results[0].first)
6182 return false;
6183 }
6184 // Find the default result value.
6185 SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
6186 getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest, Res&: DefaultResults,
6187 DL, TTI);
6188 // If the default value is not found abort unless the default destination
6189 // is unreachable.
6190 DefaultResult =
6191 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6192
6193 return DefaultResult || SI->defaultDestUnreachable();
6194}
6195
6196// Helper function that checks if it is possible to transform a switch with only
6197// two cases (or two cases + default) that produces a result into a select.
6198// TODO: Handle switches with more than 2 cases that map to the same result.
6199static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6200 Constant *DefaultResult, Value *Condition,
6201 IRBuilder<> &Builder, const DataLayout &DL) {
6202 // If we are selecting between only two cases transform into a simple
6203 // select or a two-way select if default is possible.
6204 // Example:
6205 // switch (a) { %0 = icmp eq i32 %a, 10
6206 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6207 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6208 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6209 // }
6210 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6211 ResultVector[1].second.size() == 1) {
6212 ConstantInt *FirstCase = ResultVector[0].second[0];
6213 ConstantInt *SecondCase = ResultVector[1].second[0];
6214 Value *SelectValue = ResultVector[1].first;
6215 if (DefaultResult) {
6216 Value *ValueCompare =
6217 Builder.CreateICmpEQ(LHS: Condition, RHS: SecondCase, Name: "switch.selectcmp");
6218 SelectValue = Builder.CreateSelect(C: ValueCompare, True: ResultVector[1].first,
6219 False: DefaultResult, Name: "switch.select");
6220 }
6221 Value *ValueCompare =
6222 Builder.CreateICmpEQ(LHS: Condition, RHS: FirstCase, Name: "switch.selectcmp");
6223 return Builder.CreateSelect(C: ValueCompare, True: ResultVector[0].first,
6224 False: SelectValue, Name: "switch.select");
6225 }
6226
6227 // Handle the degenerate case where two cases have the same result value.
6228 if (ResultVector.size() == 1 && DefaultResult) {
6229 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6230 unsigned CaseCount = CaseValues.size();
6231 // n bits group cases map to the same result:
6232 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6233 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6234 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6235 if (isPowerOf2_32(Value: CaseCount)) {
6236 ConstantInt *MinCaseVal = CaseValues[0];
6237 // If there are bits that are set exclusively by CaseValues, we
6238 // can transform the switch into a select if the conjunction of
6239 // all the values uniquely identify CaseValues.
6240 APInt AndMask = APInt::getAllOnes(numBits: MinCaseVal->getBitWidth());
6241
6242 // Find the minimum value and compute the and of all the case values.
6243 for (auto *Case : CaseValues) {
6244 if (Case->getValue().slt(RHS: MinCaseVal->getValue()))
6245 MinCaseVal = Case;
6246 AndMask &= Case->getValue();
6247 }
6248 KnownBits Known = computeKnownBits(V: Condition, DL);
6249
6250 if (!AndMask.isZero() && Known.getMaxValue().uge(RHS: AndMask)) {
6251 // Compute the number of bits that are free to vary.
6252 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6253
6254 // Check if the number of values covered by the mask is equal
6255 // to the number of cases.
6256 if (FreeBits == Log2_32(Value: CaseCount)) {
6257 Value *And = Builder.CreateAnd(LHS: Condition, RHS: AndMask);
6258 Value *Cmp = Builder.CreateICmpEQ(
6259 LHS: And, RHS: Constant::getIntegerValue(Ty: And->getType(), V: AndMask));
6260 return Builder.CreateSelect(C: Cmp, True: ResultVector[0].first,
6261 False: DefaultResult);
6262 }
6263 }
6264
6265 // Mark the bits case number touched.
6266 APInt BitMask = APInt::getZero(numBits: MinCaseVal->getBitWidth());
6267 for (auto *Case : CaseValues)
6268 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6269
6270 // Check if cases with the same result can cover all number
6271 // in touched bits.
6272 if (BitMask.popcount() == Log2_32(Value: CaseCount)) {
6273 if (!MinCaseVal->isNullValue())
6274 Condition = Builder.CreateSub(LHS: Condition, RHS: MinCaseVal);
6275 Value *And = Builder.CreateAnd(LHS: Condition, RHS: ~BitMask, Name: "switch.and");
6276 Value *Cmp = Builder.CreateICmpEQ(
6277 LHS: And, RHS: Constant::getNullValue(Ty: And->getType()), Name: "switch.selectcmp");
6278 return Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
6279 }
6280 }
6281
6282 // Handle the degenerate case where two cases have the same value.
6283 if (CaseValues.size() == 2) {
6284 Value *Cmp1 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[0],
6285 Name: "switch.selectcmp.case1");
6286 Value *Cmp2 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[1],
6287 Name: "switch.selectcmp.case2");
6288 Value *Cmp = Builder.CreateOr(LHS: Cmp1, RHS: Cmp2, Name: "switch.selectcmp");
6289 return Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
6290 }
6291 }
6292
6293 return nullptr;
6294}
6295
6296// Helper function to cleanup a switch instruction that has been converted into
6297// a select, fixing up PHI nodes and basic blocks.
6298static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
6299 Value *SelectValue,
6300 IRBuilder<> &Builder,
6301 DomTreeUpdater *DTU) {
6302 std::vector<DominatorTree::UpdateType> Updates;
6303
6304 BasicBlock *SelectBB = SI->getParent();
6305 BasicBlock *DestBB = PHI->getParent();
6306
6307 if (DTU && !is_contained(Range: predecessors(BB: DestBB), Element: SelectBB))
6308 Updates.push_back(x: {DominatorTree::Insert, SelectBB, DestBB});
6309 Builder.CreateBr(Dest: DestBB);
6310
6311 // Remove the switch.
6312
6313 PHI->removeIncomingValueIf(
6314 Predicate: [&](unsigned Idx) { return PHI->getIncomingBlock(i: Idx) == SelectBB; });
6315 PHI->addIncoming(V: SelectValue, BB: SelectBB);
6316
6317 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6318 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6319 BasicBlock *Succ = SI->getSuccessor(idx: i);
6320
6321 if (Succ == DestBB)
6322 continue;
6323 Succ->removePredecessor(Pred: SelectBB);
6324 if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
6325 Updates.push_back(x: {DominatorTree::Delete, SelectBB, Succ});
6326 }
6327 SI->eraseFromParent();
6328 if (DTU)
6329 DTU->applyUpdates(Updates);
6330}
6331
6332/// If a switch is only used to initialize one or more phi nodes in a common
6333/// successor block with only two different constant values, try to replace the
6334/// switch with a select. Returns true if the fold was made.
6335static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6336 DomTreeUpdater *DTU, const DataLayout &DL,
6337 const TargetTransformInfo &TTI) {
6338 Value *const Cond = SI->getCondition();
6339 PHINode *PHI = nullptr;
6340 BasicBlock *CommonDest = nullptr;
6341 Constant *DefaultResult;
6342 SwitchCaseResultVectorTy UniqueResults;
6343 // Collect all the cases that will deliver the same value from the switch.
6344 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6345 DL, TTI, /*MaxUniqueResults*/ 2))
6346 return false;
6347
6348 assert(PHI != nullptr && "PHI for value select not found");
6349 Builder.SetInsertPoint(SI);
6350 Value *SelectValue =
6351 foldSwitchToSelect(ResultVector: UniqueResults, DefaultResult, Condition: Cond, Builder, DL);
6352 if (!SelectValue)
6353 return false;
6354
6355 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6356 return true;
6357}
6358
6359namespace {
6360
6361/// This class represents a lookup table that can be used to replace a switch.
6362class SwitchLookupTable {
6363public:
6364 /// Create a lookup table to use as a switch replacement with the contents
6365 /// of Values, using DefaultValue to fill any holes in the table.
6366 SwitchLookupTable(
6367 Module &M, uint64_t TableSize, ConstantInt *Offset,
6368 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6369 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6370
6371 /// Build instructions with Builder to retrieve the value at
6372 /// the position given by Index in the lookup table.
6373 Value *buildLookup(Value *Index, IRBuilder<> &Builder, const DataLayout &DL);
6374
6375 /// Return true if a table with TableSize elements of
6376 /// type ElementType would fit in a target-legal register.
6377 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6378 Type *ElementType);
6379
6380private:
6381 // Depending on the contents of the table, it can be represented in
6382 // different ways.
6383 enum {
6384 // For tables where each element contains the same value, we just have to
6385 // store that single value and return it for each lookup.
6386 SingleValueKind,
6387
6388 // For tables where there is a linear relationship between table index
6389 // and values. We calculate the result with a simple multiplication
6390 // and addition instead of a table lookup.
6391 LinearMapKind,
6392
6393 // For small tables with integer elements, we can pack them into a bitmap
6394 // that fits into a target-legal register. Values are retrieved by
6395 // shift and mask operations.
6396 BitMapKind,
6397
6398 // The table is stored as an array of values. Values are retrieved by load
6399 // instructions from the table.
6400 ArrayKind
6401 } Kind;
6402
6403 // For SingleValueKind, this is the single value.
6404 Constant *SingleValue = nullptr;
6405
6406 // For BitMapKind, this is the bitmap.
6407 ConstantInt *BitMap = nullptr;
6408 IntegerType *BitMapElementTy = nullptr;
6409
6410 // For LinearMapKind, these are the constants used to derive the value.
6411 ConstantInt *LinearOffset = nullptr;
6412 ConstantInt *LinearMultiplier = nullptr;
6413 bool LinearMapValWrapped = false;
6414
6415 // For ArrayKind, this is the array.
6416 GlobalVariable *Array = nullptr;
6417};
6418
6419} // end anonymous namespace
6420
6421SwitchLookupTable::SwitchLookupTable(
6422 Module &M, uint64_t TableSize, ConstantInt *Offset,
6423 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6424 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6425 assert(Values.size() && "Can't build lookup table without values!");
6426 assert(TableSize >= Values.size() && "Can't fit values in table!");
6427
6428 // If all values in the table are equal, this is that value.
6429 SingleValue = Values.begin()->second;
6430
6431 Type *ValueType = Values.begin()->second->getType();
6432
6433 // Build up the table contents.
6434 SmallVector<Constant *, 64> TableContents(TableSize);
6435 for (const auto &[CaseVal, CaseRes] : Values) {
6436 assert(CaseRes->getType() == ValueType);
6437
6438 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6439 TableContents[Idx] = CaseRes;
6440
6441 if (SingleValue && !isa<PoisonValue>(Val: CaseRes) && CaseRes != SingleValue)
6442 SingleValue = isa<PoisonValue>(Val: SingleValue) ? CaseRes : nullptr;
6443 }
6444
6445 // Fill in any holes in the table with the default result.
6446 if (Values.size() < TableSize) {
6447 assert(DefaultValue &&
6448 "Need a default value to fill the lookup table holes.");
6449 assert(DefaultValue->getType() == ValueType);
6450 for (uint64_t I = 0; I < TableSize; ++I) {
6451 if (!TableContents[I])
6452 TableContents[I] = DefaultValue;
6453 }
6454
6455 // If the default value is poison, all the holes are poison.
6456 bool DefaultValueIsPoison = isa<PoisonValue>(Val: DefaultValue);
6457
6458 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6459 SingleValue = nullptr;
6460 }
6461
6462 // If each element in the table contains the same value, we only need to store
6463 // that single value.
6464 if (SingleValue) {
6465 Kind = SingleValueKind;
6466 return;
6467 }
6468
6469 // Check if we can derive the value with a linear transformation from the
6470 // table index.
6471 if (isa<IntegerType>(Val: ValueType)) {
6472 bool LinearMappingPossible = true;
6473 APInt PrevVal;
6474 APInt DistToPrev;
6475 // When linear map is monotonic and signed overflow doesn't happen on
6476 // maximum index, we can attach nsw on Add and Mul.
6477 bool NonMonotonic = false;
6478 assert(TableSize >= 2 && "Should be a SingleValue table.");
6479 // Check if there is the same distance between two consecutive values.
6480 for (uint64_t I = 0; I < TableSize; ++I) {
6481 ConstantInt *ConstVal = dyn_cast<ConstantInt>(Val: TableContents[I]);
6482
6483 if (!ConstVal && isa<PoisonValue>(Val: TableContents[I])) {
6484 // This is an poison, so it's (probably) a lookup table hole.
6485 // To prevent any regressions from before we switched to using poison as
6486 // the default value, holes will fall back to using the first value.
6487 // This can be removed once we add proper handling for poisons in lookup
6488 // tables.
6489 ConstVal = dyn_cast<ConstantInt>(Val: Values[0].second);
6490 }
6491
6492 if (!ConstVal) {
6493 // This is an undef. We could deal with it, but undefs in lookup tables
6494 // are very seldom. It's probably not worth the additional complexity.
6495 LinearMappingPossible = false;
6496 break;
6497 }
6498 const APInt &Val = ConstVal->getValue();
6499 if (I != 0) {
6500 APInt Dist = Val - PrevVal;
6501 if (I == 1) {
6502 DistToPrev = Dist;
6503 } else if (Dist != DistToPrev) {
6504 LinearMappingPossible = false;
6505 break;
6506 }
6507 NonMonotonic |=
6508 Dist.isStrictlyPositive() ? Val.sle(RHS: PrevVal) : Val.sgt(RHS: PrevVal);
6509 }
6510 PrevVal = Val;
6511 }
6512 if (LinearMappingPossible) {
6513 LinearOffset = cast<ConstantInt>(Val: TableContents[0]);
6514 LinearMultiplier = ConstantInt::get(Context&: M.getContext(), V: DistToPrev);
6515 APInt M = LinearMultiplier->getValue();
6516 bool MayWrap = true;
6517 if (isIntN(N: M.getBitWidth(), x: TableSize - 1))
6518 (void)M.smul_ov(RHS: APInt(M.getBitWidth(), TableSize - 1), Overflow&: MayWrap);
6519 LinearMapValWrapped = NonMonotonic || MayWrap;
6520 Kind = LinearMapKind;
6521 ++NumLinearMaps;
6522 return;
6523 }
6524 }
6525
6526 // If the type is integer and the table fits in a register, build a bitmap.
6527 if (wouldFitInRegister(DL, TableSize, ElementType: ValueType)) {
6528 IntegerType *IT = cast<IntegerType>(Val: ValueType);
6529 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6530 for (uint64_t I = TableSize; I > 0; --I) {
6531 TableInt <<= IT->getBitWidth();
6532 // Insert values into the bitmap. Undef values are set to zero.
6533 if (!isa<UndefValue>(Val: TableContents[I - 1])) {
6534 ConstantInt *Val = cast<ConstantInt>(Val: TableContents[I - 1]);
6535 TableInt |= Val->getValue().zext(width: TableInt.getBitWidth());
6536 }
6537 }
6538 BitMap = ConstantInt::get(Context&: M.getContext(), V: TableInt);
6539 BitMapElementTy = IT;
6540 Kind = BitMapKind;
6541 ++NumBitMaps;
6542 return;
6543 }
6544
6545 // Store the table in an array.
6546 ArrayType *ArrayTy = ArrayType::get(ElementType: ValueType, NumElements: TableSize);
6547 Constant *Initializer = ConstantArray::get(T: ArrayTy, V: TableContents);
6548
6549 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6550 GlobalVariable::PrivateLinkage, Initializer,
6551 "switch.table." + FuncName);
6552 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6553 // Set the alignment to that of an array items. We will be only loading one
6554 // value out of it.
6555 Array->setAlignment(DL.getPrefTypeAlign(Ty: ValueType));
6556 Kind = ArrayKind;
6557}
6558
6559Value *SwitchLookupTable::buildLookup(Value *Index, IRBuilder<> &Builder,
6560 const DataLayout &DL) {
6561 switch (Kind) {
6562 case SingleValueKind:
6563 return SingleValue;
6564 case LinearMapKind: {
6565 // Derive the result value from the input value.
6566 Value *Result = Builder.CreateIntCast(V: Index, DestTy: LinearMultiplier->getType(),
6567 isSigned: false, Name: "switch.idx.cast");
6568 if (!LinearMultiplier->isOne())
6569 Result = Builder.CreateMul(LHS: Result, RHS: LinearMultiplier, Name: "switch.idx.mult",
6570 /*HasNUW = */ false,
6571 /*HasNSW = */ !LinearMapValWrapped);
6572
6573 if (!LinearOffset->isZero())
6574 Result = Builder.CreateAdd(LHS: Result, RHS: LinearOffset, Name: "switch.offset",
6575 /*HasNUW = */ false,
6576 /*HasNSW = */ !LinearMapValWrapped);
6577 return Result;
6578 }
6579 case BitMapKind: {
6580 // Type of the bitmap (e.g. i59).
6581 IntegerType *MapTy = BitMap->getIntegerType();
6582
6583 // Cast Index to the same type as the bitmap.
6584 // Note: The Index is <= the number of elements in the table, so
6585 // truncating it to the width of the bitmask is safe.
6586 Value *ShiftAmt = Builder.CreateZExtOrTrunc(V: Index, DestTy: MapTy, Name: "switch.cast");
6587
6588 // Multiply the shift amount by the element width. NUW/NSW can always be
6589 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6590 // BitMap's bit width.
6591 ShiftAmt = Builder.CreateMul(
6592 LHS: ShiftAmt, RHS: ConstantInt::get(Ty: MapTy, V: BitMapElementTy->getBitWidth()),
6593 Name: "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6594
6595 // Shift down.
6596 Value *DownShifted =
6597 Builder.CreateLShr(LHS: BitMap, RHS: ShiftAmt, Name: "switch.downshift");
6598 // Mask off.
6599 return Builder.CreateTrunc(V: DownShifted, DestTy: BitMapElementTy, Name: "switch.masked");
6600 }
6601 case ArrayKind: {
6602 Type *IndexTy = DL.getIndexType(PtrTy: Array->getType());
6603
6604 if (Index->getType() != IndexTy)
6605 Index = Builder.CreateZExtOrTrunc(V: Index, DestTy: IndexTy);
6606
6607 Value *GEPIndices[] = {ConstantInt::get(Ty: IndexTy, V: 0), Index};
6608 Value *GEP = Builder.CreateInBoundsGEP(Ty: Array->getValueType(), Ptr: Array,
6609 IdxList: GEPIndices, Name: "switch.gep");
6610 return Builder.CreateLoad(
6611 Ty: cast<ArrayType>(Val: Array->getValueType())->getElementType(), Ptr: GEP,
6612 Name: "switch.load");
6613 }
6614 }
6615 llvm_unreachable("Unknown lookup table kind!");
6616}
6617
6618bool SwitchLookupTable::wouldFitInRegister(const DataLayout &DL,
6619 uint64_t TableSize,
6620 Type *ElementType) {
6621 auto *IT = dyn_cast<IntegerType>(Val: ElementType);
6622 if (!IT)
6623 return false;
6624 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6625 // are <= 15, we could try to narrow the type.
6626
6627 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6628 if (TableSize >= UINT_MAX / IT->getBitWidth())
6629 return false;
6630 return DL.fitsInLegalInteger(Width: TableSize * IT->getBitWidth());
6631}
6632
6633static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
6634 const DataLayout &DL) {
6635 // Allow any legal type.
6636 if (TTI.isTypeLegal(Ty))
6637 return true;
6638
6639 auto *IT = dyn_cast<IntegerType>(Val: Ty);
6640 if (!IT)
6641 return false;
6642
6643 // Also allow power of 2 integer types that have at least 8 bits and fit in
6644 // a register. These types are common in frontend languages and targets
6645 // usually support loads of these types.
6646 // TODO: We could relax this to any integer that fits in a register and rely
6647 // on ABI alignment and padding in the table to allow the load to be widened.
6648 // Or we could widen the constants and truncate the load.
6649 unsigned BitWidth = IT->getBitWidth();
6650 return BitWidth >= 8 && isPowerOf2_32(Value: BitWidth) &&
6651 DL.fitsInLegalInteger(Width: IT->getBitWidth());
6652}
6653
/// Return true if \p NumCases cases spread over a range of \p CaseRange
/// values reach a density of at least 40%. Ranges of UINT64_MAX / 100 or
/// more are always reported as not dense.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensityPercent = 40;
  constexpr uint64_t PercentScale = 100;

  // Reject ranges large enough to overflow the multiplications below.
  if (CaseRange >= UINT64_MAX / PercentScale)
    return false;

  const uint64_t ScaledCases = NumCases * PercentScale;
  const uint64_t RequiredCases = CaseRange * MinDensityPercent;
  return ScaledCases >= RequiredCases;
}
6665
6666static bool isSwitchDense(ArrayRef<int64_t> Values) {
6667 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6668 uint64_t Range = Diff + 1;
6669 if (Range < Diff)
6670 return false; // Overflow.
6671
6672 return isSwitchDense(NumCases: Values.size(), CaseRange: Range);
6673}
6674
6675/// Determine whether a lookup table should be built for this switch, based on
6676/// the number of cases, size of the table, and the types of the results.
6677// TODO: We could support larger than legal types by limiting based on the
6678// number of loads required and/or table size. If the constants are small we
6679// could use smaller table entries and extend after the load.
6680static bool
6681shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
6682 const TargetTransformInfo &TTI, const DataLayout &DL,
6683 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6684 if (SI->getNumCases() > TableSize)
6685 return false; // TableSize overflowed.
6686
6687 bool AllTablesFitInRegister = true;
6688 bool HasIllegalType = false;
6689 for (const auto &I : ResultTypes) {
6690 Type *Ty = I.second;
6691
6692 // Saturate this flag to true.
6693 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6694
6695 // Saturate this flag to false.
6696 AllTablesFitInRegister =
6697 AllTablesFitInRegister &&
6698 SwitchLookupTable::wouldFitInRegister(DL, TableSize, ElementType: Ty);
6699
6700 // If both flags saturate, we're done. NOTE: This *only* works with
6701 // saturating flags, and all flags have to saturate first due to the
6702 // non-deterministic behavior of iterating over a dense map.
6703 if (HasIllegalType && !AllTablesFitInRegister)
6704 break;
6705 }
6706
6707 // If each table would fit in a register, we should build it anyway.
6708 if (AllTablesFitInRegister)
6709 return true;
6710
6711 // Don't build a table that doesn't fit in-register if it has illegal types.
6712 if (HasIllegalType)
6713 return false;
6714
6715 return isSwitchDense(NumCases: SI->getNumCases(), CaseRange: TableSize);
6716}
6717
6718static bool shouldUseSwitchConditionAsTableIndex(
6719 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6720 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6721 const DataLayout &DL, const TargetTransformInfo &TTI) {
6722 if (MinCaseVal.isNullValue())
6723 return true;
6724 if (MinCaseVal.isNegative() ||
6725 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6726 !HasDefaultResults)
6727 return false;
6728 return all_of(Range: ResultTypes, P: [&](const auto &KV) {
6729 return SwitchLookupTable::wouldFitInRegister(
6730 DL, TableSize: MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6731 ElementType: KV.second /* ResultType */);
6732 });
6733}
6734
6735/// Try to reuse the switch table index compare. Following pattern:
6736/// \code
6737/// if (idx < tablesize)
6738/// r = table[idx]; // table does not contain default_value
6739/// else
6740/// r = default_value;
6741/// if (r != default_value)
6742/// ...
6743/// \endcode
6744/// Is optimized to:
6745/// \code
6746/// cond = idx < tablesize;
6747/// if (cond)
6748/// r = table[idx];
6749/// else
6750/// r = default_value;
6751/// if (cond)
6752/// ...
6753/// \endcode
6754/// Jump threading will then eliminate the second if(cond).
6755static void reuseTableCompare(
6756 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6757 Constant *DefaultValue,
6758 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6759 ICmpInst *CmpInst = dyn_cast<ICmpInst>(Val: PhiUser);
6760 if (!CmpInst)
6761 return;
6762
6763 // We require that the compare is in the same block as the phi so that jump
6764 // threading can do its work afterwards.
6765 if (CmpInst->getParent() != PhiBlock)
6766 return;
6767
6768 Constant *CmpOp1 = dyn_cast<Constant>(Val: CmpInst->getOperand(i_nocapture: 1));
6769 if (!CmpOp1)
6770 return;
6771
6772 Value *RangeCmp = RangeCheckBranch->getCondition();
6773 Constant *TrueConst = ConstantInt::getTrue(Ty: RangeCmp->getType());
6774 Constant *FalseConst = ConstantInt::getFalse(Ty: RangeCmp->getType());
6775
6776 // Check if the compare with the default value is constant true or false.
6777 const DataLayout &DL = PhiBlock->getDataLayout();
6778 Constant *DefaultConst = ConstantFoldCompareInstOperands(
6779 Predicate: CmpInst->getPredicate(), LHS: DefaultValue, RHS: CmpOp1, DL);
6780 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6781 return;
6782
6783 // Check if the compare with the case values is distinct from the default
6784 // compare result.
6785 for (auto ValuePair : Values) {
6786 Constant *CaseConst = ConstantFoldCompareInstOperands(
6787 Predicate: CmpInst->getPredicate(), LHS: ValuePair.second, RHS: CmpOp1, DL);
6788 if (!CaseConst || CaseConst == DefaultConst ||
6789 (CaseConst != TrueConst && CaseConst != FalseConst))
6790 return;
6791 }
6792
6793 // Check if the branch instruction dominates the phi node. It's a simple
6794 // dominance check, but sufficient for our needs.
6795 // Although this check is invariant in the calling loops, it's better to do it
6796 // at this late stage. Practically we do it at most once for a switch.
6797 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6798 for (BasicBlock *Pred : predecessors(BB: PhiBlock)) {
6799 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6800 return;
6801 }
6802
6803 if (DefaultConst == FalseConst) {
6804 // The compare yields the same result. We can replace it.
6805 CmpInst->replaceAllUsesWith(V: RangeCmp);
6806 ++NumTableCmpReuses;
6807 } else {
6808 // The compare yields the same result, just inverted. We can replace it.
6809 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6810 V1: RangeCmp, V2: ConstantInt::get(Ty: RangeCmp->getType(), V: 1), Name: "inverted.cmp",
6811 InsertBefore: RangeCheckBranch->getIterator());
6812 CmpInst->replaceAllUsesWith(V: InvertedTableCmp);
6813 ++NumTableCmpReuses;
6814 }
6815}
6816
6817/// If the switch is only used to initialize one or more phi nodes in a common
6818/// successor block with different constant values, replace the switch with
6819/// lookup tables.
6820static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
6821 DomTreeUpdater *DTU, const DataLayout &DL,
6822 const TargetTransformInfo &TTI) {
6823 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6824
6825 BasicBlock *BB = SI->getParent();
6826 Function *Fn = BB->getParent();
6827 // Only build lookup table when we have a target that supports it or the
6828 // attribute is not set.
6829 if (!TTI.shouldBuildLookupTables() ||
6830 (Fn->getFnAttribute(Kind: "no-jump-tables").getValueAsBool()))
6831 return false;
6832
6833 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6834 // split off a dense part and build a lookup table for that.
6835
6836 // FIXME: This creates arrays of GEPs to constant strings, which means each
6837 // GEP needs a runtime relocation in PIC code. We should just build one big
6838 // string and lookup indices into that.
6839
6840 // Ignore switches with less than three cases. Lookup tables will not make
6841 // them faster, so we don't analyze them.
6842 if (SI->getNumCases() < 3)
6843 return false;
6844
6845 // Figure out the corresponding result for each case value and phi node in the
6846 // common destination, as well as the min and max case values.
6847 assert(!SI->cases().empty());
6848 SwitchInst::CaseIt CI = SI->case_begin();
6849 ConstantInt *MinCaseVal = CI->getCaseValue();
6850 ConstantInt *MaxCaseVal = CI->getCaseValue();
6851
6852 BasicBlock *CommonDest = nullptr;
6853
6854 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6855 SmallDenseMap<PHINode *, ResultListTy> ResultLists;
6856
6857 SmallDenseMap<PHINode *, Constant *> DefaultResults;
6858 SmallDenseMap<PHINode *, Type *> ResultTypes;
6859 SmallVector<PHINode *, 4> PHIs;
6860
6861 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6862 ConstantInt *CaseVal = CI->getCaseValue();
6863 if (CaseVal->getValue().slt(RHS: MinCaseVal->getValue()))
6864 MinCaseVal = CaseVal;
6865 if (CaseVal->getValue().sgt(RHS: MaxCaseVal->getValue()))
6866 MaxCaseVal = CaseVal;
6867
6868 // Resulting value at phi nodes for this case value.
6869 using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
6870 ResultsTy Results;
6871 if (!getCaseResults(SI, CaseVal, CaseDest: CI->getCaseSuccessor(), CommonDest: &CommonDest,
6872 Res&: Results, DL, TTI))
6873 return false;
6874
6875 // Append the result from this case to the list for each phi.
6876 for (const auto &I : Results) {
6877 PHINode *PHI = I.first;
6878 Constant *Value = I.second;
6879 auto [It, Inserted] = ResultLists.try_emplace(Key: PHI);
6880 if (Inserted)
6881 PHIs.push_back(Elt: PHI);
6882 It->second.push_back(Elt: std::make_pair(x&: CaseVal, y&: Value));
6883 }
6884 }
6885
6886 // Keep track of the result types.
6887 for (PHINode *PHI : PHIs) {
6888 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6889 }
6890
6891 uint64_t NumResults = ResultLists[PHIs[0]].size();
6892
6893 // If the table has holes, we need a constant result for the default case
6894 // or a bitmask that fits in a register.
6895 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6896 bool HasDefaultResults =
6897 getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest,
6898 Res&: DefaultResultsList, DL, TTI);
6899
6900 for (const auto &I : DefaultResultsList) {
6901 PHINode *PHI = I.first;
6902 Constant *Result = I.second;
6903 DefaultResults[PHI] = Result;
6904 }
6905
6906 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
6907 MinCaseVal&: *MinCaseVal, MaxCaseVal: *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6908 uint64_t TableSize;
6909 if (UseSwitchConditionAsTableIndex)
6910 TableSize = MaxCaseVal->getLimitedValue() + 1;
6911 else
6912 TableSize =
6913 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
6914
6915 // If the default destination is unreachable, or if the lookup table covers
6916 // all values of the conditional variable, branch directly to the lookup table
6917 // BB. Otherwise, check that the condition is within the case range.
6918 bool DefaultIsReachable = !SI->defaultDestUnreachable();
6919
6920 bool TableHasHoles = (NumResults < TableSize);
6921
6922 // If the table has holes but the default destination doesn't produce any
6923 // constant results, the lookup table entries corresponding to the holes will
6924 // contain poison.
6925 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
6926
6927 // If the default destination doesn't produce a constant result but is still
6928 // reachable, and the lookup table has holes, we need to use a mask to
6929 // determine if the current index should load from the lookup table or jump
6930 // to the default case.
6931 // The mask is unnecessary if the table has holes but the default destination
6932 // is unreachable, as in that case the holes must also be unreachable.
6933 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
6934 if (NeedMask) {
6935 // As an extra penalty for the validity test we require more cases.
6936 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
6937 return false;
6938 if (!DL.fitsInLegalInteger(Width: TableSize))
6939 return false;
6940 }
6941
6942 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
6943 return false;
6944
6945 std::vector<DominatorTree::UpdateType> Updates;
6946
6947 // Compute the maximum table size representable by the integer type we are
6948 // switching upon.
6949 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
6950 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
6951 assert(MaxTableSize >= TableSize &&
6952 "It is impossible for a switch to have more entries than the max "
6953 "representable value of its input integer type's size.");
6954
6955 // Create the BB that does the lookups.
6956 Module &Mod = *CommonDest->getParent()->getParent();
6957 BasicBlock *LookupBB = BasicBlock::Create(
6958 Context&: Mod.getContext(), Name: "switch.lookup", Parent: CommonDest->getParent(), InsertBefore: CommonDest);
6959
6960 // Compute the table index value.
6961 Builder.SetInsertPoint(SI);
6962 Value *TableIndex;
6963 ConstantInt *TableIndexOffset;
6964 if (UseSwitchConditionAsTableIndex) {
6965 TableIndexOffset = ConstantInt::get(Ty: MaxCaseVal->getIntegerType(), V: 0);
6966 TableIndex = SI->getCondition();
6967 } else {
6968 TableIndexOffset = MinCaseVal;
6969 // If the default is unreachable, all case values are s>= MinCaseVal. Then
6970 // we can try to attach nsw.
6971 bool MayWrap = true;
6972 if (!DefaultIsReachable) {
6973 APInt Res = MaxCaseVal->getValue().ssub_ov(RHS: MinCaseVal->getValue(), Overflow&: MayWrap);
6974 (void)Res;
6975 }
6976
6977 TableIndex = Builder.CreateSub(LHS: SI->getCondition(), RHS: TableIndexOffset,
6978 Name: "switch.tableidx", /*HasNUW =*/false,
6979 /*HasNSW =*/!MayWrap);
6980 }
6981
6982 BranchInst *RangeCheckBranch = nullptr;
6983
6984 // Grow the table to cover all possible index values to avoid the range check.
6985 // It will use the default result to fill in the table hole later, so make
6986 // sure it exist.
6987 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
6988 ConstantRange CR = computeConstantRange(V: TableIndex, /* ForSigned */ false);
6989 // Grow the table shouldn't have any size impact by checking
6990 // wouldFitInRegister.
6991 // TODO: Consider growing the table also when it doesn't fit in a register
6992 // if no optsize is specified.
6993 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
6994 if (!CR.isUpperWrapped() && all_of(Range&: ResultTypes, P: [&](const auto &KV) {
6995 return SwitchLookupTable::wouldFitInRegister(
6996 DL, TableSize: UpperBound, ElementType: KV.second /* ResultType */);
6997 })) {
6998 // There may be some case index larger than the UpperBound (unreachable
6999 // case), so make sure the table size does not get smaller.
7000 TableSize = std::max(a: UpperBound, b: TableSize);
7001 // The default branch is unreachable after we enlarge the lookup table.
7002 // Adjust DefaultIsReachable to reuse code path.
7003 DefaultIsReachable = false;
7004 }
7005 }
7006
7007 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7008 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7009 Builder.CreateBr(Dest: LookupBB);
7010 if (DTU)
7011 Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
7012 // Note: We call removeProdecessor later since we need to be able to get the
7013 // PHI value for the default case in case we're using a bit mask.
7014 } else {
7015 Value *Cmp = Builder.CreateICmpULT(
7016 LHS: TableIndex, RHS: ConstantInt::get(Ty: MinCaseVal->getType(), V: TableSize));
7017 RangeCheckBranch =
7018 Builder.CreateCondBr(Cond: Cmp, True: LookupBB, False: SI->getDefaultDest());
7019 if (DTU)
7020 Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
7021 }
7022
7023 // Populate the BB that does the lookups.
7024 Builder.SetInsertPoint(LookupBB);
7025
7026 if (NeedMask) {
7027 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7028 // re-purposed to do the hole check, and we create a new LookupBB.
7029 BasicBlock *MaskBB = LookupBB;
7030 MaskBB->setName("switch.hole_check");
7031 LookupBB = BasicBlock::Create(Context&: Mod.getContext(), Name: "switch.lookup",
7032 Parent: CommonDest->getParent(), InsertBefore: CommonDest);
7033
7034 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7035 // unnecessary illegal types.
7036 uint64_t TableSizePowOf2 = NextPowerOf2(A: std::max(a: 7ULL, b: TableSize - 1ULL));
7037 APInt MaskInt(TableSizePowOf2, 0);
7038 APInt One(TableSizePowOf2, 1);
7039 // Build bitmask; fill in a 1 bit for every case.
7040 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7041 for (const auto &Result : ResultList) {
7042 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7043 .getLimitedValue();
7044 MaskInt |= One << Idx;
7045 }
7046 ConstantInt *TableMask = ConstantInt::get(Context&: Mod.getContext(), V: MaskInt);
7047
7048 // Get the TableIndex'th bit of the bitmask.
7049 // If this bit is 0 (meaning hole) jump to the default destination,
7050 // else continue with table lookup.
7051 IntegerType *MapTy = TableMask->getIntegerType();
7052 Value *MaskIndex =
7053 Builder.CreateZExtOrTrunc(V: TableIndex, DestTy: MapTy, Name: "switch.maskindex");
7054 Value *Shifted = Builder.CreateLShr(LHS: TableMask, RHS: MaskIndex, Name: "switch.shifted");
7055 Value *LoBit = Builder.CreateTrunc(
7056 V: Shifted, DestTy: Type::getInt1Ty(C&: Mod.getContext()), Name: "switch.lobit");
7057 Builder.CreateCondBr(Cond: LoBit, True: LookupBB, False: SI->getDefaultDest());
7058 if (DTU) {
7059 Updates.push_back(x: {DominatorTree::Insert, MaskBB, LookupBB});
7060 Updates.push_back(x: {DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7061 }
7062 Builder.SetInsertPoint(LookupBB);
7063 addPredecessorToBlock(Succ: SI->getDefaultDest(), NewPred: MaskBB, ExistPred: BB);
7064 }
7065
7066 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7067 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7068 // do not delete PHINodes here.
7069 SI->getDefaultDest()->removePredecessor(Pred: BB,
7070 /*KeepOneInputPHIs=*/true);
7071 if (DTU)
7072 Updates.push_back(x: {DominatorTree::Delete, BB, SI->getDefaultDest()});
7073 }
7074
7075 for (PHINode *PHI : PHIs) {
7076 const ResultListTy &ResultList = ResultLists[PHI];
7077
7078 Type *ResultType = ResultList.begin()->second->getType();
7079
7080 // Use any value to fill the lookup table holes.
7081 Constant *DV =
7082 AllHolesArePoison ? PoisonValue::get(T: ResultType) : DefaultResults[PHI];
7083 StringRef FuncName = Fn->getName();
7084 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
7085 DL, FuncName);
7086
7087 Value *Result = Table.buildLookup(Index: TableIndex, Builder, DL);
7088
7089 // Do a small peephole optimization: re-use the switch table compare if
7090 // possible.
7091 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7092 BasicBlock *PhiBlock = PHI->getParent();
7093 // Search for compare instructions which use the phi.
7094 for (auto *User : PHI->users()) {
7095 reuseTableCompare(PhiUser: User, PhiBlock, RangeCheckBranch, DefaultValue: DV, Values: ResultList);
7096 }
7097 }
7098
7099 PHI->addIncoming(V: Result, BB: LookupBB);
7100 }
7101
7102 Builder.CreateBr(Dest: CommonDest);
7103 if (DTU)
7104 Updates.push_back(x: {DominatorTree::Insert, LookupBB, CommonDest});
7105
7106 // Remove the switch.
7107 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7108 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
7109 BasicBlock *Succ = SI->getSuccessor(idx: i);
7110
7111 if (Succ == SI->getDefaultDest())
7112 continue;
7113 Succ->removePredecessor(Pred: BB);
7114 if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
7115 Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
7116 }
7117 SI->eraseFromParent();
7118
7119 if (DTU)
7120 DTU->applyUpdates(Updates);
7121
7122 ++NumLookupTables;
7123 if (NeedMask)
7124 ++NumLookupTablesHoles;
7125 return true;
7126}
7127
7128/// Try to transform a switch that has "holes" in it to a contiguous sequence
7129/// of cases.
7130///
7131/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7132/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7133///
7134/// This converts a sparse switch into a dense switch which allows better
7135/// lowering and could also allow transforming into a lookup table.
7136static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7137 const DataLayout &DL,
7138 const TargetTransformInfo &TTI) {
7139 auto *CondTy = cast<IntegerType>(Val: SI->getCondition()->getType());
7140 if (CondTy->getIntegerBitWidth() > 64 ||
7141 !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
7142 return false;
7143 // Only bother with this optimization if there are more than 3 switch cases;
7144 // SDAG will only bother creating jump tables for 4 or more cases.
7145 if (SI->getNumCases() < 4)
7146 return false;
7147
7148 // This transform is agnostic to the signedness of the input or case values. We
7149 // can treat the case values as signed or unsigned. We can optimize more common
7150 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7151 // as signed.
7152 SmallVector<int64_t,4> Values;
7153 for (const auto &C : SI->cases())
7154 Values.push_back(Elt: C.getCaseValue()->getValue().getSExtValue());
7155 llvm::sort(C&: Values);
7156
7157 // If the switch is already dense, there's nothing useful to do here.
7158 if (isSwitchDense(Values))
7159 return false;
7160
7161 // First, transform the values such that they start at zero and ascend.
7162 int64_t Base = Values[0];
7163 for (auto &V : Values)
7164 V -= (uint64_t)(Base);
7165
7166 // Now we have signed numbers that have been shifted so that, given enough
7167 // precision, there are no negative values. Since the rest of the transform
7168 // is bitwise only, we switch now to an unsigned representation.
7169
7170 // This transform can be done speculatively because it is so cheap - it
7171 // results in a single rotate operation being inserted.
7172
7173 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7174 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7175 // less than 64.
7176 unsigned Shift = 64;
7177 for (auto &V : Values)
7178 Shift = std::min(a: Shift, b: (unsigned)llvm::countr_zero(Val: (uint64_t)V));
7179 assert(Shift < 64);
7180 if (Shift > 0)
7181 for (auto &V : Values)
7182 V = (int64_t)((uint64_t)V >> Shift);
7183
7184 if (!isSwitchDense(Values))
7185 // Transform didn't create a dense switch.
7186 return false;
7187
7188 // The obvious transform is to shift the switch condition right and emit a
7189 // check that the condition actually cleanly divided by GCD, i.e.
7190 // C & (1 << Shift - 1) == 0
7191 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7192 //
7193 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7194 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7195 // are nonzero then the switch condition will be very large and will hit the
7196 // default case.
7197
7198 auto *Ty = cast<IntegerType>(Val: SI->getCondition()->getType());
7199 Builder.SetInsertPoint(SI);
7200 Value *Sub =
7201 Builder.CreateSub(LHS: SI->getCondition(), RHS: ConstantInt::get(Ty, V: Base));
7202 Value *Rot = Builder.CreateIntrinsic(
7203 RetTy: Ty, ID: Intrinsic::fshl,
7204 Args: {Sub, Sub, ConstantInt::get(Ty, V: Ty->getBitWidth() - Shift)});
7205 SI->replaceUsesOfWith(From: SI->getCondition(), To: Rot);
7206
7207 for (auto Case : SI->cases()) {
7208 auto *Orig = Case.getCaseValue();
7209 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7210 Case.setValue(cast<ConstantInt>(Val: ConstantInt::get(Ty, V: Sub.lshr(shiftAmt: Shift))));
7211 }
7212 return true;
7213}
7214
7215/// Tries to transform switch of powers of two to reduce switch range.
7216/// For example, switch like:
7217/// switch (C) { case 1: case 2: case 64: case 128: }
7218/// will be transformed to:
7219/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7220///
7221/// This transformation allows better lowering and may transform the switch
7222/// instruction into a sequence of bit manipulation and a smaller
7223/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7224/// address of the jump target, and indirectly jump to it).
7225static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
7226 const DataLayout &DL,
7227 const TargetTransformInfo &TTI) {
7228 Value *Condition = SI->getCondition();
7229 LLVMContext &Context = SI->getContext();
7230 auto *CondTy = cast<IntegerType>(Val: Condition->getType());
7231
7232 if (CondTy->getIntegerBitWidth() > 64 ||
7233 !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
7234 return false;
7235
7236 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7237 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7238 {Condition, ConstantInt::getTrue(Context)});
7239 if (TTI.getIntrinsicInstrCost(ICA: Attrs, CostKind: TTI::TCK_SizeAndLatency) >
7240 TTI::TCC_Basic * 2)
7241 return false;
7242
7243 // Only bother with this optimization if there are more than 3 switch cases.
7244 // SDAG will start emitting jump tables for 4 or more cases.
7245 if (SI->getNumCases() < 4)
7246 return false;
7247
7248 // We perform this optimization only for switches with
7249 // unreachable default case.
7250 // This assumtion will save us from checking if `Condition` is a power of two.
7251 if (!SI->defaultDestUnreachable())
7252 return false;
7253
7254 // Check that switch cases are powers of two.
7255 SmallVector<uint64_t, 4> Values;
7256 for (const auto &Case : SI->cases()) {
7257 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7258 if (llvm::has_single_bit(Value: CaseValue))
7259 Values.push_back(Elt: CaseValue);
7260 else
7261 return false;
7262 }
7263
7264 // isSwichDense requires case values to be sorted.
7265 llvm::sort(C&: Values);
7266 if (!isSwitchDense(NumCases: Values.size(), CaseRange: llvm::countr_zero(Val: Values.back()) -
7267 llvm::countr_zero(Val: Values.front()) + 1))
7268 // Transform is unable to generate dense switch.
7269 return false;
7270
7271 Builder.SetInsertPoint(SI);
7272
7273 // Replace each case with its trailing zeros number.
7274 for (auto &Case : SI->cases()) {
7275 auto *OrigValue = Case.getCaseValue();
7276 Case.setValue(ConstantInt::get(Ty: OrigValue->getIntegerType(),
7277 V: OrigValue->getValue().countr_zero()));
7278 }
7279
7280 // Replace condition with its trailing zeros number.
7281 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7282 ID: Intrinsic::cttz, Types: {CondTy}, Args: {Condition, ConstantInt::getTrue(Context)});
7283
7284 SI->setCondition(ConditionTrailingZeros);
7285
7286 return true;
7287}
7288
7289/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7290/// the same destination.
7291static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
7292 DomTreeUpdater *DTU) {
7293 auto *Cmp = dyn_cast<CmpIntrinsic>(Val: SI->getCondition());
7294 if (!Cmp || !Cmp->hasOneUse())
7295 return false;
7296
7297 SmallVector<uint32_t, 4> Weights;
7298 bool HasWeights = extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights);
7299 if (!HasWeights)
7300 Weights.resize(N: 4); // Avoid checking HasWeights everywhere.
7301
7302 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7303 int64_t Res;
7304 BasicBlock *Succ, *OtherSucc;
7305 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7306 BasicBlock *Unreachable = nullptr;
7307
7308 if (SI->getNumCases() == 2) {
7309 // Find which of 1, 0 or -1 is missing (handled by default dest).
7310 SmallSet<int64_t, 3> Missing;
7311 Missing.insert(V: 1);
7312 Missing.insert(V: 0);
7313 Missing.insert(V: -1);
7314
7315 Succ = SI->getDefaultDest();
7316 SuccWeight = Weights[0];
7317 OtherSucc = nullptr;
7318 for (auto &Case : SI->cases()) {
7319 std::optional<int64_t> Val =
7320 Case.getCaseValue()->getValue().trySExtValue();
7321 if (!Val)
7322 return false;
7323 if (!Missing.erase(V: *Val))
7324 return false;
7325 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7326 return false;
7327 OtherSucc = Case.getCaseSuccessor();
7328 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7329 }
7330
7331 assert(Missing.size() == 1 && "Should have one case left");
7332 Res = *Missing.begin();
7333 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7334 // Normalize so that Succ is taken once and OtherSucc twice.
7335 Unreachable = SI->getDefaultDest();
7336 Succ = OtherSucc = nullptr;
7337 for (auto &Case : SI->cases()) {
7338 BasicBlock *NewSucc = Case.getCaseSuccessor();
7339 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7340 if (!OtherSucc || OtherSucc == NewSucc) {
7341 OtherSucc = NewSucc;
7342 OtherSuccWeight += Weight;
7343 } else if (!Succ) {
7344 Succ = NewSucc;
7345 SuccWeight = Weight;
7346 } else if (Succ == NewSucc) {
7347 std::swap(a&: Succ, b&: OtherSucc);
7348 std::swap(a&: SuccWeight, b&: OtherSuccWeight);
7349 } else
7350 return false;
7351 }
7352 for (auto &Case : SI->cases()) {
7353 std::optional<int64_t> Val =
7354 Case.getCaseValue()->getValue().trySExtValue();
7355 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7356 return false;
7357 if (Case.getCaseSuccessor() == Succ) {
7358 Res = *Val;
7359 break;
7360 }
7361 }
7362 } else {
7363 return false;
7364 }
7365
7366 // Determine predicate for the missing case.
7367 ICmpInst::Predicate Pred;
7368 switch (Res) {
7369 case 1:
7370 Pred = ICmpInst::ICMP_UGT;
7371 break;
7372 case 0:
7373 Pred = ICmpInst::ICMP_EQ;
7374 break;
7375 case -1:
7376 Pred = ICmpInst::ICMP_ULT;
7377 break;
7378 }
7379 if (Cmp->isSigned())
7380 Pred = ICmpInst::getSignedPredicate(Pred);
7381
7382 MDNode *NewWeights = nullptr;
7383 if (HasWeights)
7384 NewWeights = MDBuilder(SI->getContext())
7385 .createBranchWeights(TrueWeight: SuccWeight, FalseWeight: OtherSuccWeight);
7386
7387 BasicBlock *BB = SI->getParent();
7388 Builder.SetInsertPoint(SI->getIterator());
7389 Value *ICmp = Builder.CreateICmp(P: Pred, LHS: Cmp->getLHS(), RHS: Cmp->getRHS());
7390 Builder.CreateCondBr(Cond: ICmp, True: Succ, False: OtherSucc, BranchWeights: NewWeights,
7391 Unpredictable: SI->getMetadata(KindID: LLVMContext::MD_unpredictable));
7392 OtherSucc->removePredecessor(Pred: BB);
7393 if (Unreachable)
7394 Unreachable->removePredecessor(Pred: BB);
7395 SI->eraseFromParent();
7396 Cmp->eraseFromParent();
7397 if (DTU && Unreachable)
7398 DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, Unreachable}});
7399 return true;
7400}
7401
7402/// Checking whether two cases of SI are equal depends on the contents of the
7403/// BasicBlock and the incoming values of their successor PHINodes.
7404/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7405/// calling this function on each BasicBlock every time isEqual is called,
7406/// especially since the same BasicBlock may be passed as an argument multiple
7407/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7408/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7409/// of the incoming values.
struct SwitchSuccWrapper {
  // Successor block of the switch that this wrapper stands for.
  BasicBlock *Dest;
  // Non-owning pointer to the precomputed PHINode -> predecessor block ->
  // incoming value table used for hashing and equality checks.
  DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> *PhiPredIVs;
};
7414
7415namespace llvm {
7416template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
7417 static const SwitchSuccWrapper *getEmptyKey() {
7418 return static_cast<SwitchSuccWrapper *>(
7419 DenseMapInfo<void *>::getEmptyKey());
7420 }
7421 static const SwitchSuccWrapper *getTombstoneKey() {
7422 return static_cast<SwitchSuccWrapper *>(
7423 DenseMapInfo<void *>::getTombstoneKey());
7424 }
7425 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
7426 BasicBlock *Succ = SSW->Dest;
7427 BranchInst *BI = cast<BranchInst>(Val: Succ->getTerminator());
7428 assert(BI->isUnconditional() &&
7429 "Only supporting unconditional branches for now");
7430 assert(BI->getNumSuccessors() == 1 &&
7431 "Expected unconditional branches to have one successor");
7432 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
7433
7434 // Since we assume the BB is just a single BranchInst with a single
7435 // successor, we hash as the BB and the incoming Values of its successor
7436 // PHIs. Initially, we tried to just use the successor BB as the hash, but
7437 // including the incoming PHI values leads to better performance.
7438 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
7439 // time and passing it in SwitchSuccWrapper, but this slowed down the
7440 // average compile time without having any impact on the worst case compile
7441 // time.
7442 BasicBlock *BB = BI->getSuccessor(i: 0);
7443 SmallVector<Value *> PhiValsForBB;
7444 for (PHINode &Phi : BB->phis())
7445 PhiValsForBB.emplace_back(Args&: (*SSW->PhiPredIVs)[&Phi][BB]);
7446
7447 return hash_combine(args: BB, args: hash_combine_range(R&: PhiValsForBB));
7448 }
7449 static bool isEqual(const SwitchSuccWrapper *LHS,
7450 const SwitchSuccWrapper *RHS) {
7451 auto EKey = DenseMapInfo<SwitchSuccWrapper *>::getEmptyKey();
7452 auto TKey = DenseMapInfo<SwitchSuccWrapper *>::getTombstoneKey();
7453 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
7454 return LHS == RHS;
7455
7456 BasicBlock *A = LHS->Dest;
7457 BasicBlock *B = RHS->Dest;
7458
7459 // FIXME: we checked that the size of A and B are both 1 in
7460 // simplifyDuplicateSwitchArms to make the Case list smaller to
7461 // improve performance. If we decide to support BasicBlocks with more
7462 // than just a single instruction, we need to check that A.size() ==
7463 // B.size() here, and we need to check more than just the BranchInsts
7464 // for equality.
7465
7466 BranchInst *ABI = cast<BranchInst>(Val: A->getTerminator());
7467 BranchInst *BBI = cast<BranchInst>(Val: B->getTerminator());
7468 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
7469 "Only supporting unconditional branches for now");
7470 if (ABI->getSuccessor(i: 0) != BBI->getSuccessor(i: 0))
7471 return false;
7472
7473 // Need to check that PHIs in successor have matching values
7474 BasicBlock *Succ = ABI->getSuccessor(i: 0);
7475 for (PHINode &Phi : Succ->phis()) {
7476 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
7477 if (PredIVs[A] != PredIVs[B])
7478 return false;
7479 }
7480
7481 return true;
7482 }
7483};
7484} // namespace llvm
7485
7486bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
7487 DomTreeUpdater *DTU) {
7488 // Build Cases. Skip BBs that are not candidates for simplification. Mark
7489 // PHINodes which need to be processed into PhiPredIVs. We decide to process
7490 // an entire PHI at once after the loop, opposed to calling
7491 // getIncomingValueForBlock inside this loop, since each call to
7492 // getIncomingValueForBlock is O(|Preds|).
7493 SmallPtrSet<PHINode *, 8> Phis;
7494 SmallPtrSet<BasicBlock *, 8> Seen;
7495 DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
7496 DenseMap<BasicBlock *, SmallVector<unsigned, 4>> BBToSuccessorIndexes;
7497 SmallVector<SwitchSuccWrapper> Cases;
7498 Cases.reserve(N: SI->getNumSuccessors());
7499
7500 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
7501 BasicBlock *BB = SI->getSuccessor(idx: I);
7502
7503 // FIXME: Support more than just a single BranchInst. One way we could do
7504 // this is by taking a hashing approach of all insts in BB.
7505 if (BB->size() != 1)
7506 continue;
7507
7508 // FIXME: This case needs some extra care because the terminators other than
7509 // SI need to be updated. For now, consider only backedges to the SI.
7510 if (BB->hasNPredecessorsOrMore(N: 4) ||
7511 BB->getUniquePredecessor() != SI->getParent())
7512 continue;
7513
7514 // FIXME: Relax that the terminator is a BranchInst by checking for equality
7515 // on other kinds of terminators. We decide to only support unconditional
7516 // branches for now for compile time reasons.
7517 auto *BI = dyn_cast<BranchInst>(Val: BB->getTerminator());
7518 if (!BI || BI->isConditional())
7519 continue;
7520
7521 if (Seen.insert(Ptr: BB).second) {
7522 // Keep track of which PHIs we need as keys in PhiPredIVs below.
7523 for (BasicBlock *Succ : BI->successors())
7524 Phis.insert_range(R: llvm::make_pointer_range(Range: Succ->phis()));
7525 // Add the successor only if not previously visited.
7526 Cases.emplace_back(Args: SwitchSuccWrapper{.Dest: BB, .PhiPredIVs: &PhiPredIVs});
7527 }
7528
7529 BBToSuccessorIndexes[BB].emplace_back(Args&: I);
7530 }
7531
7532 // Precompute a data structure to improve performance of isEqual for
7533 // SwitchSuccWrapper.
7534 PhiPredIVs.reserve(NumEntries: Phis.size());
7535 for (PHINode *Phi : Phis) {
7536 auto &IVs =
7537 PhiPredIVs.try_emplace(Key: Phi, Args: Phi->getNumIncomingValues()).first->second;
7538 for (auto &IV : Phi->incoming_values())
7539 IVs.insert(KV: {Phi->getIncomingBlock(U: IV), IV.get()});
7540 }
7541
7542 // Build a set such that if the SwitchSuccWrapper exists in the set and
7543 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
7544 // which is not in the set should be replaced with the one in the set. If the
7545 // SwitchSuccWrapper is not in the set, then it should be added to the set so
7546 // other SwitchSuccWrappers can check against it in the same manner. We use
7547 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
7548 // around information to isEquality, getHashValue, and when doing the
7549 // replacement with better performance.
7550 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
7551 ReplaceWith.reserve(Size: Cases.size());
7552
7553 SmallVector<DominatorTree::UpdateType> Updates;
7554 Updates.reserve(N: ReplaceWith.size());
7555 bool MadeChange = false;
7556 for (auto &SSW : Cases) {
7557 // SSW is a candidate for simplification. If we find a duplicate BB,
7558 // replace it.
7559 const auto [It, Inserted] = ReplaceWith.insert(V: &SSW);
7560 if (!Inserted) {
7561 // We know that SI's parent BB no longer dominates the old case successor
7562 // since we are making it dead.
7563 Updates.push_back(Elt: {DominatorTree::Delete, SI->getParent(), SSW.Dest});
7564 const auto &Successors = BBToSuccessorIndexes.at(Val: SSW.Dest);
7565 for (unsigned Idx : Successors)
7566 SI->setSuccessor(idx: Idx, NewSucc: (*It)->Dest);
7567 MadeChange = true;
7568 }
7569 }
7570
7571 if (DTU)
7572 DTU->applyUpdates(Updates);
7573
7574 return MadeChange;
7575}
7576
7577bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7578 BasicBlock *BB = SI->getParent();
7579
7580 if (isValueEqualityComparison(TI: SI)) {
7581 // If we only have one predecessor, and if it is a branch on this value,
7582 // see if that predecessor totally determines the outcome of this switch.
7583 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7584 if (simplifyEqualityComparisonWithOnlyPredecessor(TI: SI, Pred: OnlyPred, Builder))
7585 return requestResimplify();
7586
7587 Value *Cond = SI->getCondition();
7588 if (SelectInst *Select = dyn_cast<SelectInst>(Val: Cond))
7589 if (simplifySwitchOnSelect(SI, Select))
7590 return requestResimplify();
7591
7592 // If the block only contains the switch, see if we can fold the block
7593 // away into any preds.
7594 if (SI == &*BB->instructionsWithoutDebug(SkipPseudoOp: false).begin())
7595 if (foldValueComparisonIntoPredecessors(TI: SI, Builder))
7596 return requestResimplify();
7597 }
7598
7599 // Try to transform the switch into an icmp and a branch.
7600 // The conversion from switch to comparison may lose information on
7601 // impossible switch values, so disable it early in the pipeline.
7602 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
7603 return requestResimplify();
7604
7605 // Remove unreachable cases.
7606 if (eliminateDeadSwitchCases(SI, DTU, AC: Options.AC, DL))
7607 return requestResimplify();
7608
7609 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
7610 return requestResimplify();
7611
7612 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7613 return requestResimplify();
7614
7615 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
7616 return requestResimplify();
7617
7618 // The conversion from switch to lookup tables results in difficult-to-analyze
7619 // code and makes pruning branches much harder. This is a problem if the
7620 // switch expression itself can still be restricted as a result of inlining or
7621 // CVP. Therefore, only apply this transformation during late stages of the
7622 // optimisation pipeline.
7623 if (Options.ConvertSwitchToLookupTable &&
7624 switchToLookupTable(SI, Builder, DTU, DL, TTI))
7625 return requestResimplify();
7626
7627 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7628 return requestResimplify();
7629
7630 if (reduceSwitchRange(SI, Builder, DL, TTI))
7631 return requestResimplify();
7632
7633 if (HoistCommon &&
7634 hoistCommonCodeFromSuccessors(TI: SI, AllInstsEqOnly: !Options.HoistCommonInsts))
7635 return requestResimplify();
7636
7637 if (simplifyDuplicateSwitchArms(SI, DTU))
7638 return requestResimplify();
7639
7640 return false;
7641}
7642
7643bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7644 BasicBlock *BB = IBI->getParent();
7645 bool Changed = false;
7646
7647 // Eliminate redundant destinations.
7648 SmallPtrSet<Value *, 8> Succs;
7649 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
7650 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7651 BasicBlock *Dest = IBI->getDestination(i);
7652 if (!Dest->hasAddressTaken() || !Succs.insert(Ptr: Dest).second) {
7653 if (!Dest->hasAddressTaken())
7654 RemovedSuccs.insert(X: Dest);
7655 Dest->removePredecessor(Pred: BB);
7656 IBI->removeDestination(i);
7657 --i;
7658 --e;
7659 Changed = true;
7660 }
7661 }
7662
7663 if (DTU) {
7664 std::vector<DominatorTree::UpdateType> Updates;
7665 Updates.reserve(n: RemovedSuccs.size());
7666 for (auto *RemovedSucc : RemovedSuccs)
7667 Updates.push_back(x: {DominatorTree::Delete, BB, RemovedSucc});
7668 DTU->applyUpdates(Updates);
7669 }
7670
7671 if (IBI->getNumDestinations() == 0) {
7672 // If the indirectbr has no successors, change it to unreachable.
7673 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7674 eraseTerminatorAndDCECond(TI: IBI);
7675 return true;
7676 }
7677
7678 if (IBI->getNumDestinations() == 1) {
7679 // If the indirectbr has one successor, change it to a direct branch.
7680 BranchInst::Create(IfTrue: IBI->getDestination(i: 0), InsertBefore: IBI->getIterator());
7681 eraseTerminatorAndDCECond(TI: IBI);
7682 return true;
7683 }
7684
7685 if (SelectInst *SI = dyn_cast<SelectInst>(Val: IBI->getAddress())) {
7686 if (simplifyIndirectBrOnSelect(IBI, SI))
7687 return requestResimplify();
7688 }
7689 return Changed;
7690}
7691
7692/// Given an block with only a single landing pad and a unconditional branch
7693/// try to find another basic block which this one can be merged with. This
7694/// handles cases where we have multiple invokes with unique landing pads, but
7695/// a shared handler.
7696///
7697/// We specifically choose to not worry about merging non-empty blocks
7698/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7699/// practice, the optimizer produces empty landing pad blocks quite frequently
7700/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7701/// sinking in this file)
7702///
7703/// This is primarily a code size optimization. We need to avoid performing
7704/// any transform which might inhibit optimization (such as our ability to
7705/// specialize a particular handler via tail commoning). We do this by not
7706/// merging any blocks which require us to introduce a phi. Since the same
7707/// values are flowing through both blocks, we don't lose any ability to
7708/// specialize. If anything, we make such specialization more likely.
7709///
7710/// TODO - This transformation could remove entries from a phi in the target
7711/// block when the inputs in the phi are the same for the two blocks being
7712/// merged. In some cases, this could result in removal of the PHI entirely.
7713static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
7714 BasicBlock *BB, DomTreeUpdater *DTU) {
7715 auto Succ = BB->getUniqueSuccessor();
7716 assert(Succ);
7717 // If there's a phi in the successor block, we'd likely have to introduce
7718 // a phi into the merged landing pad block.
7719 if (isa<PHINode>(Val: *Succ->begin()))
7720 return false;
7721
7722 for (BasicBlock *OtherPred : predecessors(BB: Succ)) {
7723 if (BB == OtherPred)
7724 continue;
7725 BasicBlock::iterator I = OtherPred->begin();
7726 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(Val&: I);
7727 if (!LPad2 || !LPad2->isIdenticalTo(I: LPad))
7728 continue;
7729 ++I;
7730 BranchInst *BI2 = dyn_cast<BranchInst>(Val&: I);
7731 if (!BI2 || !BI2->isIdenticalTo(I: BI))
7732 continue;
7733
7734 std::vector<DominatorTree::UpdateType> Updates;
7735
7736 // We've found an identical block. Update our predecessors to take that
7737 // path instead and make ourselves dead.
7738 SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
7739 for (BasicBlock *Pred : UniquePreds) {
7740 InvokeInst *II = cast<InvokeInst>(Val: Pred->getTerminator());
7741 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7742 "unexpected successor");
7743 II->setUnwindDest(OtherPred);
7744 if (DTU) {
7745 Updates.push_back(x: {DominatorTree::Insert, Pred, OtherPred});
7746 Updates.push_back(x: {DominatorTree::Delete, Pred, BB});
7747 }
7748 }
7749
7750 SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
7751 for (BasicBlock *Succ : UniqueSuccs) {
7752 Succ->removePredecessor(Pred: BB);
7753 if (DTU)
7754 Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
7755 }
7756
7757 IRBuilder<> Builder(BI);
7758 Builder.CreateUnreachable();
7759 BI->eraseFromParent();
7760 if (DTU)
7761 DTU->applyUpdates(Updates);
7762 return true;
7763 }
7764 return false;
7765}
7766
7767bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7768 return Branch->isUnconditional() ? simplifyUncondBranch(BI: Branch, Builder)
7769 : simplifyCondBranch(BI: Branch, Builder);
7770}
7771
7772bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7773 IRBuilder<> &Builder) {
7774 BasicBlock *BB = BI->getParent();
7775 BasicBlock *Succ = BI->getSuccessor(i: 0);
7776
7777 // If the Terminator is the only non-phi instruction, simplify the block.
7778 // If LoopHeader is provided, check if the block or its successor is a loop
7779 // header. (This is for early invocations before loop simplify and
7780 // vectorization to keep canonical loop forms for nested loops. These blocks
7781 // can be eliminated when the pass is invoked later in the back-end.)
7782 // Note that if BB has only one predecessor then we do not introduce new
7783 // backedge, so we can eliminate BB.
7784 bool NeedCanonicalLoop =
7785 Options.NeedCanonicalLoop &&
7786 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(N: 2) &&
7787 (is_contained(Range&: LoopHeaders, Element: BB) || is_contained(Range&: LoopHeaders, Element: Succ)));
7788 BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
7789 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7790 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7791 return true;
7792
7793 // If the only instruction in the block is a seteq/setne comparison against a
7794 // constant, try to simplify the block.
7795 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val&: I))
7796 if (ICI->isEquality() && isa<ConstantInt>(Val: ICI->getOperand(i_nocapture: 1))) {
7797 ++I;
7798 if (I->isTerminator() &&
7799 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7800 return true;
7801 }
7802
7803 // See if we can merge an empty landing pad block with another which is
7804 // equivalent.
7805 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(Val&: I)) {
7806 ++I;
7807 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
7808 return true;
7809 }
7810
7811 // If this basic block is ONLY a compare and a branch, and if a predecessor
7812 // branches to us and our successor, fold the comparison into the
7813 // predecessor and use logical operations to update the incoming value
7814 // for PHI nodes in common successor.
7815 if (Options.SpeculateBlocks &&
7816 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, TTI: &TTI,
7817 BonusInstThreshold: Options.BonusInstThreshold))
7818 return requestResimplify();
7819 return false;
7820}
7821
7822static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
7823 BasicBlock *PredPred = nullptr;
7824 for (auto *P : predecessors(BB)) {
7825 BasicBlock *PPred = P->getSinglePredecessor();
7826 if (!PPred || (PredPred && PredPred != PPred))
7827 return nullptr;
7828 PredPred = PPred;
7829 }
7830 return PredPred;
7831}
7832
7833/// Fold the following pattern:
7834/// bb0:
7835/// br i1 %cond1, label %bb1, label %bb2
7836/// bb1:
7837/// br i1 %cond2, label %bb3, label %bb4
7838/// bb2:
7839/// br i1 %cond2, label %bb4, label %bb3
7840/// bb3:
7841/// ...
7842/// bb4:
7843/// ...
7844/// into
7845/// bb0:
7846/// %cond = xor i1 %cond1, %cond2
7847/// br i1 %cond, label %bb4, label %bb3
7848/// bb3:
7849/// ...
7850/// bb4:
7851/// ...
7852/// NOTE: %cond2 always dominates the terminator of bb0.
7853static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) {
7854 BasicBlock *BB = BI->getParent();
7855 BasicBlock *BB1 = BI->getSuccessor(i: 0);
7856 BasicBlock *BB2 = BI->getSuccessor(i: 1);
7857 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
7858 if (Succ == BB)
7859 return false;
7860 if (&Succ->front() != Succ->getTerminator())
7861 return false;
7862 SuccBI = dyn_cast<BranchInst>(Val: Succ->getTerminator());
7863 if (!SuccBI || !SuccBI->isConditional())
7864 return false;
7865 BasicBlock *Succ1 = SuccBI->getSuccessor(i: 0);
7866 BasicBlock *Succ2 = SuccBI->getSuccessor(i: 1);
7867 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
7868 !isa<PHINode>(Val: Succ1->front()) && !isa<PHINode>(Val: Succ2->front());
7869 };
7870 BranchInst *BB1BI, *BB2BI;
7871 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
7872 return false;
7873
7874 if (BB1BI->getCondition() != BB2BI->getCondition() ||
7875 BB1BI->getSuccessor(i: 0) != BB2BI->getSuccessor(i: 1) ||
7876 BB1BI->getSuccessor(i: 1) != BB2BI->getSuccessor(i: 0))
7877 return false;
7878
7879 BasicBlock *BB3 = BB1BI->getSuccessor(i: 0);
7880 BasicBlock *BB4 = BB1BI->getSuccessor(i: 1);
7881 IRBuilder<> Builder(BI);
7882 BI->setCondition(
7883 Builder.CreateXor(LHS: BI->getCondition(), RHS: BB1BI->getCondition()));
7884 BB1->removePredecessor(Pred: BB);
7885 BI->setSuccessor(idx: 0, NewSucc: BB4);
7886 BB2->removePredecessor(Pred: BB);
7887 BI->setSuccessor(idx: 1, NewSucc: BB3);
7888 if (DTU) {
7889 SmallVector<DominatorTree::UpdateType, 4> Updates;
7890 Updates.push_back(Elt: {DominatorTree::Delete, BB, BB1});
7891 Updates.push_back(Elt: {DominatorTree::Insert, BB, BB4});
7892 Updates.push_back(Elt: {DominatorTree::Delete, BB, BB2});
7893 Updates.push_back(Elt: {DominatorTree::Insert, BB, BB3});
7894
7895 DTU->applyUpdates(Updates);
7896 }
7897 bool HasWeight = false;
7898 uint64_t BBTWeight, BBFWeight;
7899 if (extractBranchWeights(I: *BI, TrueVal&: BBTWeight, FalseVal&: BBFWeight))
7900 HasWeight = true;
7901 else
7902 BBTWeight = BBFWeight = 1;
7903 uint64_t BB1TWeight, BB1FWeight;
7904 if (extractBranchWeights(I: *BB1BI, TrueVal&: BB1TWeight, FalseVal&: BB1FWeight))
7905 HasWeight = true;
7906 else
7907 BB1TWeight = BB1FWeight = 1;
7908 uint64_t BB2TWeight, BB2FWeight;
7909 if (extractBranchWeights(I: *BB2BI, TrueVal&: BB2TWeight, FalseVal&: BB2FWeight))
7910 HasWeight = true;
7911 else
7912 BB2TWeight = BB2FWeight = 1;
7913 if (HasWeight) {
7914 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
7915 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
7916 fitWeights(Weights);
7917 setBranchWeights(I: BI, TrueWeight: Weights[0], FalseWeight: Weights[1], /*IsExpected=*/false);
7918 }
7919 return true;
7920}
7921
7922bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
7923 assert(
7924 !isa<ConstantInt>(BI->getCondition()) &&
7925 BI->getSuccessor(0) != BI->getSuccessor(1) &&
7926 "Tautological conditional branch should have been eliminated already.");
7927
7928 BasicBlock *BB = BI->getParent();
7929 if (!Options.SimplifyCondBranch ||
7930 BI->getFunction()->hasFnAttribute(Kind: Attribute::OptForFuzzing))
7931 return false;
7932
7933 // Conditional branch
7934 if (isValueEqualityComparison(TI: BI)) {
7935 // If we only have one predecessor, and if it is a branch on this value,
7936 // see if that predecessor totally determines the outcome of this
7937 // switch.
7938 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7939 if (simplifyEqualityComparisonWithOnlyPredecessor(TI: BI, Pred: OnlyPred, Builder))
7940 return requestResimplify();
7941
7942 // This block must be empty, except for the setcond inst, if it exists.
7943 // Ignore dbg and pseudo intrinsics.
7944 auto I = BB->instructionsWithoutDebug(SkipPseudoOp: true).begin();
7945 if (&*I == BI) {
7946 if (foldValueComparisonIntoPredecessors(TI: BI, Builder))
7947 return requestResimplify();
7948 } else if (&*I == cast<Instruction>(Val: BI->getCondition())) {
7949 ++I;
7950 if (&*I == BI && foldValueComparisonIntoPredecessors(TI: BI, Builder))
7951 return requestResimplify();
7952 }
7953 }
7954
7955 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
7956 if (simplifyBranchOnICmpChain(BI, Builder, DL))
7957 return true;
7958
7959 // If this basic block has dominating predecessor blocks and the dominating
7960 // blocks' conditions imply BI's condition, we know the direction of BI.
7961 std::optional<bool> Imp = isImpliedByDomCondition(Cond: BI->getCondition(), ContextI: BI, DL);
7962 if (Imp) {
7963 // Turn this into a branch on constant.
7964 auto *OldCond = BI->getCondition();
7965 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(Context&: BB->getContext())
7966 : ConstantInt::getFalse(Context&: BB->getContext());
7967 BI->setCondition(TorF);
7968 RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
7969 return requestResimplify();
7970 }
7971
7972 // If this basic block is ONLY a compare and a branch, and if a predecessor
7973 // branches to us and one of our successors, fold the comparison into the
7974 // predecessor and use logical operations to pick the right destination.
7975 if (Options.SpeculateBlocks &&
7976 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, TTI: &TTI,
7977 BonusInstThreshold: Options.BonusInstThreshold))
7978 return requestResimplify();
7979
7980 // We have a conditional branch to two blocks that are only reachable
7981 // from BI. We know that the condbr dominates the two blocks, so see if
7982 // there is any identical code in the "then" and "else" blocks. If so, we
7983 // can hoist it up to the branching block.
7984 if (BI->getSuccessor(i: 0)->getSinglePredecessor()) {
7985 if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
7986 if (HoistCommon &&
7987 hoistCommonCodeFromSuccessors(TI: BI, AllInstsEqOnly: !Options.HoistCommonInsts))
7988 return requestResimplify();
7989
7990 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
7991 isProfitableToSpeculate(BI, Invert: std::nullopt, TTI)) {
7992 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
7993 auto CanSpeculateConditionalLoadsStores = [&]() {
7994 for (auto *Succ : successors(BB)) {
7995 for (Instruction &I : *Succ) {
7996 if (I.isTerminator()) {
7997 if (I.getNumSuccessors() > 1)
7998 return false;
7999 continue;
8000 } else if (!isSafeCheapLoadStore(I: &I, TTI) ||
8001 SpeculatedConditionalLoadsStores.size() ==
8002 HoistLoadsStoresWithCondFaultingThreshold) {
8003 return false;
8004 }
8005 SpeculatedConditionalLoadsStores.push_back(Elt: &I);
8006 }
8007 }
8008 return !SpeculatedConditionalLoadsStores.empty();
8009 };
8010
8011 if (CanSpeculateConditionalLoadsStores()) {
8012 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8013 Invert: std::nullopt, Sel: nullptr);
8014 return requestResimplify();
8015 }
8016 }
8017 } else {
8018 // If Successor #1 has multiple preds, we may be able to conditionally
8019 // execute Successor #0 if it branches to Successor #1.
8020 Instruction *Succ0TI = BI->getSuccessor(i: 0)->getTerminator();
8021 if (Succ0TI->getNumSuccessors() == 1 &&
8022 Succ0TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 1))
8023 if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 0)))
8024 return requestResimplify();
8025 }
8026 } else if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
8027 // If Successor #0 has multiple preds, we may be able to conditionally
8028 // execute Successor #1 if it branches to Successor #0.
8029 Instruction *Succ1TI = BI->getSuccessor(i: 1)->getTerminator();
8030 if (Succ1TI->getNumSuccessors() == 1 &&
8031 Succ1TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 0))
8032 if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 1)))
8033 return requestResimplify();
8034 }
8035
8036 // If this is a branch on something for which we know the constant value in
8037 // predecessors (e.g. a phi node in the current block), thread control
8038 // through this block.
8039 if (foldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, AC: Options.AC))
8040 return requestResimplify();
8041
8042 // Scan predecessor blocks for conditional branches.
8043 for (BasicBlock *Pred : predecessors(BB))
8044 if (BranchInst *PBI = dyn_cast<BranchInst>(Val: Pred->getTerminator()))
8045 if (PBI != BI && PBI->isConditional())
8046 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8047 return requestResimplify();
8048
8049 // Look for diamond patterns.
8050 if (MergeCondStores)
8051 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8052 if (BranchInst *PBI = dyn_cast<BranchInst>(Val: PrevBB->getTerminator()))
8053 if (PBI != BI && PBI->isConditional())
8054 if (mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
8055 return requestResimplify();
8056
8057 // Look for nested conditional branches.
8058 if (mergeNestedCondBranch(BI, DTU))
8059 return requestResimplify();
8060
8061 return false;
8062}
8063
8064/// Check if passing a value to an instruction will cause undefined behavior.
8065static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8066 assert(V->getType() == I->getType() && "Mismatched types");
8067 Constant *C = dyn_cast<Constant>(Val: V);
8068 if (!C)
8069 return false;
8070
8071 if (I->use_empty())
8072 return false;
8073
8074 if (C->isNullValue() || isa<UndefValue>(Val: C)) {
8075 // Only look at the first use we can handle, avoid hurting compile time with
8076 // long uselists
8077 auto FindUse = llvm::find_if(Range: I->uses(), P: [](auto &U) {
8078 auto *Use = cast<Instruction>(U.getUser());
8079 // Change this list when we want to add new instructions.
8080 switch (Use->getOpcode()) {
8081 default:
8082 return false;
8083 case Instruction::GetElementPtr:
8084 case Instruction::Ret:
8085 case Instruction::BitCast:
8086 case Instruction::Load:
8087 case Instruction::Store:
8088 case Instruction::Call:
8089 case Instruction::CallBr:
8090 case Instruction::Invoke:
8091 case Instruction::UDiv:
8092 case Instruction::URem:
8093 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8094 // implemented to avoid code complexity as it is unclear how useful such
8095 // logic is.
8096 case Instruction::SDiv:
8097 case Instruction::SRem:
8098 return true;
8099 }
8100 });
8101 if (FindUse == I->use_end())
8102 return false;
8103 auto &Use = *FindUse;
8104 auto *User = cast<Instruction>(Val: Use.getUser());
8105 // Bail out if User is not in the same BB as I or User == I or User comes
8106 // before I in the block. The latter two can be the case if User is a
8107 // PHI node.
8108 if (User->getParent() != I->getParent() || User == I ||
8109 User->comesBefore(Other: I))
8110 return false;
8111
8112 // Now make sure that there are no instructions in between that can alter
8113 // control flow (eg. calls)
8114 auto InstrRange =
8115 make_range(x: std::next(x: I->getIterator()), y: User->getIterator());
8116 if (any_of(Range&: InstrRange, P: [](Instruction &I) {
8117 return !isGuaranteedToTransferExecutionToSuccessor(I: &I);
8118 }))
8119 return false;
8120
8121 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8122 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: User))
8123 if (GEP->getPointerOperand() == I) {
8124 // The type of GEP may differ from the type of base pointer.
8125 // Bail out on vector GEPs, as they are not handled by other checks.
8126 if (GEP->getType()->isVectorTy())
8127 return false;
8128 // The current base address is null, there are four cases to consider:
8129 // getelementptr (TY, null, 0) -> null
8130 // getelementptr (TY, null, not zero) -> may be modified
8131 // getelementptr inbounds (TY, null, 0) -> null
8132 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8133 // undefined?
8134 if (!GEP->hasAllZeroIndices() &&
8135 (!GEP->isInBounds() ||
8136 NullPointerIsDefined(F: GEP->getFunction(),
8137 AS: GEP->getPointerAddressSpace())))
8138 PtrValueMayBeModified = true;
8139 return passingValueIsAlwaysUndefined(V, I: GEP, PtrValueMayBeModified);
8140 }
8141
8142 // Look through return.
8143 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: User)) {
8144 bool HasNoUndefAttr =
8145 Ret->getFunction()->hasRetAttribute(Kind: Attribute::NoUndef);
8146 // Return undefined to a noundef return value is undefined.
8147 if (isa<UndefValue>(Val: C) && HasNoUndefAttr)
8148 return true;
8149 // Return null to a nonnull+noundef return value is undefined.
8150 if (C->isNullValue() && HasNoUndefAttr &&
8151 Ret->getFunction()->hasRetAttribute(Kind: Attribute::NonNull)) {
8152 return !PtrValueMayBeModified;
8153 }
8154 }
8155
8156 // Load from null is undefined.
8157 if (LoadInst *LI = dyn_cast<LoadInst>(Val: User))
8158 if (!LI->isVolatile())
8159 return !NullPointerIsDefined(F: LI->getFunction(),
8160 AS: LI->getPointerAddressSpace());
8161
8162 // Store to null is undefined.
8163 if (StoreInst *SI = dyn_cast<StoreInst>(Val: User))
8164 if (!SI->isVolatile())
8165 return (!NullPointerIsDefined(F: SI->getFunction(),
8166 AS: SI->getPointerAddressSpace())) &&
8167 SI->getPointerOperand() == I;
8168
8169 // llvm.assume(false/undef) always triggers immediate UB.
8170 if (auto *Assume = dyn_cast<AssumeInst>(Val: User)) {
8171 // Ignore assume operand bundles.
8172 if (I == Assume->getArgOperand(i: 0))
8173 return true;
8174 }
8175
8176 if (auto *CB = dyn_cast<CallBase>(Val: User)) {
8177 if (C->isNullValue() && NullPointerIsDefined(F: CB->getFunction()))
8178 return false;
8179 // A call to null is undefined.
8180 if (CB->getCalledOperand() == I)
8181 return true;
8182
8183 if (CB->isArgOperand(U: &Use)) {
8184 unsigned ArgIdx = CB->getArgOperandNo(U: &Use);
8185 // Passing null to a nonnnull+noundef argument is undefined.
8186 if (isa<ConstantPointerNull>(Val: C) &&
8187 CB->paramHasNonNullAttr(ArgNo: ArgIdx, /*AllowUndefOrPoison=*/false))
8188 return !PtrValueMayBeModified;
8189 // Passing undef to a noundef argument is undefined.
8190 if (isa<UndefValue>(Val: C) && CB->isPassingUndefUB(ArgNo: ArgIdx))
8191 return true;
8192 }
8193 }
8194 // Div/Rem by zero is immediate UB
8195 if (match(V: User, P: m_BinOp(L: m_Value(), R: m_Specific(V: I))) && User->isIntDivRem())
8196 return true;
8197 }
8198 return false;
8199}
8200
8201/// If BB has an incoming value that will always trigger undefined behavior
8202/// (eg. null pointer dereference), remove the branch leading here.
8203static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
8204 DomTreeUpdater *DTU,
8205 AssumptionCache *AC) {
8206 for (PHINode &PHI : BB->phis())
8207 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8208 if (passingValueIsAlwaysUndefined(V: PHI.getIncomingValue(i), I: &PHI)) {
8209 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8210 Instruction *T = Predecessor->getTerminator();
8211 IRBuilder<> Builder(T);
8212 if (BranchInst *BI = dyn_cast<BranchInst>(Val: T)) {
8213 BB->removePredecessor(Pred: Predecessor);
8214 // Turn unconditional branches into unreachables and remove the dead
8215 // destination from conditional branches.
8216 if (BI->isUnconditional())
8217 Builder.CreateUnreachable();
8218 else {
8219 // Preserve guarding condition in assume, because it might not be
8220 // inferrable from any dominating condition.
8221 Value *Cond = BI->getCondition();
8222 CallInst *Assumption;
8223 if (BI->getSuccessor(i: 0) == BB)
8224 Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
8225 else
8226 Assumption = Builder.CreateAssumption(Cond);
8227 if (AC)
8228 AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
8229 Builder.CreateBr(Dest: BI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 1)
8230 : BI->getSuccessor(i: 0));
8231 }
8232 BI->eraseFromParent();
8233 if (DTU)
8234 DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
8235 return true;
8236 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: T)) {
8237 // Redirect all branches leading to UB into
8238 // a newly created unreachable block.
8239 BasicBlock *Unreachable = BasicBlock::Create(
8240 Context&: Predecessor->getContext(), Name: "unreachable", Parent: BB->getParent(), InsertBefore: BB);
8241 Builder.SetInsertPoint(Unreachable);
8242 // The new block contains only one instruction: Unreachable
8243 Builder.CreateUnreachable();
8244 for (const auto &Case : SI->cases())
8245 if (Case.getCaseSuccessor() == BB) {
8246 BB->removePredecessor(Pred: Predecessor);
8247 Case.setSuccessor(Unreachable);
8248 }
8249 if (SI->getDefaultDest() == BB) {
8250 BB->removePredecessor(Pred: Predecessor);
8251 SI->setDefaultDest(Unreachable);
8252 }
8253
8254 if (DTU)
8255 DTU->applyUpdates(
8256 Updates: { { DominatorTree::Insert, Predecessor, Unreachable },
8257 { DominatorTree::Delete, Predecessor, BB } });
8258 return true;
8259 }
8260 }
8261
8262 return false;
8263}
8264
8265bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8266 bool Changed = false;
8267
8268 assert(BB && BB->getParent() && "Block not embedded in function!");
8269 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8270
8271 // Remove basic blocks that have no predecessors (except the entry block)...
8272 // or that just have themself as a predecessor. These are unreachable.
8273 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8274 BB->getSinglePredecessor() == BB) {
8275 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8276 DeleteDeadBlock(BB, DTU);
8277 return true;
8278 }
8279
8280 // Check to see if we can constant propagate this terminator instruction
8281 // away...
8282 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8283 /*TLI=*/nullptr, DTU);
8284
8285 // Check for and eliminate duplicate PHI nodes in this block.
8286 Changed |= EliminateDuplicatePHINodes(BB);
8287
8288 // Check for and remove branches that will always cause undefined behavior.
8289 if (removeUndefIntroducingPredecessor(BB, DTU, AC: Options.AC))
8290 return requestResimplify();
8291
8292 // Merge basic blocks into their predecessor if there is only one distinct
8293 // pred, and if there is only one distinct successor of the predecessor, and
8294 // if there are no PHI nodes.
8295 if (MergeBlockIntoPredecessor(BB, DTU))
8296 return true;
8297
8298 if (SinkCommon && Options.SinkCommonInsts)
8299 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8300 mergeCompatibleInvokes(BB, DTU)) {
8301 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8302 // so we may now how duplicate PHI's.
8303 // Let's rerun EliminateDuplicatePHINodes() first,
8304 // before foldTwoEntryPHINode() potentially converts them into select's,
8305 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8306 return true;
8307 }
8308
8309 IRBuilder<> Builder(BB);
8310
8311 if (Options.SpeculateBlocks &&
8312 !BB->getParent()->hasFnAttribute(Kind: Attribute::OptForFuzzing)) {
8313 // If there is a trivial two-entry PHI node in this basic block, and we can
8314 // eliminate it, do so now.
8315 if (auto *PN = dyn_cast<PHINode>(Val: BB->begin()))
8316 if (PN->getNumIncomingValues() == 2)
8317 if (foldTwoEntryPHINode(PN, TTI, DTU, AC: Options.AC, DL,
8318 SpeculateUnpredictables: Options.SpeculateUnpredictables))
8319 return true;
8320 }
8321
8322 Instruction *Terminator = BB->getTerminator();
8323 Builder.SetInsertPoint(Terminator);
8324 switch (Terminator->getOpcode()) {
8325 case Instruction::Br:
8326 Changed |= simplifyBranch(Branch: cast<BranchInst>(Val: Terminator), Builder);
8327 break;
8328 case Instruction::Resume:
8329 Changed |= simplifyResume(RI: cast<ResumeInst>(Val: Terminator), Builder);
8330 break;
8331 case Instruction::CleanupRet:
8332 Changed |= simplifyCleanupReturn(RI: cast<CleanupReturnInst>(Val: Terminator));
8333 break;
8334 case Instruction::Switch:
8335 Changed |= simplifySwitch(SI: cast<SwitchInst>(Val: Terminator), Builder);
8336 break;
8337 case Instruction::Unreachable:
8338 Changed |= simplifyUnreachable(UI: cast<UnreachableInst>(Val: Terminator));
8339 break;
8340 case Instruction::IndirectBr:
8341 Changed |= simplifyIndirectBr(IBI: cast<IndirectBrInst>(Val: Terminator));
8342 break;
8343 }
8344
8345 return Changed;
8346}
8347
8348bool SimplifyCFGOpt::run(BasicBlock *BB) {
8349 bool Changed = false;
8350
8351 // Repeated simplify BB as long as resimplification is requested.
8352 do {
8353 Resimplify = false;
8354
8355 // Perform one round of simplifcation. Resimplify flag will be set if
8356 // another iteration is requested.
8357 Changed |= simplifyOnce(BB);
8358 } while (Resimplify);
8359
8360 return Changed;
8361}
8362
8363bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
8364 DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
8365 ArrayRef<WeakVH> LoopHeaders) {
8366 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8367 Options)
8368 .run(BB);
8369}
8370