1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
19#include "llvm/ADT/SetOperations.h"
20#include "llvm/ADT/SetVector.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/Analysis/AssumptionCache.h"
26#include "llvm/Analysis/CaptureTracking.h"
27#include "llvm/Analysis/ConstantFolding.h"
28#include "llvm/Analysis/DomTreeUpdater.h"
29#include "llvm/Analysis/GuardUtils.h"
30#include "llvm/Analysis/InstructionSimplify.h"
31#include "llvm/Analysis/Loads.h"
32#include "llvm/Analysis/MemorySSA.h"
33#include "llvm/Analysis/MemorySSAUpdater.h"
34#include "llvm/Analysis/TargetTransformInfo.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
40#include "llvm/IR/ConstantRange.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
51#include "llvm/IR/Instructions.h"
52#include "llvm/IR/IntrinsicInst.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/PatternMatch.h"
61#include "llvm/IR/ProfDataUtils.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
67#include "llvm/Support/BranchProbability.h"
68#include "llvm/Support/Casting.h"
69#include "llvm/Support/CommandLine.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/ErrorHandling.h"
72#include "llvm/Support/KnownBits.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Transforms/Utils/BasicBlockUtils.h"
76#include "llvm/Transforms/Utils/Cloning.h"
77#include "llvm/Transforms/Utils/Local.h"
78#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
79#include "llvm/Transforms/Utils/ValueMapper.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
namespace llvm {

// Tuning knobs for SimplifyCFG, exposed for debugging and experimentation via
// 'opt'. All options are cl::Hidden, so they do not appear in normal --help
// output; each description documents its default.

cl::opt<bool> RequireAndPreserveDomTree(
    "simplifycfg-require-and-preserve-domtree", cl::Hidden,

    cl::desc(
        "Temporary development switch used to gradually uplift SimplifyCFG "
        "into preserving DomTree,"));

// Chosen as 2 so as to be cheap, but still to have enough power to fold
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
// To catch this, we need to fold a compare and a select, hence '2' being the
// minimum reasonable default.
static cl::opt<unsigned> PHINodeFoldingThreshold(
    "phi-node-folding-threshold", cl::Hidden, cl::init(Val: 2),
    cl::desc(
        "Control the amount of phi node folding to perform (default = 2)"));

static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
    "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(Val: 4),
    cl::desc("Control the maximal total instruction cost that we are willing "
             "to speculatively execute to fold a 2-entry PHI node into a "
             "select (default = 4)"));

static cl::opt<bool>
    HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(Val: true),
                cl::desc("Hoist common instructions up to the parent block"));

// The next two options only take effect when the target reports support for
// conditional faulting loads/stores (see uses of these flags below).
static cl::opt<bool> HoistLoadsWithCondFaulting(
    "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(Val: true),
    cl::desc("Hoist loads if the target supports conditional faulting"));

static cl::opt<bool> HoistStoresWithCondFaulting(
    "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(Val: true),
    cl::desc("Hoist stores if the target supports conditional faulting"));

static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
    "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(Val: 6),
    cl::desc("Control the maximal conditional load/store that we are willing "
             "to speculatively execute to eliminate conditional branch "
             "(default = 6)"));

static cl::opt<unsigned>
    HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
                         cl::init(Val: 20),
                         cl::desc("Allow reordering across at most this many "
                                  "instructions when hoisting"));

static cl::opt<bool>
    SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(Val: true),
               cl::desc("Sink common instructions down to the end block"));

static cl::opt<bool> HoistCondStores(
    "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(Val: true),
    cl::desc("Hoist conditional stores if an unconditional store precedes"));

static cl::opt<bool> MergeCondStores(
    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(Val: true),
    cl::desc("Hoist conditional stores even if an unconditional store does not "
             "precede - hoist multiple conditional stores into a single "
             "predicated store"));

static cl::opt<bool> MergeCondStoresAggressively(
    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(Val: false),
    cl::desc("When merging conditional stores, do so even if the resultant "
             "basic blocks are unlikely to be if-converted as a result"));

static cl::opt<bool> SpeculateOneExpensiveInst(
    "speculate-one-expensive-inst", cl::Hidden, cl::init(Val: true),
    cl::desc("Allow exactly one expensive instruction to be speculatively "
             "executed"));

static cl::opt<unsigned> MaxSpeculationDepth(
    "max-speculation-depth", cl::Hidden, cl::init(Val: 10),
    cl::desc("Limit maximum recursion depth when calculating costs of "
             "speculatively executed instructions"));

static cl::opt<int>
    MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
                      cl::init(Val: 10),
                      cl::desc("Max size of a block which is still considered "
                               "small enough to thread through"));

// Two is chosen to allow one negation and a logical combine.
static cl::opt<unsigned>
    BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
                        cl::init(Val: 2),
                        cl::desc("Maximum cost of combining conditions when "
                                 "folding branches"));

static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
    "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
    cl::init(Val: 2),
    cl::desc("Multiplier to apply to threshold when determining whether or not "
             "to fold branch to common destination when vector operations are "
             "present"));

static cl::opt<bool> EnableMergeCompatibleInvokes(
    "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(Val: true),
    cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));

static cl::opt<unsigned> MaxSwitchCasesPerResult(
    "max-switch-cases-per-result", cl::Hidden, cl::init(Val: 16),
    cl::desc("Limit cases to analyze when converting a switch to select"));

// Defined in another translation unit (declared extern here).
extern cl::opt<bool> ProfcheckDisableMetadataFixes;

} // end namespace llvm
207
// Pass-wide statistics counters (reported with -stats). Each counter is bumped
// by the corresponding transformation below.
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
          "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables,
          "Number of switch instructions turned into lookup tables");
STATISTIC(
    NumLookupTablesHoles,
    "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
STATISTIC(NumFoldValueComparisonIntoPredecessors,
          "Number of value comparisons folded into predecessor basic blocks");
STATISTIC(NumFoldBranchToCommonDest,
          "Number of branches folded into predecessor basic block");
STATISTIC(
    NumHoistCommonCode,
    "Number of common instruction 'blocks' hoisted up to the begin block");
STATISTIC(NumHoistCommonInstrs,
          "Number of common instructions hoisted up to the begin block");
STATISTIC(NumSinkCommonCode,
          "Number of common instruction 'blocks' sunk down to the end block");
STATISTIC(NumSinkCommonInstrs,
          "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
STATISTIC(NumInvokes,
          "Number of invokes with empty resume blocks simplified into calls");
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
235
236namespace {
237
// A list of case groups for a switch-to-select conversion. The first field of
// each pair contains the value that the switch produces when a certain case
// group is selected, and the second field is a vector containing the cases
// composing the case group.
using SwitchCaseResultVectorTy =
    SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;

// Per-PHI results for a single switch case. The first field contains the phi
// node that generates a result of the switch and the second field contains
// the value generated for a certain case in the switch for that PHI.
using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
248
249/// ValueEqualityComparisonCase - Represents a case of a switch.
250struct ValueEqualityComparisonCase {
251 ConstantInt *Value;
252 BasicBlock *Dest;
253
254 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
255 : Value(Value), Dest(Dest) {}
256
257 bool operator<(ValueEqualityComparisonCase RHS) const {
258 // Comparing pointers is ok as we only rely on the order for uniquing.
259 return Value < RHS.Value;
260 }
261
262 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
263};
264
/// Driver object for the per-basic-block CFG peephole simplifications.
/// Construct one per invocation and call run() on a block; the private
/// helpers each implement one specific simplification and return true if
/// they changed the IR.
class SimplifyCFGOpt {
  const TargetTransformInfo &TTI;   // Cost model queries.
  DomTreeUpdater *DTU;              // May be null; lazy DomTree maintenance.
  const DataLayout &DL;
  ArrayRef<WeakVH> LoopHeaders;     // Blocks we must not destroy as headers.
  const SimplifyCFGOptions &Options;
  bool Resimplify;                  // Set when a pass requests another run.

  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *getValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool foldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  // Simplifications keyed on the block's terminator kind.
  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
  bool foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);
  bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
                                                   SelectInst *Select,
                                                   IRBuilder<> &Builder);
  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs,
      ArrayRef<BasicBlock *> UniqueSuccessors);
  bool speculativelyExecuteBB(CondBrInst *BI, BasicBlock *ThenBB);
  bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool simplifyBranchOnICmpChain(CondBrInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicatePredecessors(BasicBlock *Succ, DomTreeUpdater *DTU);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  bool simplifyOnce(BasicBlock *BB);
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};
337
338// we synthesize a || b as select a, true, b
339// we synthesize a && b as select a, b, false
340// this function determines if SI is playing one of those roles.
341[[maybe_unused]] bool
342isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
343 return ((isa<ConstantInt>(Val: SI->getTrueValue()) &&
344 (dyn_cast<ConstantInt>(Val: SI->getTrueValue())->isOne())) ||
345 (isa<ConstantInt>(Val: SI->getFalseValue()) &&
346 (dyn_cast<ConstantInt>(Val: SI->getFalseValue())->isNullValue())));
347}
348
349} // end anonymous namespace
350
351/// Return true if all the PHI nodes in the basic block \p BB
352/// receive compatible (identical) incoming values when coming from
353/// all of the predecessor blocks that are specified in \p IncomingBlocks.
354///
355/// Note that if the values aren't exactly identical, but \p EquivalenceSet
356/// is provided, and *both* of the values are present in the set,
357/// then they are considered equal.
358static bool incomingValuesAreCompatible(
359 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
360 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
361 assert(IncomingBlocks.size() == 2 &&
362 "Only for a pair of incoming blocks at the time!");
363
364 // FIXME: it is okay if one of the incoming values is an `undef` value,
365 // iff the other incoming value is guaranteed to be a non-poison value.
366 // FIXME: it is okay if one of the incoming values is a `poison` value.
367 return all_of(Range: BB->phis(), P: [IncomingBlocks, EquivalenceSet](PHINode &PN) {
368 Value *IV0 = PN.getIncomingValueForBlock(BB: IncomingBlocks[0]);
369 Value *IV1 = PN.getIncomingValueForBlock(BB: IncomingBlocks[1]);
370 if (IV0 == IV1)
371 return true;
372 if (EquivalenceSet && EquivalenceSet->contains(Ptr: IV0) &&
373 EquivalenceSet->contains(Ptr: IV1))
374 return true;
375 return false;
376 });
377}
378
379/// Return true if it is safe to merge these two
380/// terminator instructions together.
381static bool
382safeToMergeTerminators(Instruction *SI1, Instruction *SI2,
383 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
384 if (SI1 == SI2)
385 return false; // Can't merge with self!
386
387 // It is not safe to merge these two switch instructions if they have a common
388 // successor, and if that successor has a PHI node, and if *that* PHI node has
389 // conflicting incoming values from the two switch blocks.
390 BasicBlock *SI1BB = SI1->getParent();
391 BasicBlock *SI2BB = SI2->getParent();
392
393 SmallPtrSet<BasicBlock *, 16> SI1Succs(llvm::from_range, successors(BB: SI1BB));
394 bool Fail = false;
395 for (BasicBlock *Succ : successors(BB: SI2BB)) {
396 if (!SI1Succs.count(Ptr: Succ))
397 continue;
398 if (incomingValuesAreCompatible(BB: Succ, IncomingBlocks: {SI1BB, SI2BB}))
399 continue;
400 Fail = true;
401 if (FailBlocks)
402 FailBlocks->insert(X: Succ);
403 else
404 break;
405 }
406
407 return !Fail;
408}
409
410/// Update PHI nodes in Succ to indicate that there will now be entries in it
411/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
412/// will be the same as those coming in from ExistPred, an existing predecessor
413/// of Succ.
414static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
415 BasicBlock *ExistPred,
416 MemorySSAUpdater *MSSAU = nullptr) {
417 for (PHINode &PN : Succ->phis())
418 PN.addIncoming(V: PN.getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
419 if (MSSAU)
420 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(BB: Succ))
421 MPhi->addIncoming(V: MPhi->getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
422}
423
424/// Compute an abstract "cost" of speculating the given instruction,
425/// which is assumed to be safe to speculate. TCC_Free means cheap,
426/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
427/// expensive.
428static InstructionCost computeSpeculationCost(const User *I,
429 const TargetTransformInfo &TTI) {
430 return TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
431}
432
/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We don't handle
/// the true generality of domination here, just a special case which works
/// well enough for us.
///
/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
/// see if V (which must be an instruction) and its recursive operands
/// that do not dominate BB have a combined cost lower than Budget and
/// are non-trapping. If both are true, the instruction is inserted into the
/// set and true is returned.
///
/// The cost for most non-trapping instructions is defined as 1 except for
/// Select whose cost is 2.
///
/// After this function returns, Cost is increased by the cost of
/// V plus its non-dominating operands. If that cost is greater than
/// Budget, false is returned and Cost is undefined.
static bool dominatesMergePoint(
    Value *V, BasicBlock *BB, Instruction *InsertPt,
    SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
    InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
    SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
  // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
  // so limit the recursion depth.
  // TODO: While this recursion limit does prevent pathological behavior, it
  // would be better to track visited instructions to avoid cycles.
  if (Depth == MaxSpeculationDepth)
    return false;

  Instruction *I = dyn_cast<Instruction>(Val: V);
  if (!I) {
    // Non-instructions dominate all instructions and can be executed
    // unconditionally.
    return true;
  }
  BasicBlock *PBB = I->getParent();

  // We don't want to allow weird loops that might have the "if condition" in
  // the bottom of this block.
  if (PBB == BB)
    return false;

  // If this instruction is defined in a block that contains an unconditional
  // branch to BB, then it must be in the 'conditional' part of the "if
  // statement". If not, it definitely dominates the region.
  UncondBrInst *BI = dyn_cast<UncondBrInst>(Val: PBB->getTerminator());
  if (!BI || BI->getSuccessor() != BB)
    return true;

  // If we have seen this instruction before, don't count it again.
  if (AggressiveInsts.count(Ptr: I))
    return true;

  // Okay, it looks like the instruction IS in the "condition". Check to
  // see if it's a cheap instruction to unconditionally compute, and if it
  // only uses stuff defined outside of the condition. If so, hoist it out.
  if (!isSafeToSpeculativelyExecute(I, CtxI: InsertPt, AC))
    return false;

  // Overflow arithmetic instruction plus extract value are usually generated
  // when a division is being replaced. But, in this case, the zero check may
  // still be kept in the code. In that case it would be worth to hoist these
  // two instruction out of the basic block. Let's treat this pattern as one
  // single cheap instruction here!
  WithOverflowInst *OverflowInst;
  if (match(V: I, P: m_ExtractValue<1>(V: m_OneUse(SubPattern: m_WithOverflowInst(I&: OverflowInst))))) {
    // Mark the overflow intrinsic itself free so the later extractvalue<0> /
    // intrinsic visit does not charge for it a second time.
    ZeroCostInstructions.insert(Ptr: OverflowInst);
    Cost += 1;
  } else if (!ZeroCostInstructions.contains(Ptr: I))
    Cost += computeSpeculationCost(I, TTI);

  // Allow exactly one instruction to be speculated regardless of its cost
  // (as long as it is safe to do so).
  // This is intended to flatten the CFG even if the instruction is a division
  // or other expensive operation. The speculation of an expensive instruction
  // is expected to be undone in CodeGenPrepare if the speculation has not
  // enabled further IR optimizations.
  // Note: the one-expensive-inst exemption only applies at Depth 0 with no
  // instructions accepted yet, and never to an invalid (saturated) cost.
  if (Cost > Budget &&
      (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
       !Cost.isValid()))
    return false;

  // Okay, we can only really hoist these out if their operands do
  // not take us over the cost threshold.
  for (Use &Op : I->operands())
    if (!dominatesMergePoint(V: Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
                             TTI, AC, ZeroCostInstructions, Depth: Depth + 1))
      return false;
  // Okay, it's safe to do this! Remember this instruction.
  AggressiveInsts.insert(Ptr: I);
  return true;
}
525
526/// Extract ConstantInt from value, looking through IntToPtr
527/// and PointerNullValue. Return NULL if value is not a constant int.
528static ConstantInt *getConstantInt(Value *V, const DataLayout &DL) {
529 // Normal constant int.
530 ConstantInt *CI = dyn_cast<ConstantInt>(Val: V);
531 if (CI || !isa<Constant>(Val: V) || !V->getType()->isPointerTy())
532 return CI;
533
534 // It is not safe to look through inttoptr or ptrtoint when using unstable
535 // pointer types.
536 if (DL.hasUnstableRepresentation(Ty: V->getType()))
537 return nullptr;
538
539 // This is some kind of pointer constant. Turn it into a pointer-sized
540 // ConstantInt if possible.
541 IntegerType *IntPtrTy = cast<IntegerType>(Val: DL.getIntPtrType(V->getType()));
542
543 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
544 if (isa<ConstantPointerNull>(Val: V))
545 return ConstantInt::get(Ty: IntPtrTy, V: 0);
546
547 // IntToPtr const int, we can look through this if the semantics of
548 // inttoptr for this address space are a simple (truncating) bitcast.
549 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
550 if (CE->getOpcode() == Instruction::IntToPtr)
551 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CE->getOperand(i_nocapture: 0))) {
552 // The constant is very likely to have the right type already.
553 if (CI->getType() == IntPtrTy)
554 return CI;
555 else
556 return cast<ConstantInt>(
557 Val: ConstantFoldIntegerCast(C: CI, DestTy: IntPtrTy, /*isSigned=*/IsSigned: false, DL));
558 }
559 return nullptr;
560}
561
562namespace {
563
/// Given a chain of or (||) or and (&&) comparison of a value against a
/// constant, this will try to recover the information required for a switch
/// structure.
/// It will depth-first traverse the chain of comparison, seeking for patterns
/// like %a == 12 or %a < 4 and combine them to produce a set of integer
/// representing the different cases for the switch.
/// Note that if the chain is composed of '||' it will build the set of elements
/// that matches the comparisons (i.e. any of this value validate the chain)
/// while for a chain of '&&' it will build the set elements that make the test
/// fail.
struct ConstantComparesGatherer {
  const DataLayout &DL;

  /// Value found for the switch comparison
  Value *CompValue = nullptr;

  /// Extra clause to be checked before the switch
  Value *Extra = nullptr;

  /// Set of integers to match in switch
  SmallVector<ConstantInt *, 8> Vals;

  /// Number of comparisons matched in the and/or chain
  unsigned UsedICmps = 0;

  /// If the elements in Vals matches the comparisons
  bool IsEq = false;

  // Used to check if the first matched CompValue shall be the Extra check.
  bool IgnoreFirstMatch = false;
  // Set when two different compared-against values were seen in one pass.
  bool MultipleMatches = false;

  /// Construct and compute the result for the comparison instruction Cond.
  /// If the first pass found no single CompValue but did see multiple
  /// candidate values, retry once with IgnoreFirstMatch set so that the
  /// first candidate is demoted to the Extra clause instead.
  ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
    gather(V: Cond);
    if (CompValue || !MultipleMatches)
      return;
    // Reset all state accumulated by the failed first pass before retrying.
    Extra = nullptr;
    Vals.clear();
    UsedICmps = 0;
    IgnoreFirstMatch = true;
    gather(V: Cond);
  }

  ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
  ConstantComparesGatherer &
  operator=(const ConstantComparesGatherer &) = delete;

private:
  /// Try to set the current value used for the comparison, it succeeds only if
  /// it wasn't set before or if the new value is the same as the old one
  bool setValueOnce(Value *NewVal) {
    // On the second gather() pass, deliberately reject the first candidate so
    // it ends up as the Extra clause (see the constructor).
    if (IgnoreFirstMatch) {
      IgnoreFirstMatch = false;
      return false;
    }
    if (CompValue && CompValue != NewVal) {
      MultipleMatches = true;
      return false;
    }
    CompValue = NewVal;
    return true;
  }

  /// Try to match Instruction "I" as a comparison against a constant and
  /// populates the array Vals with the set of values that match (or do not
  /// match depending on isEQ).
  /// Return false on failure. On success, the Value the comparison matched
  /// against is placed in CompValue.
  /// If CompValue is already set, the function is expected to fail if a match
  /// is found but the value compared to is different.
  bool matchInstruction(Instruction *I, bool isEQ) {
    // A 'not' of the comparison flips which set (match/no-match) we build.
    if (match(V: I, P: m_Not(V: m_Instruction(I))))
      isEQ = !isEQ;

    // A no-unsigned-wrap trunc to i1 is a comparison of the wide value
    // against 0/1.
    Value *Val;
    if (match(V: I, P: m_NUWTrunc(Op: m_Value(V&: Val)))) {
      // If we already have a value for the switch, it has to match!
      if (!setValueOnce(Val))
        return false;
      UsedICmps++;
      Vals.push_back(Elt: ConstantInt::get(Ty: cast<IntegerType>(Val: Val->getType()), V: isEQ));
      return true;
    }
    // If this is an icmp against a constant, handle this as one of the cases.
    ICmpInst *ICI;
    ConstantInt *C;
    if (!((ICI = dyn_cast<ICmpInst>(Val: I)) &&
          (C = getConstantInt(V: I->getOperand(i: 1), DL)))) {
      return false;
    }

    Value *RHSVal;
    const APInt *RHSC;

    // Pattern match a special case
    // (x & ~2^z) == y --> x == y || x == y|2^z
    // This undoes a transformation done by instcombine to fuse 2 compares.
    if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
      // It's a little bit hard to see why the following transformations are
      // correct. Here is a CVC3 program to verify them for 64-bit values:

      /*
         ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
         x    : BITVECTOR(64);
         y    : BITVECTOR(64);
         z    : BITVECTOR(64);
         mask : BITVECTOR(64) = BVSHL(ONE, z);
         QUERY( (y & ~mask = y) =>
                ((x & ~mask = y) <=> (x = y OR x = (y |  mask)))
         );
         QUERY( (y |  mask = y) =>
                ((x |  mask = y) <=> (x = y OR x = (y & ~mask)))
         );
      */

      // Please note that each pattern must be a dual implication (<--> or
      // iff). One directional implication can create spurious matches. If the
      // implication is only one-way, an unsatisfiable condition on the left
      // side can imply a satisfiable condition on the right side. Dual
      // implication ensures that satisfiable conditions are transformed to
      // other satisfiable conditions and unsatisfiable conditions are
      // transformed to other unsatisfiable conditions.

      // Here is a concrete example of a unsatisfiable condition on the left
      // implying a satisfiable condition on the right:
      //
      // mask = (1 << z)
      // (x & ~mask) == y  --> (x == y || x == (y | mask))
      //
      // Substituting y = 3, z = 0 yields:
      // (x & -2) == 3 --> (x == 3 || x == 2)

      // Pattern match a special case:
      /*
        QUERY( (y & ~mask = y) =>
               ((x & ~mask = y) <=> (x = y OR x = (y |  mask)))
        );
      */
      if (match(V: ICI->getOperand(i_nocapture: 0),
                P: m_And(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
        APInt Mask = ~*RHSC;
        if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
          // If we already have a value for the switch, it has to match!
          if (!setValueOnce(RHSVal))
            return false;

          Vals.push_back(Elt: C);
          Vals.push_back(
              Elt: ConstantInt::get(Context&: C->getContext(),
                                   V: C->getValue() | Mask));
          UsedICmps++;
          return true;
        }
      }

      // Pattern match a special case:
      /*
        QUERY( (y |  mask = y) =>
               ((x |  mask = y) <=> (x = y OR x = (y & ~mask)))
        );
      */
      if (match(V: ICI->getOperand(i_nocapture: 0),
                P: m_Or(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
        APInt Mask = *RHSC;
        if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
          // If we already have a value for the switch, it has to match!
          if (!setValueOnce(RHSVal))
            return false;

          Vals.push_back(Elt: C);
          Vals.push_back(Elt: ConstantInt::get(Context&: C->getContext(),
                                              V: C->getValue() & ~Mask));
          UsedICmps++;
          return true;
        }
      }

      // If we already have a value for the switch, it has to match!
      if (!setValueOnce(ICI->getOperand(i_nocapture: 0)))
        return false;

      UsedICmps++;
      Vals.push_back(Elt: C);
      return true;
    }

    // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
    ConstantRange Span =
        ConstantRange::makeExactICmpRegion(Pred: ICI->getPredicate(), Other: C->getValue());

    // Shift the range if the compare is fed by an add. This is the range
    // compare idiom as emitted by instcombine.
    Value *CandidateVal = I->getOperand(i: 0);
    if (match(V: I->getOperand(i: 0), P: m_Add(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
      Span = Span.subtract(CI: *RHSC);
      CandidateVal = RHSVal;
    }

    // If this is an and/!= check, then we are looking to build the set of
    // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
    // x != 0 && x != 1.
    if (!isEQ)
      Span = Span.inverse();

    // If there are a ton of values, we don't want to make a ginormous switch.
    if (Span.isSizeLargerThan(MaxSize: 8) || Span.isEmptySet()) {
      return false;
    }

    // If we already have a value for the switch, it has to match!
    if (!setValueOnce(CandidateVal))
      return false;

    // Add all values from the range to the set
    APInt Tmp = Span.getLower();
    do
      Vals.push_back(Elt: ConstantInt::get(Context&: I->getContext(), V: Tmp));
    while (++Tmp != Span.getUpper());

    UsedICmps++;
    return true;
  }

  /// Given a potentially 'or'd or 'and'd together collection of icmp
  /// eq/ne/lt/gt instructions that compare a value against a constant, extract
  /// the value being compared, and stick the list constants into the Vals
  /// vector.
  /// One "Extra" case is allowed to differ from the other.
  void gather(Value *V) {
    // The root must itself be a logical or/and; it fixes IsEq for the whole
    // traversal.
    Value *Op0, *Op1;
    if (match(V, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1))))
      IsEq = true;
    else if (match(V, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1))))
      IsEq = false;
    else
      return;
    // Keep a stack (SmallVector for efficiency) for depth-first traversal
    SmallVector<Value *, 8> DFT{Op0, Op1};
    SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};

    while (!DFT.empty()) {
      V = DFT.pop_back_val();

      if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
        // If it is a || (or && depending on isEQ), process the operands.
        if (IsEq ? match(V: I, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))
                 : match(V: I, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) {
          if (Visited.insert(Ptr: Op1).second)
            DFT.push_back(Elt: Op1);
          if (Visited.insert(Ptr: Op0).second)
            DFT.push_back(Elt: Op0);

          continue;
        }

        // Try to match the current instruction
        if (matchInstruction(I, isEQ: IsEq))
          // Match succeed, continue the loop
          continue;
      }

      // One element of the sequence of || (or &&) could not be match as a
      // comparison against the same value as the others.
      // We allow only one "Extra" case to be checked before the switch
      if (!Extra) {
        Extra = V;
        continue;
      }
      // Failed to parse a proper sequence, abort now
      CompValue = nullptr;
      break;
    }
  }
};
839
840} // end anonymous namespace
841
842static void eraseTerminatorAndDCECond(Instruction *TI,
843 MemorySSAUpdater *MSSAU = nullptr) {
844 Instruction *Cond = nullptr;
845 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
846 Cond = dyn_cast<Instruction>(Val: SI->getCondition());
847 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
848 Cond = dyn_cast<Instruction>(Val: BI->getCondition());
849 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(Val: TI)) {
850 Cond = dyn_cast<Instruction>(Val: IBI->getAddress());
851 }
852
853 TI->eraseFromParent();
854 if (Cond)
855 RecursivelyDeleteTriviallyDeadInstructions(V: Cond, TLI: nullptr, MSSAU);
856}
857
/// If the specified terminator is a value-equality comparison — a switch, a
/// conditional branch on an integer equality compare against a constant, or a
/// conditional branch on a no-unsigned-wrap trunc — return the value being
/// compared; otherwise return null.
Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
  Value *CV = nullptr;
  if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
    // Do not permit merging of large switch instructions into their
    // predecessors unless there is only one predecessor.
    if (!SI->getParent()->hasNPredecessorsOrMore(N: 128 / SI->getNumSuccessors()))
      CV = SI->getCondition();
  } else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI))
    if (BI->getCondition()->hasOneUse()) {
      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition())) {
        if (ICI->isEquality() && getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL))
          CV = ICI->getOperand(i_nocapture: 0);
      } else if (auto *Trunc = dyn_cast<TruncInst>(Val: BI->getCondition())) {
        // A nuw trunc acts like a "!= 0" test of its wide operand (see
        // getValueEqualityComparisonCases, which decodes it that way).
        if (Trunc->hasNoUnsignedWrap())
          CV = Trunc->getOperand(i_nocapture: 0);
      }
    }

  // Unwrap any lossless ptrtoint cast (except for unstable pointers).
  if (CV) {
    if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(Val: CV)) {
      Value *Ptr = PTII->getPointerOperand();
      if (DL.hasUnstableRepresentation(Ty: Ptr->getType()))
        return CV;
      // Only unwrap if the integer type is exactly pointer-sized (lossless).
      if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
        CV = Ptr;
    }
  }
  return CV;
}
890
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
    Cases.reserve(n: SI->getNumCases());
    for (auto Case : SI->cases())
      Cases.push_back(x: ValueEqualityComparisonCase(Case.getCaseValue(),
                                                   Case.getCaseSuccessor()));
    return SI->getDefaultDest();
  }

  // Otherwise this is a conditional branch fed either by an icmp eq/ne
  // against a constant, or by a trunc (treated as "X != 0").
  CondBrInst *BI = cast<CondBrInst>(Val: TI);
  Value *Cond = BI->getCondition();
  ICmpInst::Predicate Pred;
  ConstantInt *C;
  if (auto *ICI = dyn_cast<ICmpInst>(Val: Cond)) {
    Pred = ICI->getPredicate();
    C = getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL);
  } else {
    Pred = ICmpInst::ICMP_NE;
    auto *Trunc = cast<TruncInst>(Val: Cond);
    C = ConstantInt::get(Ty: cast<IntegerType>(Val: Trunc->getOperand(i_nocapture: 0)->getType()), V: 0);
  }
  // For EQ the single "case" edge is the true successor (index 0); for NE it
  // is the false successor (index 1). The default edge is the other one.
  BasicBlock *Succ = BI->getSuccessor(i: Pred == ICmpInst::ICMP_NE);
  Cases.push_back(x: ValueEqualityComparisonCase(C, Succ));
  return BI->getSuccessor(i: Pred == ICmpInst::ICMP_EQ);
}
919
920/// Given a vector of bb/value pairs, remove any entries
921/// in the list that match the specified block.
922static void
923eliminateBlockCases(BasicBlock *BB,
924 std::vector<ValueEqualityComparisonCase> &Cases) {
925 llvm::erase(C&: Cases, V: BB);
926}
927
928/// Return true if there are any keys in C1 that exist in C2 as well.
929static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
930 std::vector<ValueEqualityComparisonCase> &C2) {
931 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
932
933 // Make V1 be smaller than V2.
934 if (V1->size() > V2->size())
935 std::swap(a&: V1, b&: V2);
936
937 if (V1->empty())
938 return false;
939 if (V1->size() == 1) {
940 // Just scan V2.
941 ConstantInt *TheVal = (*V1)[0].Value;
942 for (const ValueEqualityComparisonCase &VECC : *V2)
943 if (TheVal == VECC.Value)
944 return true;
945 }
946
947 // Otherwise, just sort both lists and compare element by element.
948 array_pod_sort(Start: V1->begin(), End: V1->end());
949 array_pod_sort(Start: V2->begin(), End: V2->end());
950 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
951 while (i1 != e1 && i2 != e2) {
952 if ((*V1)[i1].Value == (*V2)[i2].Value)
953 return true;
954 if ((*V1)[i1].Value < (*V2)[i2].Value)
955 ++i1;
956 else
957 ++i2;
958 }
959 return false;
960}
961
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(TI: Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(TI: Pred->getTerminator(), Cases&: PredCases);
  eliminateBlockCases(BB: PredDef, Cases&: PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, Cases&: ThisCases);
  eliminateBlockCases(BB: ThisDef, Cases&: ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(C1&: PredCases, C2&: ThisCases))
      return false;

    if (isa<CondBrInst>(Val: TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(Dest: ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(Pred: PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      eraseTerminatorAndDCECond(TI);

      if (DTU)
        DTU->applyUpdates(
            Updates: {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(Val: TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (const ValueEqualityComparisonCase &Case : PredCases)
      DeadCases.insert(Ptr: Case.Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    // Iterate backwards so that removing a case does not disturb the cases
    // that have not been visited yet.
    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(Ptr: i->getCaseValue())) {
        Successor->removePredecessor(Pred: PredDef);
        SI.removeCase(I: i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    // Only delete a CFG edge when no cases targeting that successor remain.
    if (DTU) {
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back(x: {DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges. CheckEdge tracks the first
  // occurrence of TheRealDest so that exactly one edge to it is kept.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(BB: TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Ptr: Succ);
      Succ->removePredecessor(Pred: TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(Dest: TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  eraseTerminatorAndDCECond(TI);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(N: RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back(Elt: {DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1115
1116namespace {
1117
1118/// This class implements a stable ordering of constant
1119/// integers that does not depend on their address. This is important for
1120/// applications that sort ConstantInt's to ensure uniqueness.
1121struct ConstantIntOrdering {
1122 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1123 return LHS->getValue().ult(RHS: RHS->getValue());
1124 }
1125};
1126
1127} // end anonymous namespace
1128
/// array_pod_sort comparator for ConstantInt*. Note the inverted return
/// values: when LHS is unsigned-less-than RHS this returns +1, so the
/// resulting sort order is *descending* by unsigned value.
static int constantIntSortPredicate(ConstantInt *const *P1,
                                    ConstantInt *const *P2) {
  const ConstantInt *LHS = *P1;
  const ConstantInt *RHS = *P2;
  if (LHS == RHS)
    return 0;
  return LHS->getValue().ult(RHS: RHS->getValue()) ? 1 : -1;
}
1137
/// Get Weights of a given terminator, the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
/// \pre TI carries !prof branch-weight metadata.
static void getBranchWeights(Instruction *TI,
                             SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(KindID: LLVMContext::MD_prof);
  assert(MD && "Invalid branch-weight metadata");
  extractFromBranchWeightMD64(ProfileData: MD, Weights);

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
    assert(Weights.size() == 2);
    // A trunc-based condition (no icmp) is decoded as "!= 0" elsewhere, so
    // the (true, false) order already matches the NE layout; leave it alone.
    auto *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition());
    if (!ICI)
      return;

    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(a&: Weights.front(), b&: Weights.back());
  }
}
1160
/// Clone the non-terminator ("bonus") instructions of \p BB into \p PredBlock
/// immediately before its terminator, recording original->clone mappings in
/// \p VMap, and rewrite liveout uses so the function stays in block-closed
/// SSA form.
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!NewBonusInst->getDebugLoc().isSameSourceLocation(Other: PTI->getDebugLoc())) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc::getDropped());
    } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
      mapAtomInstance(DL, VMap);
    }

    RemapInstruction(I: NewBonusInst, VM&: VMap,
                     Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(ParentBB: PredBlock, It: PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(From: &BonusInst);
    RemapDbgRecordRange(M: NewBonusInst->getModule(), Range, VM&: VMap,
                        Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

    NewBonusInst->takeName(V: &BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(Range: BonusInst.uses())) {
      auto *UI = cast<Instruction>(Val: U.getUser());
      auto *PN = dyn_cast<PHINode>(Val: UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }

  // Key Instructions: We may have propagated atom info into the pred. If the
  // pred's terminator already has atom info do nothing as merging would drop
  // one atom group anyway. If it doesn't, propagate the remapped atom group
  // from BB's terminator.
  if (auto &PredDL = PTI->getDebugLoc()) {
    auto &DL = BB->getTerminator()->getDebugLoc();
    if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
        PredDL.isSameSourceLocation(Other: DL)) {
      PTI->setDebugLoc(DL);
      RemapSourceAtom(I: PTI, VM&: VMap);
    }
  }
}
1239
/// Fold the value-equality comparison terminator \p TI (on condition \p CV)
/// into its predecessor's terminator \p PTI, which compares the same value:
/// build one merged switch in the predecessor, merge branch-weight metadata,
/// fix up PHIs, and keep the DomTreeUpdater informed.
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  SmallVector<DominatorTree::UpdateType, 32> Updates;

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, Cases&: BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = getValueEqualityComparisonCases(TI: PTI, Cases&: PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(I: *PTI);
  bool SuccHasWeights = hasBranchWeightMD(I: *TI);

  if (PredHasWeights) {
    getBranchWeights(TI: PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(NumElts: 1 + PredCases.size(), Elt: 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    getBranchWeights(TI, Weights&: SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(NumElts: 1 + BBCases.size(), Elt: 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(x: PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        // Swap-with-back and pop; adjust i/e so the swapped-in entry is
        // still visited.
        std::swap(a&: PredCases[i], b&: PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(a&: Weights[i + 1], b&: Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back(Elt: {DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(x: BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(x: BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Elt: Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(x: PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(a&: Weights[i + 1], b&: Weights.back());
          Weights.pop_back();
        }

        std::swap(a&: PredCases[i], b&: PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (const ValueEqualityComparisonCase &Case : BBCases)
      if (PTIHandled.count(x: Case.Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(Elt: WeightsForHandled[Case.Value]);
        PredCases.push_back(x: Case);
        ++NewSuccessors[Case.Dest];
        PTIHandled.erase(x: Case.Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(Elt: WeightsForHandled[I]);
      PredCases.push_back(x: ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {llvm::from_range, successors(BB: Pred)};
    Updates.reserve(N: Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    // One PHI entry per new case edge into this successor.
    for (auto I : seq(Size: NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(Succ: NewSuccessor.first, NewPred: Pred, ExistPred: BB);
    }
    if (DTU && !SuccsOfPred.contains(Ptr: NewSuccessor.first))
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CV->getType()) &&
           "Should not end up here with unstable pointers");
    CV =
        Builder.CreatePtrToInt(V: CV, DestTy: DL.getIntPtrType(CV->getType()), Name: "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(V: CV, Dest: PredDefault, NumCases: PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(OnVal: V.Value, Dest: V.Dest);

  if (PredHasWeights || SuccHasWeights)
    setFittedBranchWeights(I&: *NewSI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);

  eraseTerminatorAndDCECond(TI: PTI);

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(idx: i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
        UncondBrInst::Create(Target: InfLoopBlock, InsertBefore: InfLoopBlock);
        if (DTU)
          Updates.push_back(
              Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(idx: i, NewSucc: InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back(Elt: {DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1453
1454/// The specified terminator is a value equality comparison instruction
1455/// (either a switch or a branch on "X == c").
1456/// See if any of the predecessors of the terminator block are value comparisons
1457/// on the same value. If so, and if safe to do so, fold them together.
1458bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1459 IRBuilder<> &Builder) {
1460 BasicBlock *BB = TI->getParent();
1461 Value *CV = isValueEqualityComparison(TI); // CondVal
1462 assert(CV && "Not a comparison?");
1463
1464 bool Changed = false;
1465
1466 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1467 while (!Preds.empty()) {
1468 BasicBlock *Pred = Preds.pop_back_val();
1469 Instruction *PTI = Pred->getTerminator();
1470
1471 // Don't try to fold into itself.
1472 if (Pred == BB)
1473 continue;
1474
1475 // See if the predecessor is a comparison with the same value.
1476 Value *PCV = isValueEqualityComparison(TI: PTI); // PredCondVal
1477 if (PCV != CV)
1478 continue;
1479
1480 SmallSetVector<BasicBlock *, 4> FailBlocks;
1481 if (!safeToMergeTerminators(SI1: TI, SI2: PTI, FailBlocks: &FailBlocks)) {
1482 for (auto *Succ : FailBlocks) {
1483 if (!SplitBlockPredecessors(BB: Succ, Preds: TI->getParent(), Suffix: ".fold.split", DTU))
1484 return false;
1485 }
1486 }
1487
1488 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1489 Changed = true;
1490 }
1491 return Changed;
1492}
1493
1494// If we would need to insert a select that uses the value of this invoke
1495// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1496// need to do this), we can't hoist the invoke, as there is nowhere to put the
1497// select in this case.
1498static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
1499 Instruction *I1, Instruction *I2) {
1500 for (BasicBlock *Succ : successors(BB: BB1)) {
1501 for (const PHINode &PN : Succ->phis()) {
1502 Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
1503 Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
1504 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1505 return false;
1506 }
1507 }
1508 }
1509 return true;
1510}
1511
// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across.
enum SkipFlags {
  // A skipped instruction may read memory; stores must not be reordered
  // across it.
  SkipReadMem = 1,
  // A skipped instruction may write memory or otherwise have side effects
  // (allocas included); reads and other effects must not move across it.
  SkipSideEffect = 2,
  // A skipped instruction may not transfer control to its successor (e.g.
  // may throw); only speculatable instructions may be reordered across it.
  SkipImplicitControlFlow = 4
};
1520
1521static unsigned skippedInstrFlags(Instruction *I) {
1522 unsigned Flags = 0;
1523 if (I->mayReadFromMemory())
1524 Flags |= SkipReadMem;
1525 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1526 // inalloca) across stacksave/stackrestore boundaries.
1527 if (I->mayHaveSideEffects() || isa<AllocaInst>(Val: I))
1528 Flags |= SkipSideEffect;
1529 if (!isGuaranteedToTransferExecutionToSuccessor(I))
1530 Flags |= SkipImplicitControlFlow;
1531 return Flags;
1532}
1533
1534// Returns true if it is safe to reorder an instruction across preceding
1535// instructions in a basic block.
1536static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1537 // Don't reorder a store over a load.
1538 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1539 return false;
1540
1541 // If we have seen an instruction with side effects, it's unsafe to reorder an
1542 // instruction which reads memory or itself has side effects.
1543 if ((Flags & SkipSideEffect) &&
1544 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(Val: I)))
1545 return false;
1546
1547 // Reordering across an instruction which does not necessarily transfer
1548 // control to the next instruction is speculation.
1549 if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I))
1550 return false;
1551
1552 // Hoisting of llvm.deoptimize is only legal together with the next return
1553 // instruction, which this pass is not always able to do.
1554 if (auto *CB = dyn_cast<CallBase>(Val: I))
1555 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1556 return false;
1557
1558 // It's also unsafe/illegal to hoist an instruction above its instruction
1559 // operands
1560 BasicBlock *BB = I->getParent();
1561 for (Value *Op : I->operands()) {
1562 if (auto *J = dyn_cast<Instruction>(Val: Op))
1563 if (J->getParent() == BB)
1564 return false;
1565 }
1566
1567 return true;
1568}
1569
1570static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1571
1572/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1573/// instructions \p I1 and \p I2 can and should be hoisted.
1574static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
1575 const TargetTransformInfo &TTI) {
1576 // If we're going to hoist a call, make sure that the two instructions
1577 // we're commoning/hoisting are both marked with musttail, or neither of
1578 // them is marked as such. Otherwise, we might end up in a situation where
1579 // we hoist from a block where the terminator is a `ret` to a block where
1580 // the terminator is a `br`, and `musttail` calls expect to be followed by
1581 // a return.
1582 auto *C1 = dyn_cast<CallInst>(Val: I1);
1583 auto *C2 = dyn_cast<CallInst>(Val: I2);
1584 if (C1 && C2)
1585 if (C1->isMustTailCall() != C2->isMustTailCall())
1586 return false;
1587
1588 if (!TTI.isProfitableToHoist(I: I1) || !TTI.isProfitableToHoist(I: I2))
1589 return false;
1590
1591 // If any of the two call sites has nomerge or convergent attribute, stop
1592 // hoisting.
1593 if (const auto *CB1 = dyn_cast<CallBase>(Val: I1))
1594 if (CB1->cannotMerge() || CB1->isConvergent())
1595 return false;
1596 if (const auto *CB2 = dyn_cast<CallBase>(Val: I2))
1597 if (CB2->cannotMerge() || CB2->isConvergent())
1598 return false;
1599
1600 return true;
1601}
1602
/// Hoists DbgVariableRecords from \p I1 and \p OtherInsts that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisted in
/// hoistCommonCodeFromSuccessors. e.g. The input:
/// I1 DVRs: { x, z },
/// OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
static void hoistLockstepIdenticalDbgVariableRecords(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  // Nothing to hoist if the leading instruction carries no debug records.
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators, one pair per instruction.
  SmallVector<CurrentAndEndIt> Itrs;
  Itrs.reserve(N: OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current records are identical to the first one.
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(Range: make_first_range(c: ArrayRef(Itrs).drop_front()),
                  P: [&](DbgRecord::self_iterator I) {
                    return Itrs[0].first->isIdenticalToWhenDefined(R: *I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      Elt: {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    // If any instruction has no debug records at all, nothing can match in
    // lock-step; bail out without hoisting anything.
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        Elt: {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
  // the lock-step DbgRecords are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Range&: Itrs, P: atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord (moving it invalidates the old position).
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(DR: &DR, Here: TI->getIterator());
      }
    }
  }
}
1659
1660static bool areIdenticalUpToCommutativity(const Instruction *I1,
1661 const Instruction *I2) {
1662 if (I1->isIdenticalToWhenDefined(I: I2, /*IntersectAttrs=*/true))
1663 return true;
1664
1665 if (auto *Cmp1 = dyn_cast<CmpInst>(Val: I1))
1666 if (auto *Cmp2 = dyn_cast<CmpInst>(Val: I2))
1667 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1668 Cmp1->getOperand(i_nocapture: 0) == Cmp2->getOperand(i_nocapture: 1) &&
1669 Cmp1->getOperand(i_nocapture: 1) == Cmp2->getOperand(i_nocapture: 0);
1670
1671 if (I1->isCommutative() && I1->isSameOperationAs(I: I2)) {
1672 return I1->getOperand(i: 0) == I2->getOperand(i: 1) &&
1673 I1->getOperand(i: 1) == I2->getOperand(i: 0) &&
1674 equal(LRange: drop_begin(RangeOrContainer: I1->operands(), N: 2), RRange: drop_begin(RangeOrContainer: I2->operands(), N: 2));
1675 }
1676
1677 return false;
1678}
1679
/// If the target supports conditional faulting,
/// we look for the following pattern:
/// \code
///   BB:
///     ...
///     %cond = icmp ult %x, %y
///     br i1 %cond, label %TrueBB, label %FalseBB
///   FalseBB:
///     store i32 1, ptr %q, align 4
///     ...
///   TrueBB:
///     %maskedloadstore = load i32, ptr %b, align 4
///     store i32 %maskedloadstore, ptr %p, align 4
///     ...
/// \endcode
///
/// and transform it into:
///
/// \code
///   BB:
///     ...
///     %cond = icmp ult %x, %y
///     %maskedloadstore = cload i32, ptr %b, %cond
///     cstore i32 %maskedloadstore, ptr %p, %cond
///     cstore i32 1, ptr %q, ~%cond
///     br i1 %cond, label %TrueBB, label %FalseBB
///   FalseBB:
///     ...
///   TrueBB:
///     ...
/// \endcode
///
/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
/// e.g.
///
/// \code
///   %vcond = bitcast i1 %cond to <1 x i1>
///   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
///     (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
///   %maskedloadstore = bitcast <1 x i32> %v0 to i32
///   call void @llvm.masked.store.v1i32.p0
///     (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
///   %cond.not = xor i1 %cond, true
///   %vcond.not = bitcast i1 %cond.not to <1 x i1>
///   call void @llvm.masked.store.v1i32.p0
///     (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
/// \endcode
///
/// So we need to turn hoisted load/store into cload/cstore.
///
/// \param BI The branch instruction.
/// \param SpeculatedConditionalLoadsStores The load/store instructions that
///                                         will be speculated.
/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
static void hoistConditionalLoadsStores(
    CondBrInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  auto *VCondTy = FixedVectorType::get(ElementType: Type::getInt1Ty(C&: Context), NumElts: 1);
  auto *Cond = BI->getCondition();
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // Triangle CFG: one shared mask; xor the condition when we are
    // speculating the false destination.
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        V: *Invert ? Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)) : Cond,
        DestTy: VCondTy);
  } else {
    // Diamond CFG: build a mask for each side of the branch.
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        V: Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)), DestTy: VCondTy);
    MaskTrue = Builder.CreateBitCast(V: Cond, DestTy: VCondTy);
  }
  // Strip any chain of bitcasts to reach the underlying value.
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(Val: V))
      V = BitCast->getOperand(i_nocapture: 0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(i: 0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(i: 0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          // If the load feeds a PHI, the value flowing in from the untaken
          // edge becomes the masked load's pass-through operand.
          if ((PN = dyn_cast<PHINode>(Val: U))) {
            PassThru = Builder.CreateBitCast(
                V: PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                DestTy: FixedVectorType::get(ElementType: Ty, NumElts: 1));
          } else if (auto *Ins = cast<Instruction>(Val: U);
                     Sel && Ins->getParent() == BB) {
            // This happens when store or/and a speculative instruction between
            // load and store were hoisted to the BB. Make sure the masked load
            // inserted before its use.
            // We assume there's one of such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          Ty: FixedVectorType::get(ElementType: Ty, NumElts: 1), Ptr: Op0, Alignment: LI->getAlign(), Mask, PassThru);
      // Bitcast the <1 x Ty> result back to scalar Ty for existing users.
      Value *NewLoadStore = Builder.CreateBitCast(V: MaskedLoadStore, DestTy: Ty);
      if (PN)
        PN->setIncomingValue(i: PN->getBasicBlockIndex(BB), V: NewLoadStore);
      I->replaceAllUsesWith(V: NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          V: PeekThroughBitcasts(Op0), DestTy: FixedVectorType::get(ElementType: Op0->getType(), NumElts: 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          Val: StoredVal, Ptr: I->getOperand(i: 1), Alignment: cast<StoreInst>(Val: I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Not support pointer type, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(KindID: LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(CR: getConstantRangeFromMetadata(RangeMD: *Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata(KnownIDs: {LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    at::deleteAssignmentMarkers(Inst: I);
    I->eraseMetadataIf(Pred: [](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(SrcInst: *I);
    I->eraseFromParent();
  }
}
1826
1827static bool isSafeCheapLoadStore(const Instruction *I,
1828 const TargetTransformInfo &TTI) {
1829 // Not handle volatile or atomic.
1830 bool IsStore = false;
1831 if (auto *L = dyn_cast<LoadInst>(Val: I)) {
1832 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1833 return false;
1834 } else if (auto *S = dyn_cast<StoreInst>(Val: I)) {
1835 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1836 return false;
1837 IsStore = true;
1838 } else
1839 return false;
1840
1841 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1842 // That's why we have the alignment limitation.
1843 // FIXME: Update the prototype of the intrinsics?
1844 return TTI.hasConditionalLoadStoreForType(Ty: getLoadStoreType(I), IsStore) &&
1845 getLoadStoreAlignment(I) < Value::MaximumAlignment;
1846}
1847
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matching
/// instructions only. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
  for (auto *Succ : UniqueSuccessors) {
    if (Succ->hasAddressTaken())
      return false;
    // Use getUniquePredecessor instead of getSinglePredecessor to support
    // multi-cases successors in switch.
    if (Succ->getUniquePredecessor())
      continue;
    // If Succ has >1 predecessors, continue to check if the Succ contains only
    // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
    // can relax the condition based on the assumption that the program would
    // never enter Succ and trigger such an UB.
    if (isa<UnreachableInst>(Val: *Succ->begin()))
      continue;
    return false;
  }
  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : UniqueSuccessors) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // PHI nodes at the top of a successor are not hoistable; give up.
    if (isa<PHINode>(Val: *SuccItr))
      return false;
    SuccIterPairs.push_back(Elt: SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.

    // Check if sizes and terminators of all successors match.
    unsigned Size0 = UniqueSuccessors[0]->size();
    Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
    bool AllSame =
        all_of(Range: drop_begin(RangeOrContainer&: UniqueSuccessors), P: [Term0, Size0](BasicBlock *Succ) {
          return Succ->getTerminator()->isIdenticalTo(I: Term0) &&
                 Succ->size() == Size0;
        });
    if (!AllSame)
      return false;
    // Walk all successors backwards in lock-step, requiring every row of
    // instructions to be identical (up to commutativity).
    LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
    while (LRI.isValid()) {
      Instruction *I0 = (*LRI)[0];
      if (any_of(Range: *LRI, P: [I0](Instruction *I) {
            return !areIdenticalUpToCommutativity(I1: I0, I2: I);
          })) {
        return false;
      }
      --LRI;
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(C&: SuccIterPairs,
             P: [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(c&: OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    // Check the current "row" of instructions across all successors: are they
    // identical, and does any of them terminate its block?
    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(Elt: &*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(
                 TI, I1, OtherSuccTIs&: OtherInsts, UniqueSuccessors: UniqueSuccessors.getArrayRef()) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      // Identity is not enough: also verify hoisting past any skipped
      // instructions is safe and the target considers it profitable.
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I: I1, Flags: SkipFlagsBB1) &&
          all_of(Range&: OtherSuccIterPairRange, P: [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I: I2, Flags: SkipFlagsBB2) &&
                   shouldHoistCommonInstructions(I1, I2, TTI);
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(InsertPos: TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(V: I1);
        // Merge IR flags (nsw/nuw/fast-math etc.) conservatively.
        I1->andIRFlags(V: I2);
        if (auto *CB = dyn_cast<CallBase>(Val: I1)) {
          bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG Compile.
          (void)Success;
        }

        combineMetadataForCSE(K: I1, J: I2, DoesKMove: true);
        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(LocA: I1->getDebugLoc(), LocB: I2->getDebugLoc());
        I2->eraseFromParent();
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
2044
/// Hoist the identical terminator \p I1 (and its lock-step twins in
/// \p OtherSuccTIs) from TI's successor blocks into TI's block: the
/// terminator is cloned before \p TI, PHI inputs that differ between the
/// two sides of a conditional branch are resolved with selects, and \p TI
/// is erased. Returns true if the IR changed.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs,
    ArrayRef<BasicBlock *> UniqueSuccessors) {

  // Non-null only when TI is a two-way conditional branch ("if" shape).
  auto *BI = dyn_cast<CondBrInst>(Val: TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(Val: I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(Val: I1))
    return false;

  // Bail if any PHI in the blocks the hoisted terminator branches to would
  // need differing values from BB1 vs the other blocks and we cannot (or
  // should not) express that with a select.
  for (BasicBlock *Succ : successors(BB: BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(BB: OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow than converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(V: BB1V, I: &PN) ||
            passingValueIsAlwaysUndefined(V: BB2V, I: &PN))
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts&: OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(ParentBB: TIParent, It: TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    I1->replaceAllUsesWith(V: NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(V: NT);
    NT->takeName(V: I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  SmallVector<DebugLoc, 4> Locs;
  Locs.push_back(Elt: I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(Elt: OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache one select per distinct (BB1V, BB2V) pair so repeated PHI rows
    // reuse the same select.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB: BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(x&: BB1V, y&: BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          SI = cast<SelectInst>(Val: Builder.CreateSelectFMF(
              C: BI->getCondition(), True: BB1V, False: BB2V,
              FMFSource: isa<FPMathOperator>(Val: PN) ? &PN : nullptr,
              Name: BB1V->getName() + "." + BB2V->getName(), MDFrom: BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, V: SI);
      }
    }
  }

  SmallVector<DominatorTree::UpdateType, 4> Updates;

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB: BB1)) {
    addPredecessorToBlock(Succ, NewPred: TIParent, ExistPred: BB1);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU) {
    // TI might be a switch with multi-cases destination, so we need to care for
    // the duplication of successors.
    for (BasicBlock *Succ : UniqueSuccessors)
      Updates.push_back(Elt: {DominatorTree::Delete, TIParent, Succ});
  }

  eraseTerminatorAndDCECond(TI);
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
2174
2175// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2176// into variables.
2177static bool replacingOperandWithVariableIsCheap(const Instruction *I,
2178 int OpIdx) {
2179 // Divide/Remainder by constant is typically much cheaper than by variable.
2180 if (I->isIntDivRem())
2181 return OpIdx != 1;
2182 return !isa<IntrinsicInst>(Val: I);
2183}
2184
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
static bool canSinkInstructions(
    ArrayRef<Instruction *> Insts,
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(Val: I) || I->isEHPad() || isa<AllocaInst>(Val: I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(Val: I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    // All candidates must have the same number of uses.
    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  // All candidates must perform the same operation as the first one (with
  // intersectable call attributes) and carry matching MMRA metadata.
  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I: I0, flags: Instruction::CompareUsingIntersectedAttrs))
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(Val: &U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(LRange&: Insts, RRange&: It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(Val: I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(Val: I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Range&: Insts, P: IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Range&: Insts, P: IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(Val: I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // For each operand position: either all candidates agree on the operand, or
  // the differing values are legal to replace with a PHI, in which case they
  // are recorded in PHIOperands for the actual sinking step.
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(i: OI);
    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(i: OI) == I0->getOperand(i: OI);
    };
    if (!all_of(Range&: Insts, P: SameAsI0)) {
      if ((isa<Constant>(Val: Op) && !replacingOperandWithVariableIsCheap(I: I0, OpIdx: OI)) ||
          !canReplaceOperandWithVariable(I: I0, OpIdx: OI))
        // We can't create a PHI from this GEP.
        return false;
      auto &Ops = PHIOperands[&I0->getOperandUse(i: OI)];
      for (auto *I : Insts)
        Ops.push_back(Elt: I->getOperand(i: OI));
    }
  }
  return true;
}
2290
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
  // All blocks branch unconditionally to the same successor.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(Idx: 0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  SmallVector<Instruction*,4> Insts;
  for (auto *BB : Blocks) {
    Instruction *I = BB->getTerminator();
    I = I->getPrevNode();
    Insts.push_back(Elt: I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Range&: Insts, P: [&I0, O](const Instruction *I) {
      return I->getOperand(i: O) != I0->getOperand(i: O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(Elt: I0->getOperand(i: O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(i: O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Ty: Op->getType(), NumReservedValues: Insts.size(), NameStr: Op->getName() + ".sink");
    PN->insertBefore(InsertPos: BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(V: I->getOperand(i: O), BB: I->getParent());
    NewOperands.push_back(Elt: PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(i: O).set(NewOperands[O]);

  I0->moveBefore(BB&: *BBEnd, I: BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(LocA: I0->getDebugLoc(), LocB: I->getDebugLoc());
      combineMetadataForCSE(K: I0, J: I, DoesKMove: true);
      I0->andIRFlags(V: I);
      if (auto *CB = dyn_cast<CallBase>(Val: I0)) {
        bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  // Replace the PHI uses of I0 with I0 itself and delete the now-redundant
  // PHI nodes.
  for (User *U : make_early_inc_range(Range: I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(Val: U);
    PN->replaceAllUsesWith(V: I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(V: I0);
    I->eraseFromParent();
  }
}
2385
/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
/// \returns true if any instruction was sunk or if the CFG was changed (an
/// edge split counts as a change even before the first instruction is sunk).
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
                                           DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  //   if (a) f(1);
  //   else if (b) f(2);
  //
  // produces:
  //
  //       [if]
  //      /    \
  //    [f(1)] [if]
  //      |     | \
  //      |     |  |
  //      |  [f(2)]|
  //       \    | /
  //        [ end ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //       [if]
  //      /    \
  //    [x(1)] [if]
  //      |     | \
  //      |     |  \
  //      |  [x(2)] |
  //       \   /    |
  //   [sink.split] |
  //         \     /
  //         [ end ]
  //
  SmallVector<BasicBlock*,4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<UncondBrInst>(Val: PredBB->getTerminator());
    if (PredBr)
      UnconditionalPreds.push_back(Elt: PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  // Sinking only pays off when there are at least two unconditional arcs to
  // merge code from.
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of each
  // block can be sunk, those instructions are added to ValuesToSink and we
  // carry on. If we can sink an instruction but need to PHI-merge some operands
  // (because they're not identical in each instruction) we add these to
  // PHIOperands.
  // We prepopulate PHIOperands with the phis that already exist in BB.
  DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
  for (PHINode &PN : BB->phis()) {
    SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
    for (const Use &U : PN.incoming_values())
      IncomingVals.insert(KV: {PN.getIncomingBlock(U), &U});
    // Each entry is keyed by the incoming use from the first unconditional
    // predecessor; the vector holds the incoming value from every
    // unconditional predecessor, in UnconditionalPreds order.
    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
    for (BasicBlock *Pred : UnconditionalPreds)
      Ops.push_back(Elt: *IncomingVals[Pred]);
  }

  // Walk backwards from each terminator in lockstep, counting how many
  // trailing instructions may legally be sunk.
  int ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  LockstepReverseIterator<true> LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(Insts: *LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n");
    InstructionsToSink.insert_range(R: *LRI);
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  // When BB is followed by deopt/unreachable, the profitability checks below
  // are skipped entirely: everything that is legal to sink gets sunk.
  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  if (!followedByDeoptOrUnreachable) {
    // Check whether this is the pointer operand of a load/store.
    auto IsMemOperand = [](Use &U) {
      auto *I = cast<Instruction>(Val: U.getUser());
      if (isa<LoadInst>(Val: I))
        return U.getOperandNo() == LoadInst::getPointerOperandIndex();
      if (isa<StoreInst>(Val: I))
        return U.getOperandNo() == StoreInst::getPointerOperandIndex();
      return false;
    };

    // Okay, we *could* sink last ScanIdx instructions. But how many can we
    // actually sink before encountering instruction that is unprofitable to
    // sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
      unsigned NumPHIInsts = 0;
      for (Use &U : (*LRI)[0]->operands()) {
        auto It = PHIOperands.find(Val: &U);
        // An operand only needs a PHI if its values differ across preds AND
        // those values are not themselves scheduled to be sunk (in which case
        // they collapse into one instruction and no PHI is required).
        if (It != PHIOperands.end() && !all_of(Range&: It->second, P: [&](Value *V) {
              return InstructionsToSink.contains(Ptr: V);
            })) {
          ++NumPHIInsts;
          // Do not separate a load/store from the gep producing the address.
          // The gep can likely be folded into the load/store as an addressing
          // mode. Additionally, a load of a gep is easier to analyze than a
          // load of a phi.
          if (IsMemOperand(U) &&
              any_of(Range&: It->second, P: [](Value *V) { return isa<GEPOperator>(Val: V); }))
            return false;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      }
      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
      return NumPHIInsts <= 1;
    };

    // We've determined that we are going to sink last ScanIdx instructions,
    // and recorded them in InstructionsToSink. Now, some instructions may be
    // unprofitable to sink. But that determination depends on the instructions
    // that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable.
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch least
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
      InstructionsProfitableToSink.insert_range(R: *LRI);
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan, do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink");
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(Ptr: I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }

  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute(I: (*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, Preds: UnconditionalPreds, Suffix: ".sink.split", DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that will
  // actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop half way through
  // and never actually sink it which means we produce more PHIs than intended.
  // This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    sinkLastInstruction(Blocks: UnconditionalPreds);
    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}
2645
2646namespace {
2647
/// Partition of `invoke` instructions into disjoint groups such that every
/// pair within a group satisfies shouldBelongToSameSet() and may therefore be
/// merged into a single `invoke`.
struct CompatibleSets {
  using SetTy = SmallVector<InvokeInst *, 2>;

  // The groups built so far; each SetTy holds mutually-compatible invokes.
  SmallVector<SetTy, 1> Sets;

  /// Decide whether the two given `invoke`s (exactly two) could be merged
  /// into one without changing semantics.
  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);

  /// Return the existing set \p II is compatible with, or a fresh empty set.
  SetTy &getCompatibleSet(InvokeInst *II);

  /// Place \p II into its compatible set (creating one if needed).
  void insert(InvokeInst *II);
};
2659
2660CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2661 // Perform a linear scan over all the existing sets, see if the new `invoke`
2662 // is compatible with any particular set. Since we know that all the `invokes`
2663 // within a set are compatible, only check the first `invoke` in each set.
2664 // WARNING: at worst, this has quadratic complexity.
2665 for (CompatibleSets::SetTy &Set : Sets) {
2666 if (CompatibleSets::shouldBelongToSameSet(Invokes: {Set.front(), II}))
2667 return Set;
2668 }
2669
2670 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2671 return Sets.emplace_back();
2672}
2673
2674void CompatibleSets::insert(InvokeInst *II) {
2675 getCompatibleSet(II).emplace_back(Args&: II);
2676}
2677
// Check whether the two given `invoke`s are semantically mergeable into one:
// neither forbids merging, their callees/destinations/PHI inputs match, they
// perform the identical operation (incl. operand bundles), and any operand
// positions where they differ can be replaced by a PHI.
bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");

  // Can we theoretically merge these `invoke`s?
  auto IsIllegalToMerge = [](InvokeInst *II) {
    return II->cannotMerge() || II->isInlineAsm();
  };
  if (any_of(Range&: Invokes, P: IsIllegalToMerge))
    return false;

  // Either both `invoke`s must be direct,
  // or both `invoke`s must be indirect.
  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
  bool HaveIndirectCalls = any_of(Range&: Invokes, P: IsIndirectCall);
  bool AllCallsAreIndirect = all_of(Range&: Invokes, P: IsIndirectCall);
  if (HaveIndirectCalls) {
    if (!AllCallsAreIndirect)
      return false;
  } else {
    // All callees must be identical.
    Value *Callee = nullptr;
    for (InvokeInst *II : Invokes) {
      Value *CurrCallee = II->getCalledOperand();
      assert(CurrCallee && "There is always a called operand.");
      if (!Callee)
        Callee = CurrCallee;
      else if (Callee != CurrCallee)
        return false;
    }
  }

  // Either both `invoke`s must not have a normal destination,
  // or both `invoke`s must have a normal destination,
  // (a destination whose first non-PHI/dbg instruction is `unreachable`
  // counts as "no normal destination").
  auto HasNormalDest = [](InvokeInst *II) {
    return !isa<UnreachableInst>(Val: II->getNormalDest()->getFirstNonPHIOrDbg());
  };
  if (any_of(Range&: Invokes, P: HasNormalDest)) {
    // Do not merge `invoke` that does not have a normal destination with one
    // that does have a normal destination, even though doing so would be legal.
    if (!all_of(Range&: Invokes, P: HasNormalDest))
      return false;

    // All normal destinations must be identical.
    BasicBlock *NormalBB = nullptr;
    for (InvokeInst *II : Invokes) {
      BasicBlock *CurrNormalBB = II->getNormalDest();
      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
      if (!NormalBB)
        NormalBB = CurrNormalBB;
      else if (NormalBB != CurrNormalBB)
        return false;
    }

    // In the normal destination, the incoming values for these two `invoke`s
    // must be compatible. The invokes themselves are treated as equivalent
    // values here, since after the merge they become one instruction.
    SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
    if (!incomingValuesAreCompatible(
            BB: NormalBB, IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()},
            EquivalenceSet: &EquivalenceSet))
      return false;
  }

#ifndef NDEBUG
  // All unwind destinations must be identical.
  // We know that because we have started from said unwind destination.
  BasicBlock *UnwindBB = nullptr;
  for (InvokeInst *II : Invokes) {
    BasicBlock *CurrUnwindBB = II->getUnwindDest();
    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
    if (!UnwindBB)
      UnwindBB = CurrUnwindBB;
    else
      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
  }
#endif

  // In the unwind destination, the incoming values for these two `invoke`s
  // must be compatible.
  if (!incomingValuesAreCompatible(
          BB: Invokes.front()->getUnwindDest(),
          IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()}))
    return false;

  // Ignoring arguments, these `invoke`s must be identical,
  // including operand bundles.
  const InvokeInst *II0 = Invokes.front();
  for (auto *II : Invokes.drop_front())
    if (!II->isSameOperationAs(I: II0, flags: Instruction::CompareUsingIntersectedAttrs))
      return false;

  // Can we theoretically form the data operands for the merged `invoke`?
  auto IsIllegalToMergeArguments = [](auto Ops) {
    Use &U0 = std::get<0>(Ops);
    Use &U1 = std::get<1>(Ops);
    // Identical operands need no PHI and are always mergeable.
    if (U0 == U1)
      return false;
    // Differing operands would be PHI-merged; that only works if this operand
    // position tolerates a non-constant value.
    return !canReplaceOperandWithVariable(I: cast<Instruction>(Val: U0.getUser()),
                                          OpIdx: U0.getOperandNo());
  };
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
  if (any_of(Range: zip(t: Invokes[0]->data_ops(), u: Invokes[1]->data_ops()),
             P: IsIllegalToMergeArguments))
    return false;

  return true;
}
2784
2785} // namespace
2786
// Merge all invokes in the provided set, all of which are compatible
// as per the `CompatibleSets::shouldBelongToSameSet()`.
// The merged invoke is placed in a new basic block; each original invoke's
// block is rewired to branch unconditionally to it, operands that differ are
// PHI-merged, and the original invokes are erased.
static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
                                       DomTreeUpdater *DTU) {
  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");

  SmallVector<DominatorTree::UpdateType, 8> Updates;
  if (DTU)
    Updates.reserve(N: 2 + 3 * Invokes.size());

  bool HasNormalDest =
      !isa<UnreachableInst>(Val: Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());

  // Clone one of the invokes into a new basic block.
  // Since they are all compatible, it doesn't matter which invoke is cloned.
  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
    InvokeInst *II0 = Invokes.front();
    BasicBlock *II0BB = II0->getParent();
    BasicBlock *InsertBeforeBlock =
        II0->getParent()->getIterator()->getNextNode();
    Function *Func = II0BB->getParent();
    LLVMContext &Ctx = II0->getContext();

    BasicBlock *MergedInvokeBB = BasicBlock::Create(
        Context&: Ctx, Name: II0BB->getName() + ".invoke", Parent: Func, InsertBefore: InsertBeforeBlock);

    auto *MergedInvoke = cast<InvokeInst>(Val: II0->clone());
    // NOTE: all invokes have the same attributes, so no handling needed.
    MergedInvoke->insertInto(ParentBB: MergedInvokeBB, It: MergedInvokeBB->end());

    if (!HasNormalDest) {
      // This set does not have a normal destination,
      // so just form a new block with unreachable terminator.
      BasicBlock *MergedNormalDest = BasicBlock::Create(
          Context&: Ctx, Name: II0BB->getName() + ".cont", Parent: Func, InsertBefore: InsertBeforeBlock);
      auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
      UI->setDebugLoc(DebugLoc::getTemporary());
      MergedInvoke->setNormalDest(MergedNormalDest);
    }

    // The unwind destination, however, remains identical for all invokes here.

    return MergedInvoke;
  }();

  if (DTU) {
    // Predecessor blocks that contained these invokes will now branch to
    // the new block that contains the merged invoke, ...
    for (InvokeInst *II : Invokes)
      Updates.push_back(
          Elt: {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});

    // ... which has the new `unreachable` block as normal destination,
    // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
    for (BasicBlock *SuccBBOfMergedInvoke : successors(I: MergedInvoke))
      Updates.push_back(Elt: {DominatorTree::Insert, MergedInvoke->getParent(),
                          SuccBBOfMergedInvoke});

    // Since predecessor blocks now unconditionally branch to a new block,
    // they no longer branch to their original successors.
    for (InvokeInst *II : Invokes)
      for (BasicBlock *SuccOfPredBB : successors(BB: II->getParent()))
        Updates.push_back(
            Elt: {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
  }

  bool IsIndirectCall = Invokes[0]->isIndirectCall();

  // Form the merged operands for the merged invoke.
  for (Use &U : MergedInvoke->operands()) {
    // Only PHI together the indirect callees and data operands.
    if (MergedInvoke->isCallee(U: &U)) {
      if (!IsIndirectCall)
        continue;
    } else if (!MergedInvoke->isDataOperand(U: &U))
      continue;

    // Don't create trivial PHI's with all-identical incoming values.
    bool NeedPHI = any_of(Range&: Invokes, P: [&U](InvokeInst *II) {
      return II->getOperand(i_nocapture: U.getOperandNo()) != U.get();
    });
    if (!NeedPHI)
      continue;

    // Form a PHI out of all the data ops under this index.
    PHINode *PN = PHINode::Create(
        Ty: U->getType(), /*NumReservedValues=*/Invokes.size(), NameStr: "", InsertBefore: MergedInvoke->getIterator());
    for (InvokeInst *II : Invokes)
      PN->addIncoming(V: II->getOperand(i_nocapture: U.getOperandNo()), BB: II->getParent());

    U.set(PN);
  }

  // We've ensured that each PHI node has compatible (identical) incoming values
  // when coming from each of the `invoke`s in the current merge set,
  // so update the PHI nodes accordingly.
  for (BasicBlock *Succ : successors(I: MergedInvoke))
    addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
                          /*ExistPred=*/Invokes.front()->getParent());

  // And finally, replace the original `invoke`s with an unconditional branch
  // to the block with the merged `invoke`. Also, give that merged `invoke`
  // the merged debugloc of all the original `invoke`s.
  DILocation *MergedDebugLoc = nullptr;
  for (InvokeInst *II : Invokes) {
    // Compute the debug location common to all the original `invoke`s.
    if (!MergedDebugLoc)
      MergedDebugLoc = II->getDebugLoc();
    else
      MergedDebugLoc =
          DebugLoc::getMergedLocation(LocA: MergedDebugLoc, LocB: II->getDebugLoc());

    // And replace the old `invoke` with an unconditionally branch
    // to the block with the merged `invoke`.
    for (BasicBlock *OrigSuccBB : successors(BB: II->getParent()))
      OrigSuccBB->removePredecessor(Pred: II->getParent());
    auto *BI = UncondBrInst::Create(Target: MergedInvoke->getParent(), InsertBefore: II->getParent());
    // The unconditional branch is part of the replacement for the original
    // invoke, so should use its DebugLoc.
    BI->setDebugLoc(II->getDebugLoc());
    bool Success = MergedInvoke->tryIntersectAttributes(Other: II);
    assert(Success && "Merged invokes with incompatible attributes");
    // For NDEBUG Compile
    (void)Success;
    II->replaceAllUsesWith(V: MergedInvoke);
    II->eraseFromParent();
    ++NumInvokesMerged;
  }
  MergedInvoke->setDebugLoc(MergedDebugLoc);
  ++NumInvokeSetsFormed;

  if (DTU)
    DTU->applyUpdates(Updates);
}
2921
2922/// If this block is a `landingpad` exception handling block, categorize all
2923/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2924/// being "mergeable" together, and then merge invokes in each set together.
2925///
2926/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2927/// [...] [...]
2928/// | |
2929/// [invoke0] [invoke1]
2930/// / \ / \
2931/// [cont0] [landingpad] [cont1]
2932/// to:
2933/// [...] [...]
2934/// \ /
2935/// [invoke]
2936/// / \
2937/// [cont] [landingpad]
2938///
2939/// But of course we can only do that if the invokes share the `landingpad`,
2940/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2941/// and the invoked functions are "compatible".
2942static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
2943 if (!EnableMergeCompatibleInvokes)
2944 return false;
2945
2946 bool Changed = false;
2947
2948 // FIXME: generalize to all exception handling blocks?
2949 if (!BB->isLandingPad())
2950 return Changed;
2951
2952 CompatibleSets Grouper;
2953
2954 // Record all the predecessors of this `landingpad`. As per verifier,
2955 // the only allowed predecessor is the unwind edge of an `invoke`.
2956 // We want to group "compatible" `invokes` into the same set to be merged.
2957 for (BasicBlock *PredBB : predecessors(BB))
2958 Grouper.insert(II: cast<InvokeInst>(Val: PredBB->getTerminator()));
2959
2960 // And now, merge `invoke`s that were grouped togeter.
2961 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2962 if (Invokes.size() < 2)
2963 continue;
2964 Changed = true;
2965 mergeCompatibleInvokesImpl(Invokes, DTU);
2966 }
2967
2968 return Changed;
2969}
2970
2971namespace {
2972/// Track ephemeral values, which should be ignored for cost-modelling
2973/// purposes. Requires walking instructions in reverse order.
2974class EphemeralValueTracker {
2975 SmallPtrSet<const Instruction *, 32> EphValues;
2976
2977 bool isEphemeral(const Instruction *I) {
2978 if (isa<AssumeInst>(Val: I))
2979 return true;
2980 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2981 all_of(Range: I->users(), P: [&](const User *U) {
2982 return EphValues.count(Ptr: cast<Instruction>(Val: U));
2983 });
2984 }
2985
2986public:
2987 bool track(const Instruction *I) {
2988 if (isEphemeral(I)) {
2989 EphValues.insert(Ptr: I);
2990 return true;
2991 }
2992 return false;
2993 }
2994
2995 bool contains(const Instruction *I) const { return EphValues.contains(Ptr: I); }
2996};
2997} // namespace
2998
/// Determine if we can hoist a sole store instruction out of a
/// conditional block.
///
/// We are looking for code like the following:
///   \code
///   BrBB:
///     store i32 %add, i32* %arrayidx2
///     ... // No other stores or function calls (we could be calling a memory
///     ... // function).
///     %cmp = icmp ult %x, %y
///     br i1 %cmp, label %EndBB, label %ThenBB
///   ThenBB:
///     store i32 %add5, i32* %arrayidx2
///     br label EndBB
///   EndBB:
///     ...
///   \endcode
/// We are going to transform this into:
///   \code
///   BrBB:
///     store i32 %add, i32* %arrayidx2
///     ... //
///     %cmp = icmp ult %x, %y
///     %add.add5 = select i1 %cmp, i32 %add, %add5
///     store i32 %add.add5, i32* %arrayidx2
///     ...
///   \endcode
///
/// \return The value previously in memory at the store's location: either the
/// value operand of a prior store to the same pointer in \p BrBB, or a prior
/// load of that location (when the object is provably writable). nullptr if
/// the store cannot be hoisted into the predecessor block.
static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
  StoreInst *StoreToHoist = dyn_cast<StoreInst>(Val: I);
  if (!StoreToHoist)
    return nullptr;

  // Volatile or atomic.
  if (!StoreToHoist->isSimple())
    return nullptr;

  Value *StorePtr = StoreToHoist->getPointerOperand();
  Type *StoreTy = StoreToHoist->getValueOperand()->getType();

  // Look for a store to the same pointer in BrBB.
  unsigned MaxNumInstToLookAt = 9;
  // Skip pseudo probe intrinsic calls which are not really killing any memory
  // accesses.
  for (Instruction &CurI : reverse(C&: *BrBB)) {
    if (!MaxNumInstToLookAt)
      break;
    --MaxNumInstToLookAt;

    if (isa<PseudoProbeInst>(Val: CurI))
      continue;

    // Could be calling an instruction that affects memory like free().
    if (CurI.mayWriteToMemory() && !isa<StoreInst>(Val: CurI))
      return nullptr;

    if (auto *SI = dyn_cast<StoreInst>(Val: &CurI)) {
      // Found the previous store to same location and type. Make sure it is
      // simple, to avoid introducing a spurious non-atomic write after an
      // atomic write.
      if (SI->getPointerOperand() == StorePtr &&
          SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
          SI->getAlign() >= StoreToHoist->getAlign())
        // Found the previous store, return its value operand.
        return SI->getValueOperand();
      // Any store to another (or not-provably-same) location kills the walk:
      // it might clobber the bytes we would be speculating over.
      return nullptr; // Unknown store.
    }

    if (auto *LI = dyn_cast<LoadInst>(Val: &CurI)) {
      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
          LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
        // A matching load gives us the current memory contents, which can
        // serve as the "previous value" - but only if the object is writable
        // and its provenance never escapes (so no other thread can observe
        // the speculative write), and, if required, provably dereferenceable.
        Value *Obj = getUnderlyingObject(V: StorePtr);
        bool ExplicitlyDereferenceableOnly;
        if (isWritableObject(Object: Obj, ExplicitlyDereferenceableOnly) &&
            capturesNothing(
                CC: PointerMayBeCaptured(V: Obj, /*ReturnCaptures=*/false,
                                      Mask: CaptureComponents::Provenance)) &&
            (!ExplicitlyDereferenceableOnly ||
             isDereferenceablePointer(V: StorePtr, Ty: StoreTy,
                                      DL: LI->getDataLayout()))) {
          // Found a previous load, return it.
          return LI;
        }
      }
      // The load didn't work out, but we may still find a store.
    }
  }

  return nullptr;
}
3088
/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
/// converted to selects.
///
/// \param SpeculatedInstructions running count of speculated instructions;
///        incremented for each PHI whose incoming value is an unfolded
///        ConstantExpr, and capped at 1.
/// \param Cost accumulated select-instruction cost for every non-trivial PHI.
/// \returns true if at least one PHI genuinely needs a select and none of
///          them blocks the transform.
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
                                           BasicBlock *EndBB,
                                           unsigned &SpeculatedInstructions,
                                           InstructionCost &Cost,
                                           const TargetTransformInfo &TTI) {
  // Cost model: optimize purely for size under minsize, otherwise balance
  // size and latency.
  TargetTransformInfo::TargetCostKind CostKind =
      BB->getParent()->hasMinSize()
          ? TargetTransformInfo::TCK_CodeSize
          : TargetTransformInfo::TCK_SizeAndLatency;

  bool HaveRewritablePHIs = false;
  for (PHINode &PN : EndBB->phis()) {
    Value *OrigV = PN.getIncomingValueForBlock(BB);
    Value *ThenV = PN.getIncomingValueForBlock(BB: ThenBB);

    // FIXME: Try to remove some of the duplication with
    // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
    if (ThenV == OrigV)
      continue;

    Cost += TTI.getCmpSelInstrCost(Opcode: Instruction::Select, ValTy: PN.getType(),
                                   CondTy: CmpInst::makeCmpResultType(opnd_type: PN.getType()),
                                   VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);

    // Don't convert to selects if we could remove undefined behavior instead.
    if (passingValueIsAlwaysUndefined(V: OrigV, I: &PN) ||
        passingValueIsAlwaysUndefined(V: ThenV, I: &PN))
      return false;

    HaveRewritablePHIs = true;
    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(Val: OrigV);
    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(Val: ThenV);
    if (!OrigCE && !ThenCE)
      continue; // Known cheap (FIXME: Maybe not true for aggregates).

    InstructionCost OrigCost = OrigCE ? computeSpeculationCost(I: OrigCE, TTI) : 0;
    InstructionCost ThenCost = ThenCE ? computeSpeculationCost(I: ThenCE, TTI) : 0;
    InstructionCost MaxCost =
        2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
    if (OrigCost + ThenCost > MaxCost)
      return false;

    // Account for the cost of an unfolded ConstantExpr which could end up
    // getting expanded into Instructions.
    // FIXME: This doesn't account for how many operations are combined in the
    // constant expression.
    ++SpeculatedInstructions;
    if (SpeculatedInstructions > 1)
      return false;
  }

  return HaveRewritablePHIs;
}
3144
3145static bool isProfitableToSpeculate(const CondBrInst *BI,
3146 std::optional<bool> Invert,
3147 const TargetTransformInfo &TTI) {
3148 // If the branch is non-unpredictable, and is predicted to *not* branch to
3149 // the `then` block, then avoid speculating it.
3150 if (BI->getMetadata(KindID: LLVMContext::MD_unpredictable))
3151 return true;
3152
3153 uint64_t TWeight, FWeight;
3154 if (!extractBranchWeights(I: *BI, TrueVal&: TWeight, FalseVal&: FWeight) || (TWeight + FWeight) == 0)
3155 return true;
3156
3157 if (!Invert.has_value())
3158 return false;
3159
3160 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3161 BranchProbability BIEndProb =
3162 BranchProbability::getBranchProbability(Numerator: EndWeight, Denominator: TWeight + FWeight);
3163 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3164 return BIEndProb < Likely;
3165}
3166
/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
/// instructions like this is most often not desirable. Instead, there is an MI
/// pass which can do it with full awareness of the resource constraints.
/// However, some cases are "obvious" and we should do directly. An example of
/// this is speculating a single, reasonably cheap instruction.
///
/// There is only one distinct advantage to flattening the CFG at the IR level:
/// it makes very common but simplistic optimizations such as are common in
/// instcombine and the DAG combiner more powerful by removing CFG edges and
/// modeling their effects with easier to reason about SSA value graphs.
///
///
/// An illustration of this transform is turning this IR:
/// \code
///   BB:
///     %cmp = icmp ult %x, %y
///     br i1 %cmp, label %EndBB, label %ThenBB
///   ThenBB:
///     %sub = sub %x, %y
///     br label BB2
///   EndBB:
///     %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
///     ...
/// \endcode
///
/// Into this IR:
/// \code
///   BB:
///     %cmp = icmp ult %x, %y
///     %sub = sub %x, %y
///     %cond = select i1 %cmp, 0, %sub
///     ...
/// \endcode
///
/// \param BI     The conditional branch in BI's parent block, one of whose
///               successors is \p ThenBB.
/// \param ThenBB The conditionally-executed block to speculate.
/// \returns true if the conditional block is removed.
bool SimplifyCFGOpt::speculativelyExecuteBB(CondBrInst *BI,
                                            BasicBlock *ThenBB) {
  if (!Options.SpeculateBlocks)
    return false;

  // Be conservative for now. FP select instruction can often be expensive.
  Value *BrCond = BI->getCondition();
  if (isa<FCmpInst>(Val: BrCond))
    return false;

  BasicBlock *BB = BI->getParent();
  // ThenBB's terminator has a single successor: the join block holding the
  // PHIs that will be rewritten into selects.
  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(Idx: 0);
  InstructionCost Budget =
      PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;

  // If ThenBB is actually on the false edge of the conditional branch, remember
  // to swap the select operands later.
  bool Invert = false;
  if (ThenBB != BI->getSuccessor(i: 0)) {
    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
    Invert = true;
  }
  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");

  if (!isProfitableToSpeculate(BI, Invert, TTI))
    return false;

  // Keep a count of how many times instructions are used within ThenBB when
  // they are candidates for sinking into ThenBB. Specifically:
  // - They are defined in BB, and
  // - They have no side effects, and
  // - All of their uses are in ThenBB.
  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;

  SmallVector<Instruction *, 4> SpeculatedPseudoProbes;

  unsigned SpeculatedInstructions = 0;
  bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
  SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
  Value *SpeculatedStoreValue = nullptr;
  StoreInst *SpeculatedStore = nullptr;
  EphemeralValueTracker EphTracker;
  // Walk ThenBB bottom-up (terminator excluded) so ephemeral values are seen
  // before their defs, and vet every instruction for speculation.
  for (Instruction &I : reverse(C: drop_end(RangeOrContainer&: *ThenBB))) {
    // Skip pseudo probes. The consequence is we lose track of the branch
    // probability for ThenBB, which is fine since the optimization here takes
    // place regardless of the branch probability.
    if (isa<PseudoProbeInst>(Val: I)) {
      // The probe should be deleted so that it will not be over-counted when
      // the samples collected on the non-conditional path are counted towards
      // the conditional path. We leave it for the counts inference algorithm to
      // figure out a proper count for an unknown probe.
      SpeculatedPseudoProbes.push_back(Elt: &I);
      continue;
    }

    // Ignore ephemeral values, they will be dropped by the transform.
    if (EphTracker.track(I: &I))
      continue;

    // Only speculatively execute a single instruction (not counting the
    // terminator) for now.
    bool IsSafeCheapLoadStore = HoistLoadsStores &&
                                isSafeCheapLoadStore(I: &I, TTI) &&
                                SpeculatedConditionalLoadsStores.size() <
                                    HoistLoadsStoresWithCondFaultingThreshold;
    // Not count load/store into cost if target supports conditional faulting
    // b/c it's cheap to speculate it.
    if (IsSafeCheapLoadStore)
      SpeculatedConditionalLoadsStores.push_back(Elt: &I);
    else
      ++SpeculatedInstructions;

    if (SpeculatedInstructions > 1)
      return false;

    // Don't hoist the instruction if it's unsafe or expensive.
    if (!IsSafeCheapLoadStore &&
        !isSafeToSpeculativelyExecute(I: &I, CtxI: BI, AC: Options.AC) &&
        !(HoistCondStores && !SpeculatedStoreValue &&
          (SpeculatedStoreValue =
               isSafeToSpeculateStore(I: &I, BrBB: BB, StoreBB: ThenBB, EndBB))))
      return false;
    if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
        computeSpeculationCost(I: &I, TTI) >
            PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
      return false;

    // Store the store speculation candidate.
    if (!SpeculatedStore && SpeculatedStoreValue)
      SpeculatedStore = cast<StoreInst>(Val: &I);

    // Do not hoist the instruction if any of its operands are defined but not
    // used in BB. The transformation will prevent the operand from
    // being sunk into the use block.
    for (Use &Op : I.operands()) {
      Instruction *OpI = dyn_cast<Instruction>(Val&: Op);
      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
        continue; // Not a candidate for sinking.

      ++SinkCandidateUseCounts[OpI];
    }
  }

  // Consider any sink candidates which are only used in ThenBB as costs for
  // speculation. Note, while we iterate over a DenseMap here, we are summing
  // and so iteration order isn't significant.
  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
    if (Inst->hasNUses(N: Count)) {
      ++SpeculatedInstructions;
      if (SpeculatedInstructions > 1)
        return false;
    }

  // Check that we can insert the selects and that it's not too expensive to do
  // so.
  bool Convert =
      SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
  InstructionCost Cost = 0;
  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
                                            SpeculatedInstructions, Cost, TTI);
  if (!Convert || Cost > Budget)
    return false;

  // If we get here, we can hoist the instruction and if-convert.
  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

  Instruction *Sel = nullptr;
  // Insert a select of the value of the speculated store.
  if (SpeculatedStoreValue) {
    IRBuilder<NoFolder> Builder(BI);
    Value *OrigV = SpeculatedStore->getValueOperand();
    Value *TrueV = SpeculatedStore->getValueOperand();
    Value *FalseV = SpeculatedStoreValue;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *S = Builder.CreateSelect(
        C: BrCond, True: TrueV, False: FalseV, Name: "spec.store.select", MDFrom: BI);
    Sel = cast<Instruction>(Val: S);
    SpeculatedStore->setOperand(i_nocapture: 0, Val_nocapture: S);
    SpeculatedStore->applyMergedLocation(LocA: BI->getDebugLoc(),
                                         LocB: SpeculatedStore->getDebugLoc());
    // The value stored is still conditional, but the store itself is now
    // unconditionally executed, so we must be sure that any linked dbg.assign
    // intrinsics are tracking the new stored value (the result of the
    // select). If we don't, and the store were to be removed by another pass
    // (e.g. DSE), then we'd eventually end up emitting a location describing
    // the conditional value, unconditionally.
    //
    // === Before this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1, ...
    //   br %cond if.then
    //
    // if.then:
    //   store %two, %x.dest, !DIAssignID !2
    //   dbg.assign %two, "x", ..., !2, ...
    //
    // === After this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1
    //   ...
    //   %merge = select %cond, %two, %one
    //   store %merge, %x.dest, !DIAssignID !2
    //   dbg.assign %merge, "x", ..., !2
    for (DbgVariableRecord *DbgAssign :
         at::getDVRAssignmentMarkers(Inst: SpeculatedStore))
      if (llvm::is_contained(Range: DbgAssign->location_ops(), Element: OrigV))
        DbgAssign->replaceVariableLocationOp(OldValue: OrigV, NewValue: S);
  }

  // Metadata can be dependent on the condition we are hoisting above.
  // Strip all UB-implying metadata on the instruction. Drop the debug loc
  // to avoid making it appear as if the condition is a constant, which would
  // be misleading while debugging.
  // Similarly strip attributes that maybe dependent on condition we are
  // hoisting above.
  for (auto &I : make_early_inc_range(Range&: *ThenBB)) {
    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
      // The speculated store keeps its (merged) location; see above.
      I.dropLocation();
    }
    I.dropUBImplyingAttrsAndMetadata();

    // Drop ephemeral values.
    if (EphTracker.contains(I: &I)) {
      I.replaceAllUsesWith(V: PoisonValue::get(T: I.getType()));
      I.eraseFromParent();
    }
  }

  // Hoist the instructions.
  // Drop DbgVariableRecords attached to these instructions.
  for (auto &It : *ThenBB)
    for (DbgRecord &DR : make_early_inc_range(Range: It.getDbgRecordRange()))
      // Drop all records except assign-kind DbgVariableRecords (dbg.assign
      // equivalent).
      if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(Val: &DR);
          !DVR || !DVR->isDbgAssign())
        It.dropOneDbgRecord(I: &DR);
  // Move everything except ThenBB's terminator up into BB, before the branch.
  BB->splice(ToIt: BI->getIterator(), FromBB: ThenBB, FromBeginIt: ThenBB->begin(),
             FromEndIt: std::prev(x: ThenBB->end()));

  if (!SpeculatedConditionalLoadsStores.empty())
    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
                                Sel);

  // Insert selects and rewrite the PHI operands.
  IRBuilder<NoFolder> Builder(BI);
  for (PHINode &PN : EndBB->phis()) {
    unsigned OrigI = PN.getBasicBlockIndex(BB);
    unsigned ThenI = PN.getBasicBlockIndex(BB: ThenBB);
    Value *OrigV = PN.getIncomingValue(i: OrigI);
    Value *ThenV = PN.getIncomingValue(i: ThenI);

    // Skip PHIs which are trivial.
    if (OrigV == ThenV)
      continue;

    // Create a select whose true value is the speculatively executed value and
    // false value is the pre-existing value. Swap them if the branch
    // destinations were inverted.
    Value *TrueV = ThenV, *FalseV = OrigV;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *V = Builder.CreateSelect(C: BrCond, True: TrueV, False: FalseV, Name: "spec.select", MDFrom: BI);
    PN.setIncomingValue(i: OrigI, V);
    PN.setIncomingValue(i: ThenI, V);
  }

  // Remove speculated pseudo probes.
  for (Instruction *I : SpeculatedPseudoProbes)
    I->eraseFromParent();

  ++NumSpeculations;
  return true;
}
3441
3442/// Return true if we can thread a branch across this block.
3443static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
3444 int Size = 0;
3445 EphemeralValueTracker EphTracker;
3446
3447 // Walk the loop in reverse so that we can identify ephemeral values properly
3448 // (values only feeding assumes).
3449 for (Instruction &I : reverse(C&: *BB)) {
3450 // Can't fold blocks that contain noduplicate or convergent calls.
3451 if (CallInst *CI = dyn_cast<CallInst>(Val: &I))
3452 if (CI->cannotDuplicate() || CI->isConvergent())
3453 return false;
3454
3455 // Ignore ephemeral values which are deleted during codegen.
3456 // We will delete Phis while threading, so Phis should not be accounted in
3457 // block's size.
3458 if (!EphTracker.track(I: &I) && !isa<PHINode>(Val: I)) {
3459 if (Size++ > MaxSmallBlockSize)
3460 return false; // Don't clone large BB's.
3461 }
3462
3463 // We can only support instructions that do not define values that are
3464 // live outside of the current basic block.
3465 for (User *U : I.users()) {
3466 Instruction *UI = cast<Instruction>(Val: U);
3467 if (UI->getParent() != BB || isa<PHINode>(Val: UI))
3468 return false;
3469 }
3470
3471 // Looks ok, continue checking.
3472 }
3473
3474 return true;
3475}
3476
3477static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
3478 BasicBlock *To) {
3479 // Don't look past the block defining the value, we might get the value from
3480 // a previous loop iteration.
3481 auto *I = dyn_cast<Instruction>(Val: V);
3482 if (I && I->getParent() == To)
3483 return nullptr;
3484
3485 // We know the value if the From block branches on it.
3486 auto *BI = dyn_cast<CondBrInst>(Val: From->getTerminator());
3487 if (BI && BI->getCondition() == V &&
3488 BI->getSuccessor(i: 0) != BI->getSuccessor(i: 1))
3489 return BI->getSuccessor(i: 0) == To ? ConstantInt::getTrue(Context&: BI->getContext())
3490 : ConstantInt::getFalse(Context&: BI->getContext());
3491
3492 return nullptr;
3493}
3494
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
///
/// \returns false if nothing was changed, true if a change was made and no
/// repeat is needed, and std::nullopt if a change was made and the caller
/// should re-invoke this function to simplify any remaining constants.
static std::optional<bool>
foldCondBranchOnValueKnownInPredecessorImpl(CondBrInst *BI, DomTreeUpdater *DTU,
                                            const DataLayout &DL,
                                            AssumptionCache *AC) {
  // Map each constant condition value to the set of predecessors known to
  // produce it on their edge into this block.
  SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
  BasicBlock *BB = BI->getParent();
  Value *Cond = BI->getCondition();
  PHINode *PN = dyn_cast<PHINode>(Val: Cond);
  if (PN && PN->getParent() == BB) {
    // Degenerate case of a single entry PHI.
    if (PN->getNumIncomingValues() == 1) {
      FoldSingleEntryPHINodes(BB: PN->getParent());
      return true;
    }

    // Each constant incoming value of the PHI pins the condition on that edge.
    for (Use &U : PN->incoming_values())
      if (auto *CB = dyn_cast<ConstantInt>(Val&: U))
        KnownValues[CB].insert(X: PN->getIncomingBlock(U));
  } else {
    // The condition isn't a local PHI; see whether a predecessor's own
    // conditional branch fixes its value on the incoming edge.
    for (BasicBlock *Pred : predecessors(BB)) {
      if (ConstantInt *CB = getKnownValueOnEdge(V: Cond, From: Pred, To: BB))
        KnownValues[CB].insert(X: Pred);
    }
  }

  if (KnownValues.empty())
    return false;

  // Now we know that this block has multiple preds and two succs.
  // Check that the block is small enough and values defined in the block are
  // not used outside of it.
  if (!blockIsSimpleEnoughToThreadThrough(BB))
    return false;

  for (const auto &Pair : KnownValues) {
    // Okay, we now know that all edges from PredBB should be revectored to
    // branch to RealDest.
    ConstantInt *CB = Pair.first;
    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    // A true condition selects successor 0, a false one successor 1.
    BasicBlock *RealDest = BI->getSuccessor(i: !CB->getZExtValue());

    if (RealDest == BB)
      continue; // Skip self loops.

    // Skip if the predecessor's terminator is an indirect branch.
    if (any_of(Range&: PredBBs, P: [](BasicBlock *PredBB) {
          return isa<IndirectBrInst>(Val: PredBB->getTerminator());
        }))
      continue;

    LLVM_DEBUG({
      dbgs() << "Condition " << *Cond << " in " << BB->getName()
             << " has value " << *Pair.first << " in predecessors:\n";
      for (const BasicBlock *PredBB : Pair.second)
        dbgs() << "  " << PredBB->getName() << "\n";
      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
    });

    // Split the predecessors we are threading into a new edge block. We'll
    // clone the instructions into this block, and then redirect it to RealDest.
    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, Preds: PredBBs, Suffix: ".critedge", DTU);
    if (!EdgeBB)
      continue;

    // TODO: These just exist to reduce test diff, we can drop them if we like.
    EdgeBB->setName(RealDest->getName() + ".critedge");
    EdgeBB->moveBefore(MovePos: RealDest);

    // Update PHI nodes.
    addPredecessorToBlock(Succ: RealDest, NewPred: EdgeBB, ExistPred: BB);

    // BB may have instructions that are being threaded over. Clone these
    // instructions into EdgeBB. We know that there will be no uses of the
    // cloned instructions outside of EdgeBB.
    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
    ValueToValueMapTy TranslateMap; // Track translated values.
    // On this edge the branch condition is known to be the constant CB.
    TranslateMap[Cond] = CB;

    // RemoveDIs: track instructions that we optimise away while folding, so
    // that we can copy DbgVariableRecords from them later.
    BasicBlock::iterator SrcDbgCursor = BB->begin();
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
      if (PHINode *PN = dyn_cast<PHINode>(Val&: BBI)) {
        // PHIs are resolved to the value incoming from the threaded edge.
        TranslateMap[PN] = PN->getIncomingValueForBlock(BB: EdgeBB);
        continue;
      }
      // Clone the instruction.
      Instruction *N = BBI->clone();
      // Insert the new instruction into its new home.
      N->insertInto(ParentBB: EdgeBB, It: InsertPt);

      if (BBI->hasName())
        N->setName(BBI->getName() + ".c");

      // Update operands due to translation.
      // Key Instructions: Remap all the atom groups.
      if (const DebugLoc &DL = BBI->getDebugLoc())
        mapAtomInstance(DL, VMap&: TranslateMap);
      RemapInstruction(I: N, VM&: TranslateMap,
                       Flags: RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);

      // Check for trivial simplification.
      if (Value *V = simplifyInstruction(I: N, Q: {DL, nullptr, nullptr, AC})) {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = V;
        if (!N->mayHaveSideEffects()) {
          N->eraseFromParent(); // Instruction folded away, don't need actual
                                // inst
          N = nullptr;
        }
      } else {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = N;
      }
      if (N) {
        // Copy all debug-info attached to instructions from the last we
        // successfully clone, up to this instruction (they might have been
        // folded away).
        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
          N->cloneDebugInfoFrom(From: &*SrcDbgCursor);
        SrcDbgCursor = std::next(x: BBI);
        // Clone debug-info on this instruction too.
        N->cloneDebugInfoFrom(From: &*BBI);

        // Register the new instruction with the assumption cache if necessary.
        if (auto *Assume = dyn_cast<AssumeInst>(Val: N))
          if (AC)
            AC->registerAssumption(CI: Assume);
      }
    }

    // Copy any remaining debug records (including those on the branch itself)
    // onto the first instruction of the edge block.
    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
      InsertPt->cloneDebugInfoFrom(From: &*SrcDbgCursor);
    InsertPt->cloneDebugInfoFrom(From: BI);

    // Retarget the edge block straight at the known destination.
    BB->removePredecessor(Pred: EdgeBB);
    UncondBrInst *EdgeBI = cast<UncondBrInst>(Val: EdgeBB->getTerminator());
    EdgeBI->setSuccessor(idx: 0, NewSucc: RealDest);
    EdgeBI->setDebugLoc(BI->getDebugLoc());

    if (DTU) {
      SmallVector<DominatorTree::UpdateType, 2> Updates;
      Updates.push_back(Elt: {DominatorTree::Delete, EdgeBB, BB});
      Updates.push_back(Elt: {DominatorTree::Insert, EdgeBB, RealDest});
      DTU->applyUpdates(Updates);
    }

    // For simplicity, we created a separate basic block for the edge. Merge
    // it back into the predecessor if possible. This not only avoids
    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
    // bypass the check for trivial cycles above.
    MergeBlockIntoPredecessor(BB: EdgeBB, DTU);

    // Signal repeat, simplifying any other constants.
    return std::nullopt;
  }

  return false;
}
3657
3658bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI) {
3659 // Note: If BB is a loop header then there is a risk that threading introduces
3660 // a non-canonical loop by moving a back edge. So we avoid this optimization
3661 // for loop headers if NeedCanonicalLoop is set.
3662 if (Options.NeedCanonicalLoop && is_contained(Range&: LoopHeaders, Element: BI->getParent()))
3663 return false;
3664
3665 std::optional<bool> Result;
3666 bool EverChanged = false;
3667 do {
3668 // Note that None means "we changed things, but recurse further."
3669 Result =
3670 foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC: Options.AC);
3671 EverChanged |= Result == std::nullopt || *Result;
3672 } while (Result == std::nullopt);
3673 return EverChanged;
3674}
3675
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
///
/// \param PN  A two-entry PHI at the head of the merge block \p BB.
/// \param SpeculateUnpredictables  If true, grant extra budget to branches
///        marked !unpredictable (the mispredict penalty we would save).
/// \returns true if the diamond/triangle was flattened into selects, false
/// (or "changed so far") otherwise.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
                                DomTreeUpdater *DTU, AssumptionCache *AC,
                                const DataLayout &DL,
                                bool SpeculateUnpredictables) {
  // Ok, this is a two entry PHI node. Check to see if this is a simple "if
  // statement", which has a very simple dominance structure. Basically, we
  // are trying to find the condition that is being branched on, which
  // subsequently causes this merge to happen. We really want control
  // dependence information for this check, but simplifycfg can't keep it up
  // to date, and this catches most of the cases we care about anyway.
  BasicBlock *BB = PN->getParent();

  BasicBlock *IfTrue, *IfFalse;
  CondBrInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
  if (!DomBI)
    return false;
  Value *IfCond = DomBI->getCondition();
  // Don't bother if the branch will be constant folded trivially.
  if (isa<ConstantInt>(Val: IfCond))
    return false;

  BasicBlock *DomBlock = DomBI->getParent();
  // Collect the conditionally-executed side blocks (one for a triangle, two
  // for a diamond); the PHI may also receive a value directly from DomBlock.
  SmallVector<BasicBlock *, 2> IfBlocks;
  llvm::copy_if(Range: PN->blocks(), Out: std::back_inserter(x&: IfBlocks),
                P: [](BasicBlock *IfBlock) {
                  return isa<UncondBrInst>(Val: IfBlock->getTerminator());
                });
  assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
         "Will have either one or two blocks to speculate.");

  // If the branch is non-unpredictable, see if we either predictably jump to
  // the merge bb (if we have only a single 'then' block), or if we predictably
  // jump to one specific 'then' block (if we have two of them).
  // It isn't beneficial to speculatively execute the code
  // from the block that we know is predictably not entered.
  bool IsUnpredictable = DomBI->getMetadata(KindID: LLVMContext::MD_unpredictable);
  if (!IsUnpredictable) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(I: *DomBI, TrueVal&: TWeight, FalseVal&: FWeight) &&
        (TWeight + FWeight) != 0) {
      BranchProbability BITrueProb =
          BranchProbability::getBranchProbability(Numerator: TWeight, Denominator: TWeight + FWeight);
      BranchProbability Likely = TTI.getPredictableBranchThreshold();
      BranchProbability BIFalseProb = BITrueProb.getCompl();
      if (IfBlocks.size() == 1) {
        BranchProbability BIBBProb =
            DomBI->getSuccessor(i: 0) == BB ? BITrueProb : BIFalseProb;
        if (BIBBProb >= Likely)
          return false;
      } else {
        if (BITrueProb >= Likely || BIFalseProb >= Likely)
          return false;
      }
    }
  }

  // Don't try to fold an unreachable block. For example, the phi node itself
  // can't be the candidate if-condition for a select that we want to form.
  if (auto *IfCondPhiInst = dyn_cast<PHINode>(Val: IfCond))
    if (IfCondPhiInst->getParent() == BB)
      return false;

  // Okay, we found that we can merge this two-entry phi node into a select.
  // Doing so would require us to fold *all* two entry phi nodes in this block.
  // At some point this becomes non-profitable (particularly if the target
  // doesn't support cmov's). Only do this transformation if there are two or
  // fewer PHI nodes in this block.
  unsigned NumPhis = 0;
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(Val: I); ++NumPhis, ++I)
    if (NumPhis > 2)
      return false;

  // Loop over the PHI's seeing if we can promote them all to select
  // instructions. While we are at it, keep track of the instructions
  // that need to be moved to the dominating block.
  SmallPtrSet<Instruction *, 4> AggressiveInsts;
  SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
  InstructionCost Cost = 0;
  InstructionCost Budget =
      TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
  if (SpeculateUnpredictables && IsUnpredictable)
    Budget += TTI.getBranchMispredictPenalty();

  bool Changed = false;
  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(Val: II);) {
    PHINode *PN = cast<PHINode>(Val: II++);
    // Trivially-simplifiable PHIs are folded away up front.
    if (Value *V = simplifyInstruction(I: PN, Q: {DL, PN})) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
      continue;
    }

    // Both incoming values must be computable above the branch within budget;
    // otherwise we bail, reporting whatever simplification already happened.
    if (!dominatesMergePoint(V: PN->getIncomingValue(i: 0), BB, InsertPt: DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions) ||
        !dominatesMergePoint(V: PN->getIncomingValue(i: 1), BB, InsertPt: DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions))
      return Changed;
  }

  // If we folded the first phi, PN dangles at this point. Refresh it. If
  // we ran out of PHIs then we simplified them all.
  PN = dyn_cast<PHINode>(Val: BB->begin());
  if (!PN)
    return true;

  // Don't fold i1 branches on PHIs which contain binary operators or
  // (possibly inverted) select form of or/ands if their parameters are
  // an equality test.
  auto IsBinOpOrAndEq = [](Value *V) {
    CmpPredicate Pred;
    if (match(V, P: m_CombineOr(
                     L: m_CombineOr(
                         L: m_BinOp(L: m_Cmp(Pred, L: m_Value(), R: m_Value()), R: m_Value()),
                         R: m_BinOp(L: m_Value(), R: m_Cmp(Pred, L: m_Value(), R: m_Value()))),
                     R: m_c_Select(L: m_ImmConstant(),
                                 R: m_Cmp(Pred, L: m_Value(), R: m_Value()))))) {
      return CmpInst::isEquality(pred: Pred);
    }
    return false;
  };
  if (PN->getType()->isIntegerTy(Bitwidth: 1) &&
      (IsBinOpOrAndEq(PN->getIncomingValue(i: 0)) ||
       IsBinOpOrAndEq(PN->getIncomingValue(i: 1)) || IsBinOpOrAndEq(IfCond)))
    return Changed;

  // If all PHI nodes are promotable, check to make sure that all instructions
  // in the predecessor blocks can be promoted as well. If not, we won't be able
  // to get rid of the control flow, so it's not worth promoting to select
  // instructions.
  for (BasicBlock *IfBlock : IfBlocks)
    for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
      if (!AggressiveInsts.count(Ptr: &*I) && !I->isDebugOrPseudoInst()) {
        // This is not an aggressive instruction that we can promote.
        // Because of this, we won't be able to get rid of the control flow, so
        // the xform is not worth it.
        return Changed;
      }

  // If either of the blocks has its address taken, we can't do this fold.
  if (any_of(Range&: IfBlocks,
             P: [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
    return Changed;

  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond;
             if (IsUnpredictable) dbgs() << " (unpredictable)";
             dbgs() << "  T: " << IfTrue->getName()
                    << "  F: " << IfFalse->getName() << "\n");

  // If we can still promote the PHI nodes after this gauntlet of tests,
  // do all of the PHI's now.

  // Move all 'aggressive' instructions, which are defined in the
  // conditional parts of the if's up to the dominating block.
  for (BasicBlock *IfBlock : IfBlocks)
    hoistAllInstructionsInto(DomBlock, InsertPt: DomBI, BB: IfBlock);

  IRBuilder<NoFolder> Builder(DomBI);
  // Propagate fast-math-flags from phi nodes to replacement selects.
  while (PHINode *PN = dyn_cast<PHINode>(Val: BB->begin())) {
    // Change the PHI node into a select instruction.
    Value *TrueVal = PN->getIncomingValueForBlock(BB: IfTrue);
    Value *FalseVal = PN->getIncomingValueForBlock(BB: IfFalse);

    Value *Sel = Builder.CreateSelectFMF(C: IfCond, True: TrueVal, False: FalseVal,
                                         FMFSource: isa<FPMathOperator>(Val: PN) ? PN : nullptr,
                                         Name: "", MDFrom: DomBI);
    PN->replaceAllUsesWith(V: Sel);
    Sel->takeName(V: PN);
    PN->eraseFromParent();
  }

  // At this point, all IfBlocks are empty, so our if statement
  // has been flattened.  Change DomBlock to jump directly to our new block to
  // avoid other simplifycfg's kicking in on the diamond.
  Builder.CreateBr(Dest: BB);

  SmallVector<DominatorTree::UpdateType, 3> Updates;
  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, DomBlock, BB});
    for (auto *Successor : successors(BB: DomBlock))
      Updates.push_back(Elt: {DominatorTree::Delete, DomBlock, Successor});
  }

  DomBI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
3870
3871static Value *createLogicalOp(IRBuilderBase &Builder,
3872 Instruction::BinaryOps Opc, Value *LHS,
3873 Value *RHS, const Twine &Name = "") {
3874 // Try to relax logical op to binary op.
3875 if (impliesPoison(ValAssumedPoison: RHS, V: LHS))
3876 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3877 if (Opc == Instruction::And)
3878 return Builder.CreateLogicalAnd(Cond1: LHS, Cond2: RHS, Name);
3879 if (Opc == Instruction::Or)
3880 return Builder.CreateLogicalOr(Cond1: LHS, Cond2: RHS, Name);
3881 llvm_unreachable("Invalid logical opcode");
3882}
3883
3884/// Return true if either PBI or BI has branch weight available, and store
3885/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3886/// not have branch weight, use 1:1 as its weight.
3887static bool extractPredSuccWeights(CondBrInst *PBI, CondBrInst *BI,
3888 uint64_t &PredTrueWeight,
3889 uint64_t &PredFalseWeight,
3890 uint64_t &SuccTrueWeight,
3891 uint64_t &SuccFalseWeight) {
3892 bool PredHasWeights =
3893 extractBranchWeights(I: *PBI, TrueVal&: PredTrueWeight, FalseVal&: PredFalseWeight);
3894 bool SuccHasWeights =
3895 extractBranchWeights(I: *BI, TrueVal&: SuccTrueWeight, FalseVal&: SuccFalseWeight);
3896 if (PredHasWeights || SuccHasWeights) {
3897 if (!PredHasWeights)
3898 PredTrueWeight = PredFalseWeight = 1;
3899 if (!SuccHasWeights)
3900 SuccTrueWeight = SuccFalseWeight = 1;
3901 return true;
3902 } else {
3903 return false;
3904 }
3905}
3906
3907/// Determine if the two branches share a common destination and deduce a glue
3908/// that joins the branches' conditions to arrive at the common destination if
3909/// that would be profitable.
3910static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3911shouldFoldCondBranchesToCommonDestination(CondBrInst *BI, CondBrInst *PBI,
3912 const TargetTransformInfo *TTI) {
3913 assert(BI && PBI && "Both blocks must end with a conditional branches.");
3914 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3915 "PredBB must be a predecessor of BB.");
3916
3917 // We have the potential to fold the conditions together, but if the
3918 // predecessor branch is predictable, we may not want to merge them.
3919 uint64_t PTWeight, PFWeight;
3920 BranchProbability PBITrueProb, Likely;
3921 if (TTI && !PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
3922 extractBranchWeights(I: *PBI, TrueVal&: PTWeight, FalseVal&: PFWeight) &&
3923 (PTWeight + PFWeight) != 0) {
3924 PBITrueProb =
3925 BranchProbability::getBranchProbability(Numerator: PTWeight, Denominator: PTWeight + PFWeight);
3926 Likely = TTI->getPredictableBranchThreshold();
3927 }
3928
3929 if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
3930 // Speculate the 2nd condition unless the 1st is probably true.
3931 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3932 return {{BI->getSuccessor(i: 0), Instruction::Or, false}};
3933 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
3934 // Speculate the 2nd condition unless the 1st is probably false.
3935 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3936 return {{BI->getSuccessor(i: 1), Instruction::And, false}};
3937 } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
3938 // Speculate the 2nd condition unless the 1st is probably true.
3939 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3940 return {{BI->getSuccessor(i: 1), Instruction::And, true}};
3941 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
3942 // Speculate the 2nd condition unless the 1st is probably false.
3943 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3944 return {{BI->getSuccessor(i: 0), Instruction::Or, true}};
3945 }
3946 return std::nullopt;
3947}
3948
/// Fold the conditional branch BI into its predecessor's conditional branch
/// PBI, given that they share a common destination: BB's "bonus" instructions
/// are cloned into PredBlock, branch weights and metadata are merged, and the
/// two conditions are combined with the and/or opcode chosen by
/// shouldFoldCondBranchesToCommonDestination().
///
/// Precondition: shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI)
/// returns a value (it is unconditionally dereferenced below).
///
/// \returns true always; the transformation is unconditionally applied.
static bool performBranchToCommonDestFolding(CondBrInst *BI, CondBrInst *PBI,
                                             DomTreeUpdater *DTU,
                                             MemorySSAUpdater *MSSAU,
                                             const TargetTransformInfo *TTI) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *PredBlock = PBI->getParent();

  // Determine if the two branches share a common destination.
  BasicBlock *CommonSucc;
  Instruction::BinaryOps Opc;
  bool InvertPredCond;
  std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) =
      *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);

  LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

  IRBuilder<> Builder(PBI);
  // The builder is used to create instructions to eliminate the branch in BB.
  // If BB's terminator has !annotation metadata, add it to the new
  // instructions.
  Builder.CollectMetadataToCopy(Src: BB->getTerminator(),
                                MetadataKinds: {LLVMContext::MD_annotation});

  // If we need to invert the condition in the pred block to match, do so now.
  if (InvertPredCond) {
    InvertBranch(PBI, Builder);
  }

  // UniqueSucc is the successor of BI that is not shared with PBI; PBI's edge
  // into BB will be retargeted to it below.
  BasicBlock *UniqueSucc =
      PBI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 0) : BI->getSuccessor(i: 1);

  // Before cloning instructions, notify the successor basic block that it
  // is about to have a new predecessor. This will update PHI nodes,
  // which will allow us to update live-out uses of bonus instructions.
  addPredecessorToBlock(Succ: UniqueSucc, NewPred: PredBlock, ExistPred: BB, MSSAU);

  // Try to update branch weights.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  SmallVector<uint64_t, 2> MDWeights;
  if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight)) {

    if (PBI->getSuccessor(i: 0) == BB) {
      // PBI: br i1 %x, BB, FalseDest
      // BI:  br i1 %y, UniqueSucc, FalseDest
      // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
      MDWeights.push_back(Elt: PredTrueWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
      //               TrueWeight for PBI * FalseWeight for BI.
      // We assume that total weights of a CondBrInst can fit into 32 bits.
      // Therefore, we will not have overflow using 64-bit arithmetic.
      MDWeights.push_back(Elt: PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
                          PredTrueWeight * SuccFalseWeight);
    } else {
      // PBI: br i1 %x, TrueDest, BB
      // BI:  br i1 %y, TrueDest, UniqueSucc
      // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
      //              FalseWeight for PBI * TrueWeight for BI.
      MDWeights.push_back(Elt: PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
                          PredFalseWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
      MDWeights.push_back(Elt: PredFalseWeight * SuccFalseWeight);
    }

    setFittedBranchWeights(I&: *PBI, Weights: MDWeights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);

    // TODO: If BB is reachable from all paths through PredBlock, then we
    // could replace PBI's branch probabilities with BI's.
  } else
    // Without weights for both branches the combined weights would be
    // meaningless, so drop any stale profile metadata on PBI.
    PBI->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);

  // Now, update the CFG.
  PBI->setSuccessor(idx: PBI->getSuccessor(i: 0) != BB, NewSucc: UniqueSucc);

  if (DTU)
    DTU->applyUpdates(Updates: {{DominatorTree::Insert, PredBlock, UniqueSucc},
                       {DominatorTree::Delete, PredBlock, BB}});

  // If BI was a loop latch, it may have had associated loop metadata.
  // We need to copy it to the new latch, that is, PBI.
  if (MDNode *LoopMD = BI->getMetadata(KindID: LLVMContext::MD_loop))
    PBI->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD);

  ValueToValueMapTy VMap; // maps original values to cloned values
  cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);

  Module *M = BB->getModule();

  // Debug records attached to BB's terminator must be cloned and remapped so
  // they refer to the cloned bonus instructions now living in PredBlock.
  PredBlock->getTerminator()->cloneDebugInfoFrom(From: BB->getTerminator());
  for (DbgVariableRecord &DVR :
       filterDbgVars(R: PredBlock->getTerminator()->getDbgRecordRange())) {
    RemapDbgRecord(M, DR: &DVR, VM&: VMap,
                   Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  }

  // Now that the Cond was cloned into the predecessor basic block,
  // or/and the two conditions together.
  Value *BICond = VMap[BI->getCondition()];
  PBI->setCondition(
      createLogicalOp(Builder, Opc, LHS: PBI->getCondition(), RHS: BICond, Name: "or.cond"));
  // If the combined condition was emitted as a select (a logical and/or),
  // give that select the same branch weights computed for PBI above.
  if (!ProfcheckDisableMetadataFixes)
    if (auto *SI = dyn_cast<SelectInst>(Val: PBI->getCondition()))
      if (!MDWeights.empty()) {
        assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
        setFittedBranchWeights(I&: *SI, Weights: {MDWeights[0], MDWeights[1]},
                               /*IsExpected=*/false, /*ElideAllZero=*/true);
      }

  ++NumFoldBranchToCommonDest;
  return true;
}
4061
4062/// Return if an instruction's type or any of its operands' types are a vector
4063/// type.
4064static bool isVectorOp(Instruction &I) {
4065 return I.getType()->isVectorTy() || any_of(Range: I.operands(), P: [](Use &U) {
4066 return U->getType()->isVectorTy();
4067 });
4068}
4069
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
///
/// \param BonusInstThreshold limits how many non-free "bonus" instructions
///        (everything in BB besides the condition and the terminator) may be
///        cloned into each predecessor.
/// \returns true if a fold was performed.
bool llvm::foldBranchToCommonDest(CondBrInst *BI, DomTreeUpdater *DTU,
                                  MemorySSAUpdater *MSSAU,
                                  const TargetTransformInfo *TTI,
                                  unsigned BonusInstThreshold) {
  BasicBlock *BB = BI->getParent();
  // Under minsize, cost purely by size; otherwise by size and latency.
  TargetTransformInfo::TargetCostKind CostKind =
      BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
                                    : TargetTransformInfo::TCK_SizeAndLatency;

  Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());

  // Only handle single-use compare/bitwise/select/trunc conditions defined in
  // this very block.
  if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Val: Cond) ||
      Cond->getParent() != BB || !Cond->hasOneUse())
    return false;

  // Finally, don't infinitely unroll conditional loops.
  if (is_contained(Range: successors(BB), Element: BB))
    return false;

  // Which predecessors will we want to deal with?
  SmallVector<BasicBlock *, 8> Preds;
  for (BasicBlock *PredBlock : predecessors(BB)) {
    CondBrInst *PBI = dyn_cast<CondBrInst>(Val: PredBlock->getTerminator());

    // Check that we have two conditional branches. If there is a PHI node in
    // the common successor, verify that the same value flows in from both
    // blocks.
    if (!PBI || !safeToMergeTerminators(SI1: BI, SI2: PBI))
      continue;

    // Determine if the two branches share a common destination.
    BasicBlock *CommonSucc;
    Instruction::BinaryOps Opc;
    bool InvertPredCond;
    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
      std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) = *Recipe;
    else
      continue;

    // Check the cost of inserting the necessary logic before performing the
    // transformation.
    if (TTI) {
      Type *Ty = BI->getCondition()->getType();
      InstructionCost Cost = TTI->getArithmeticInstrCost(Opcode: Opc, Ty, CostKind);
      // Inverting a multi-use or non-compare predecessor condition requires
      // materializing an extra xor; charge for it.
      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
                             !isa<CmpInst>(Val: PBI->getCondition())))
        Cost += TTI->getArithmeticInstrCost(Opcode: Instruction::Xor, Ty, CostKind);

      if (Cost > BranchFoldThreshold)
        continue;
    }

    // Ok, we do want to deal with this predecessor. Record it.
    Preds.emplace_back(Args&: PredBlock);
  }

  // If there aren't any predecessors into which we can fold,
  // don't bother checking the cost.
  if (Preds.empty())
    return false;

  // Only allow this transformation if computing the condition doesn't involve
  // too many instructions and these involved instructions can be executed
  // unconditionally. We denote all involved instructions except the condition
  // as "bonus instructions", and only allow this transformation when the
  // number of the bonus instructions we'll need to create when cloning into
  // each predecessor does not exceed a certain threshold.
  unsigned NumBonusInsts = 0;
  bool SawVectorOp = false;
  const unsigned PredCount = Preds.size();
  for (Instruction &I : *BB) {
    // Don't check the branch condition comparison itself.
    if (&I == Cond)
      continue;
    // Ignore the terminator.
    if (isa<UncondBrInst, CondBrInst>(Val: I))
      continue;
    // I must be safe to execute unconditionally.
    if (!isSafeToSpeculativelyExecute(I: &I))
      return false;
    SawVectorOp |= isVectorOp(I);

    // Account for the cost of duplicating this instruction into each
    // predecessor. Ignore free instructions.
    if (!TTI || TTI->getInstructionCost(U: &I, CostKind) !=
                    TargetTransformInfo::TCC_Free) {
      NumBonusInsts += PredCount;

      // Early exits once we reach the limit.
      if (NumBonusInsts >
          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
        return false;
    }

    // A use is acceptable if it is a PHI incoming value from BB, or a use
    // later within BB itself; only such uses can be rewritten after cloning.
    auto IsBCSSAUse = [BB, &I](Use &U) {
      auto *UI = cast<Instruction>(Val: U.getUser());
      if (auto *PN = dyn_cast<PHINode>(Val: UI))
        return PN->getIncomingBlock(U) == BB;
      return UI->getParent() == BB && I.comesBefore(Other: UI);
    };

    // Does this instruction require rewriting of uses?
    if (!all_of(Range: I.uses(), P: IsBCSSAUse))
      return false;
  }
  // Apply the vector multiplier to the budget only when a vector op was seen.
  if (NumBonusInsts >
      BonusInstThreshold *
          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
    return false;

  // Ok, we have the budget. Perform the transformation.
  for (BasicBlock *PredBlock : Preds) {
    auto *PBI = cast<CondBrInst>(Val: PredBlock->getTerminator());
    // NOTE(review): only the first recorded predecessor is folded before
    // returning; presumably the remaining candidates are picked up when
    // simplification revisits this block -- confirm this is intentional.
    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
  }
  return false;
}
4190
4191// If there is only one store in BB1 and BB2, return it, otherwise return
4192// nullptr.
4193static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
4194 StoreInst *S = nullptr;
4195 for (auto *BB : {BB1, BB2}) {
4196 if (!BB)
4197 continue;
4198 for (auto &I : *BB)
4199 if (auto *SI = dyn_cast<StoreInst>(Val: &I)) {
4200 if (S)
4201 // Multiple stores seen.
4202 return nullptr;
4203 else
4204 S = SI;
4205 }
4206 }
4207 return S;
4208}
4209
/// Make \p V, defined in \p BB, referenceable in BB's single successor:
/// returns either V itself (when no PHI is needed) or a PHI node in the
/// successor merging V with \p AlternativeV (or poison).
static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
                                              Value *AlternativeV = nullptr) {
  // PHI is going to be a PHI node that allows the value V that is defined in
  // BB to be referenced in BB's only successor.
  //
  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
  // doesn't matter to us what the other operand is (it'll never get used). We
  // could just create a new PHI with an undef incoming value, but that could
  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
  // other PHI. So here we directly look for some PHI in BB's successor with V
  // as an incoming operand. If we find one, we use it, else we create a new
  // one.
  //
  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
  // where OtherBB is the single other predecessor of BB's only successor.
  PHINode *PHI = nullptr;
  BasicBlock *Succ = BB->getSingleSuccessor();

  // Scan the leading PHI nodes of the successor for a reusable candidate.
  for (auto I = Succ->begin(); isa<PHINode>(Val: I); ++I)
    if (cast<PHINode>(Val&: I)->getIncomingValueForBlock(BB) == V) {
      PHI = cast<PHINode>(Val&: I);
      if (!AlternativeV)
        break;

      // With an AlternativeV, the other (unique) predecessor's incoming value
      // must match too, otherwise this PHI is not the one we want.
      assert(Succ->hasNPredecessors(2));
      auto PredI = pred_begin(BB: Succ);
      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
      if (PHI->getIncomingValueForBlock(BB: OtherPredBB) == AlternativeV)
        break;
      PHI = nullptr;
    }
  if (PHI)
    return PHI;

  // If V is not an instruction defined in BB, just return it.
  if (!AlternativeV &&
      (!isa<Instruction>(Val: V) || cast<Instruction>(Val: V)->getParent() != BB))
    return V;

  // No reusable PHI found; build a fresh one at the top of the successor.
  PHI = PHINode::Create(Ty: V->getType(), NumReservedValues: 2, NameStr: "simplifycfg.merge");
  PHI->insertBefore(InsertPos: Succ->begin());
  PHI->addIncoming(V, BB);
  for (BasicBlock *PredBB : predecessors(BB: Succ))
    if (PredBB != BB)
      PHI->addIncoming(
          V: AlternativeV ? AlternativeV : PoisonValue::get(T: V->getType()), BB: PredBB);
  return PHI;
}
4259
/// Given the P and Q diamond/triangle blocks (see mergeConditionalStores),
/// try to sink the unique store in {PTB, PFB} and the unique store in
/// {QTB, QFB} -- both to \p Address -- into \p PostBB as one store predicated
/// on the disjunction of the two branch conditions.
/// \returns true if the stores were merged.
static bool mergeConditionalStoreToAddress(
    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
  // For every pointer, there must be exactly two stores, one coming from
  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
  // store (to any address) in PTB,PFB or QTB,QFB.
  // FIXME: We could relax this restriction with a bit more work and performance
  // testing.
  StoreInst *PStore = findUniqueStoreInBlocks(BB1: PTB, BB2: PFB);
  StoreInst *QStore = findUniqueStoreInBlocks(BB1: QTB, BB2: QFB);
  if (!PStore || !QStore)
    return false;

  // Now check the stores are compatible.
  if (!QStore->isUnordered() || !PStore->isUnordered() ||
      PStore->getValueOperand()->getType() !=
          QStore->getValueOperand()->getType())
    return false;

  // Check that sinking the store won't cause program behavior changes. Sinking
  // the store out of the Q blocks won't change any behavior as we're sinking
  // from a block to its unconditional successor. But we're moving a store from
  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
  // So we need to check that there are no aliasing loads or stores in
  // QBI, QTB and QFB. We also need to check there are no conflicting memory
  // operations between PStore and the end of its parent block.
  //
  // The ideal way to do this is to query AliasAnalysis, but we don't
  // preserve AA currently so that is dangerous. Be super safe and just
  // check there are no other memory operations at all.
  for (auto &I : *QFB->getSinglePredecessor())
    if (I.mayReadOrWriteMemory())
      return false;
  for (auto &I : *QFB)
    if (&I != QStore && I.mayReadOrWriteMemory())
      return false;
  if (QTB)
    for (auto &I : *QTB)
      if (&I != QStore && I.mayReadOrWriteMemory())
        return false;
  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
       I != E; ++I)
    if (&*I != PStore && I->mayReadOrWriteMemory())
      return false;

  // If we're not in aggressive mode, we only optimize if we have some
  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
    if (!BB)
      return true;
    // Heuristic: if the block can be if-converted/phi-folded and the
    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
    // thread this store.
    InstructionCost Cost = 0;
    InstructionCost Budget =
        PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
    for (auto &I : *BB) {
      // Consider terminator instruction to be free.
      if (I.isTerminator())
        continue;
      // If this is one the stores that we want to speculate out of this BB,
      // then don't count it's cost, consider it to be free.
      if (auto *S = dyn_cast<StoreInst>(Val: &I))
        if (llvm::find(Range&: FreeStores, Val: S))
          continue;
      // Else, we have a white-list of instructions that we are okay
      // speculating.
      if (!isa<BinaryOperator>(Val: I) && !isa<GetElementPtrInst>(Val: I))
        return false; // Not in white-list - not worthwhile folding.
      // And finally, if this is a non-free instruction that we are okay
      // speculating, ensure that we consider the speculation budget.
      Cost +=
          TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
      if (Cost > Budget)
        return false; // Eagerly refuse to fold as soon as we're out of budget.
    }
    assert(Cost <= Budget &&
           "When we run out of budget we will eagerly return from within the "
           "per-instruction loop.");
    return true;
  };

  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
  if (!MergeCondStoresAggressively &&
      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
    return false;

  // If PostBB has more than two predecessors, we need to split it so we can
  // sink the store.
  if (std::next(x: pred_begin(BB: PostBB), n: 2) != pred_end(BB: PostBB)) {
    // We know that QFB's only successor is PostBB. And QFB has a single
    // predecessor. If QTB exists, then its only successor is also PostBB.
    // If QTB does not exist, then QFB's only predecessor has a conditional
    // branch to QFB and PostBB.
    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
    BasicBlock *NewBB =
        SplitBlockPredecessors(BB: PostBB, Preds: {QFB, TruePred}, Suffix: "condstore.split", DTU);
    if (!NewBB)
      return false;
    PostBB = NewBB;
  }

  // OK, we're going to sink the stores to PostBB. The store has to be
  // conditional though, so first create the predicate.
  CondBrInst *PBranch =
      cast<CondBrInst>(Val: PFB->getSinglePredecessor()->getTerminator());
  CondBrInst *QBranch =
      cast<CondBrInst>(Val: QFB->getSinglePredecessor()->getTerminator());
  Value *PCond = PBranch->getCondition();
  Value *QCond = QBranch->getCondition();

  // Make the stored values available in PostBB via PHIs where necessary.
  Value *PPHI = ensureValueAvailableInSuccessor(V: PStore->getValueOperand(),
                                                BB: PStore->getParent());
  Value *QPHI = ensureValueAvailableInSuccessor(V: QStore->getValueOperand(),
                                                BB: QStore->getParent(), AlternativeV: PPHI);

  BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
  IRBuilder<> QB(PostBB, PostBBFirst);
  QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());

  // Each store may live in the "true" or "false" block of its diamond; fold
  // that into the inversion flags so each predicate is true exactly when the
  // corresponding store would have executed.
  InvertPCond ^= (PStore->getParent() != PTB);
  InvertQCond ^= (QStore->getParent() != QTB);
  Value *PPred = InvertPCond ? QB.CreateNot(V: PCond) : PCond;
  Value *QPred = InvertQCond ? QB.CreateNot(V: QCond) : QCond;

  Value *CombinedPred = QB.CreateOr(LHS: PPred, RHS: QPred);

  BasicBlock::iterator InsertPt = QB.GetInsertPoint();
  auto *T = SplitBlockAndInsertIfThen(Cond: CombinedPred, SplitBefore: InsertPt,
                                      /*Unreachable=*/false,
                                      /*BranchWeights=*/nullptr, DTU);
  // Carry profile data over to the new conditional branch when both original
  // branches had branch weights.
  if (hasBranchWeightMD(I: *PBranch) && hasBranchWeightMD(I: *QBranch) &&
      !ProfcheckDisableMetadataFixes) {
    SmallVector<uint32_t, 2> PWeights, QWeights;
    extractBranchWeights(I: *PBranch, Weights&: PWeights);
    extractBranchWeights(I: *QBranch, Weights&: QWeights);
    // Align the weights with the (possibly inverted) predicates built above.
    if (InvertPCond)
      std::swap(a&: PWeights[0], b&: PWeights[1]);
    if (InvertQCond)
      std::swap(a&: QWeights[0], b&: QWeights[1]);
    auto CombinedWeights = getDisjunctionWeights(B1: PWeights, B2: QWeights);
    setFittedBranchWeights(I&: *PostBB->getTerminator(),
                           Weights: {CombinedWeights[0], CombinedWeights[1]},
                           /*IsExpected=*/false, /*ElideAllZero=*/true);
  }

  QB.SetInsertPoint(T);
  StoreInst *SI = cast<StoreInst>(Val: QB.CreateStore(Val: QPHI, Ptr: Address));
  SI->setAAMetadata(PStore->getAAMetadata().merge(Other: QStore->getAAMetadata()));
  // Choose the minimum alignment. If we could prove both stores execute, we
  // could use biggest one. In this case, though, we only know that one of the
  // stores executes. And we don't know it's safe to take the alignment from a
  // store that doesn't execute.
  SI->setAlignment(std::min(a: PStore->getAlign(), b: QStore->getAlign()));

  QStore->eraseFromParent();
  PStore->eraseFromParent();

  return true;
}
4421
/// Try to merge conditional stores appearing in two consecutive
/// diamonds/triangles headed by \p PBI and \p QBI (see the diagram below)
/// into a single predicated store in the post-dominating block.
static bool mergeConditionalStores(CondBrInst *PBI, CondBrInst *QBI,
                                   DomTreeUpdater *DTU, const DataLayout &DL,
                                   const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //     PBI       or      PBI        or a combination of the two
  //    /   \              | \
  //   PTB  PFB            |  PFB
  //    \   /              | /
  //     QBI               QBI
  //    /   \              | \
  //   QTB  QFB            |  QFB
  //    \   /              | /
  //    PostBB            PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(i: 0);
  BasicBlock *PFB = PBI->getSuccessor(i: 1);
  BasicBlock *QTB = QBI->getSuccessor(i: 0);
  BasicBlock *QFB = QBI->getSuccessor(i: 1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(a&: PFB, b&: PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(a&: QFB, b&: QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  if (!QBI->getParent()->hasNUses(N: 2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
        PStoreAddresses.insert(Ptr: SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
        QStoreAddresses.insert(Ptr: SI->getPointerOperand());
  }

  set_intersect(S1&: PStoreAddresses, S2: QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  // Attempt the merge for every address stored to on both sides.
  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}
4530
4531/// If the previous block ended with a widenable branch, determine if reusing
4532/// the target block is profitable and legal. This will have the effect of
4533/// "widening" PBI, but doesn't require us to reason about hosting safety.
4534static bool tryWidenCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI,
4535 DomTreeUpdater *DTU) {
4536 // TODO: This can be generalized in two important ways:
4537 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4538 // values from the PBI edge.
4539 // 2) We can sink side effecting instructions into BI's fallthrough
4540 // successor provided they doesn't contribute to computation of
4541 // BI's condition.
4542 BasicBlock *IfTrueBB = PBI->getSuccessor(i: 0);
4543 BasicBlock *IfFalseBB = PBI->getSuccessor(i: 1);
4544 if (!isWidenableBranch(U: PBI) || IfTrueBB != BI->getParent() ||
4545 !BI->getParent()->getSinglePredecessor())
4546 return false;
4547 if (!IfFalseBB->phis().empty())
4548 return false; // TODO
4549 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4550 // may undo the transform done here.
4551 // TODO: There might be a more fine-grained solution to this.
4552 if (!llvm::succ_empty(BB: IfFalseBB))
4553 return false;
4554 // Use lambda to lazily compute expensive condition after cheap ones.
4555 auto NoSideEffects = [](BasicBlock &BB) {
4556 return llvm::none_of(Range&: BB, P: [](const Instruction &I) {
4557 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4558 });
4559 };
4560 if (BI->getSuccessor(i: 1) != IfFalseBB && // no inf looping
4561 BI->getSuccessor(i: 1)->getTerminatingDeoptimizeCall() && // profitability
4562 NoSideEffects(*BI->getParent())) {
4563 auto *OldSuccessor = BI->getSuccessor(i: 1);
4564 OldSuccessor->removePredecessor(Pred: BI->getParent());
4565 BI->setSuccessor(idx: 1, NewSucc: IfFalseBB);
4566 if (DTU)
4567 DTU->applyUpdates(
4568 Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4569 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4570 return true;
4571 }
4572 if (BI->getSuccessor(i: 0) != IfFalseBB && // no inf looping
4573 BI->getSuccessor(i: 0)->getTerminatingDeoptimizeCall() && // profitability
4574 NoSideEffects(*BI->getParent())) {
4575 auto *OldSuccessor = BI->getSuccessor(i: 0);
4576 OldSuccessor->removePredecessor(Pred: BI->getParent());
4577 BI->setSuccessor(idx: 0, NewSucc: IfFalseBB);
4578 if (DTU)
4579 DTU->applyUpdates(
4580 Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4581 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4582 return true;
4583 }
4584 return false;
4585}
4586
4587/// If we have a conditional branch as a predecessor of another block,
4588/// this function tries to simplify it. We know
4589/// that PBI and BI are both conditional branches, and BI is in one of the
4590/// successor blocks of PBI - PBI branches to BI.
4591static bool SimplifyCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI,
4592 DomTreeUpdater *DTU,
4593 const DataLayout &DL,
4594 const TargetTransformInfo &TTI) {
4595 BasicBlock *BB = BI->getParent();
4596
4597 // If this block ends with a branch instruction, and if there is a
4598 // predecessor that ends on a branch of the same condition, make
4599 // this conditional branch redundant.
4600 if (PBI->getCondition() == BI->getCondition() &&
4601 PBI->getSuccessor(i: 0) != PBI->getSuccessor(i: 1)) {
4602 // Okay, the outcome of this conditional branch is statically
4603 // knowable. If this block had a single pred, handle specially, otherwise
4604 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4605 if (BB->getSinglePredecessor()) {
4606 // Turn this into a branch on constant.
4607 bool CondIsTrue = PBI->getSuccessor(i: 0) == BB;
4608 BI->setCondition(
4609 ConstantInt::get(Ty: Type::getInt1Ty(C&: BB->getContext()), V: CondIsTrue));
4610 return true; // Nuke the branch on constant.
4611 }
4612 }
4613
4614 // If the previous block ended with a widenable branch, determine if reusing
4615 // the target block is profitable and legal. This will have the effect of
4616 // "widening" PBI, but doesn't require us to reason about hosting safety.
4617 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4618 return true;
4619
4620 // If both branches are conditional and both contain stores to the same
4621 // address, remove the stores from the conditionals and create a conditional
4622 // merged store at the end.
4623 if (MergeCondStores && mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
4624 return true;
4625
4626 // If this is a conditional branch in an empty block, and if any
4627 // predecessors are a conditional branch to one of our destinations,
4628 // fold the conditions into logical ops and one cond br.
4629
4630 // Ignore dbg intrinsics.
4631 if (&*BB->begin() != BI)
4632 return false;
4633
4634 int PBIOp, BIOp;
4635 if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
4636 PBIOp = 0;
4637 BIOp = 0;
4638 } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
4639 PBIOp = 0;
4640 BIOp = 1;
4641 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
4642 PBIOp = 1;
4643 BIOp = 0;
4644 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
4645 PBIOp = 1;
4646 BIOp = 1;
4647 } else {
4648 return false;
4649 }
4650
4651 // Check to make sure that the other destination of this branch
4652 // isn't BB itself. If so, this is an infinite loop that will
4653 // keep getting unwound.
4654 if (PBI->getSuccessor(i: PBIOp) == BB)
4655 return false;
4656
4657 // If predecessor's branch probability to BB is too low don't merge branches.
4658 SmallVector<uint32_t, 2> PredWeights;
4659 if (!PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
4660 extractBranchWeights(I: *PBI, Weights&: PredWeights) &&
4661 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4662
4663 BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
4664 Numerator: PredWeights[PBIOp],
4665 Denominator: static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4666
4667 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4668 if (CommonDestProb >= Likely)
4669 return false;
4670 }
4671
4672 // Do not perform this transformation if it would require
4673 // insertion of a large number of select instructions. For targets
4674 // without predication/cmovs, this is a big pessimization.
4675
4676 BasicBlock *CommonDest = PBI->getSuccessor(i: PBIOp);
4677 BasicBlock *RemovedDest = PBI->getSuccessor(i: PBIOp ^ 1);
4678 unsigned NumPhis = 0;
4679 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(Val: II);
4680 ++II, ++NumPhis) {
4681 if (NumPhis > 2) // Disable this xform.
4682 return false;
4683 }
4684
4685 // Finally, if everything is ok, fold the branches to logical ops.
4686 BasicBlock *OtherDest = BI->getSuccessor(i: BIOp ^ 1);
4687
4688 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4689 << "AND: " << *BI->getParent());
4690
4691 SmallVector<DominatorTree::UpdateType, 5> Updates;
4692
4693 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4694 // branch in it, where one edge (OtherDest) goes back to itself but the other
4695 // exits. We don't *know* that the program avoids the infinite loop
4696 // (even though that seems likely). If we do this xform naively, we'll end up
4697 // recursively unpeeling the loop. Since we know that (after the xform is
4698 // done) that the block *is* infinite if reached, we just make it an obviously
4699 // infinite loop with no cond branch.
4700 if (OtherDest == BB) {
4701 // Insert it at the end of the function, because it's either code,
4702 // or it won't matter if it's hot. :)
4703 BasicBlock *InfLoopBlock =
4704 BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
4705 UncondBrInst::Create(Target: InfLoopBlock, InsertBefore: InfLoopBlock);
4706 if (DTU)
4707 Updates.push_back(Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4708 OtherDest = InfLoopBlock;
4709 }
4710
4711 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4712
4713 // BI may have other predecessors. Because of this, we leave
4714 // it alone, but modify PBI.
4715
4716 // Make sure we get to CommonDest on True&True directions.
4717 Value *PBICond = PBI->getCondition();
4718 IRBuilder<NoFolder> Builder(PBI);
4719 if (PBIOp)
4720 PBICond = Builder.CreateNot(V: PBICond, Name: PBICond->getName() + ".not");
4721
4722 Value *BICond = BI->getCondition();
4723 if (BIOp)
4724 BICond = Builder.CreateNot(V: BICond, Name: BICond->getName() + ".not");
4725
4726 // Merge the conditions.
4727 Value *Cond =
4728 createLogicalOp(Builder, Opc: Instruction::Or, LHS: PBICond, RHS: BICond, Name: "brmerge");
4729
4730 // Modify PBI to branch on the new condition to the new dests.
4731 PBI->setCondition(Cond);
4732 PBI->setSuccessor(idx: 0, NewSucc: CommonDest);
4733 PBI->setSuccessor(idx: 1, NewSucc: OtherDest);
4734
4735 if (DTU) {
4736 Updates.push_back(Elt: {DominatorTree::Insert, PBI->getParent(), OtherDest});
4737 Updates.push_back(Elt: {DominatorTree::Delete, PBI->getParent(), RemovedDest});
4738
4739 DTU->applyUpdates(Updates);
4740 }
4741
4742 // Update branch weight for PBI.
4743 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4744 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4745 bool HasWeights =
4746 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4747 SuccTrueWeight, SuccFalseWeight);
4748 if (HasWeights) {
4749 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4750 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4751 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4752 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4753 // The weight to CommonDest should be PredCommon * SuccTotal +
4754 // PredOther * SuccCommon.
4755 // The weight to OtherDest should be PredOther * SuccOther.
4756 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4757 PredOther * SuccCommon,
4758 PredOther * SuccOther};
4759
4760 setFittedBranchWeights(I&: *PBI, Weights: NewWeights, /*IsExpected=*/false,
4761 /*ElideAllZero=*/true);
4762 // Cond may be a select instruction with the first operand set to "true", or
4763 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4764 if (!ProfcheckDisableMetadataFixes)
4765 if (auto *SI = dyn_cast<SelectInst>(Val: Cond)) {
4766 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4767 // The select is predicated on PBICond
4768 assert(SI->getCondition() == PBICond);
4769 // The corresponding probabilities are what was referred to above as
4770 // PredCommon and PredOther.
4771 setFittedBranchWeights(I&: *SI, Weights: {PredCommon, PredOther},
4772 /*IsExpected=*/false, /*ElideAllZero=*/true);
4773 }
4774 }
4775
4776 // OtherDest may have phi nodes. If so, add an entry from PBI's
4777 // block that are identical to the entries for BI's block.
4778 addPredecessorToBlock(Succ: OtherDest, NewPred: PBI->getParent(), ExistPred: BB);
4779
4780 // We know that the CommonDest already had an edge from PBI to
4781 // it. If it has PHIs though, the PHIs may have different
4782 // entries for BB and PBI's BB. If so, insert a select to make
4783 // them agree.
4784 for (PHINode &PN : CommonDest->phis()) {
4785 Value *BIV = PN.getIncomingValueForBlock(BB);
4786 unsigned PBBIdx = PN.getBasicBlockIndex(BB: PBI->getParent());
4787 Value *PBIV = PN.getIncomingValue(i: PBBIdx);
4788 if (BIV != PBIV) {
4789 // Insert a select in PBI to pick the right value.
4790 SelectInst *NV = cast<SelectInst>(
4791 Val: Builder.CreateSelect(C: PBICond, True: PBIV, False: BIV, Name: PBIV->getName() + ".mux"));
4792 PN.setIncomingValue(i: PBBIdx, V: NV);
4793 // The select has the same condition as PBI, in the same BB. The
4794 // probabilities don't change.
4795 if (HasWeights) {
4796 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4797 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4798 setFittedBranchWeights(I&: *NV, Weights: {TrueWeight, FalseWeight},
4799 /*IsExpected=*/false, /*ElideAllZero=*/true);
4800 }
4801 }
4802 }
4803
4804 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4805 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4806
4807 // This basic block is probably dead. We know it has at least
4808 // one fewer predecessor.
4809 return true;
4810}
4811
4812// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4813// true or to FalseBB if Cond is false.
4814// Takes care of updating the successors and removing the old terminator.
4815// Also makes sure not to introduce new successors by assuming that edges to
4816// non-successor TrueBBs and FalseBBs aren't reachable.
bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  // Successors that lose their edge from BB entirely; collected so the
  // DomTree updates can be applied in one batch at the end.
  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest.
  for (BasicBlock *Succ : successors(I: OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    // A KeepEdge pointer is nulled out the first time its block is seen, so
    // duplicate successor edges to the same block are also pruned.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(Pred: BB,
                              /*KeepOneInputPHIs=*/true);

      // Only record the successor as fully removed if neither retained edge
      // (TrueBB/FalseBB) still reaches it.
      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(X: Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator. At this point a KeepEdge pointer is
  // null iff that target was found among OldTerm's successors.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(Dest: TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      CondBrInst *NewBI = Builder.CreateCondBr(Cond, True: TrueBB, False: FalseBB);
      setBranchWeights(I&: *NewBI, Weights: {TrueWeight, FalseWeight},
                       /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(Dest: TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(Dest: FalseBB);
    }
  }

  eraseTerminatorAndDCECond(TI: OldTerm);

  // Tell the DomTreeUpdater about every edge that disappeared.
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(N: RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back(Elt: {DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
4893
4894// Replaces
4895// (switch (select cond, X, Y)) on constant X, Y
4896// with a branch - conditional if X and Y lead to distinct BBs,
4897// unconditional otherwise.
4898bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4899 SelectInst *Select) {
4900 // Check for constant integer values in the select.
4901 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Val: Select->getTrueValue());
4902 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Val: Select->getFalseValue());
4903 if (!TrueVal || !FalseVal)
4904 return false;
4905
4906 // Find the relevant condition and destinations.
4907 Value *Condition = Select->getCondition();
4908 BasicBlock *TrueBB = SI->findCaseValue(C: TrueVal)->getCaseSuccessor();
4909 BasicBlock *FalseBB = SI->findCaseValue(C: FalseVal)->getCaseSuccessor();
4910
4911 // Get weight for TrueBB and FalseBB.
4912 uint32_t TrueWeight = 0, FalseWeight = 0;
4913 SmallVector<uint64_t, 8> Weights;
4914 bool HasWeights = hasBranchWeightMD(I: *SI);
4915 if (HasWeights) {
4916 getBranchWeights(TI: SI, Weights);
4917 if (Weights.size() == 1 + SI->getNumCases()) {
4918 TrueWeight =
4919 (uint32_t)Weights[SI->findCaseValue(C: TrueVal)->getSuccessorIndex()];
4920 FalseWeight =
4921 (uint32_t)Weights[SI->findCaseValue(C: FalseVal)->getSuccessorIndex()];
4922 }
4923 }
4924
4925 // Perform the actual simplification.
4926 return simplifyTerminatorOnSelect(OldTerm: SI, Cond: Condition, TrueBB, FalseBB, TrueWeight,
4927 FalseWeight);
4928}
4929
4930// Replaces
4931// (indirectbr (select cond, blockaddress(@fn, BlockA),
4932// blockaddress(@fn, BlockB)))
4933// with
4934// (br cond, BlockA, BlockB).
4935bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4936 SelectInst *SI) {
4937 // Check that both operands of the select are block addresses.
4938 BlockAddress *TBA = dyn_cast<BlockAddress>(Val: SI->getTrueValue());
4939 BlockAddress *FBA = dyn_cast<BlockAddress>(Val: SI->getFalseValue());
4940 if (!TBA || !FBA)
4941 return false;
4942
4943 // Extract the actual blocks.
4944 BasicBlock *TrueBB = TBA->getBasicBlock();
4945 BasicBlock *FalseBB = FBA->getBasicBlock();
4946
4947 // The select's profile becomes the profile of the conditional branch that
4948 // replaces the indirect branch.
4949 SmallVector<uint32_t> SelectBranchWeights(2);
4950 if (!ProfcheckDisableMetadataFixes)
4951 extractBranchWeights(I: *SI, Weights&: SelectBranchWeights);
4952 // Perform the actual simplification.
4953 return simplifyTerminatorOnSelect(OldTerm: IBI, Cond: SI->getCondition(), TrueBB, FalseBB,
4954 TrueWeight: SelectBranchWeights[0],
4955 FalseWeight: SelectBranchWeights[1]);
4956}
4957
4958/// This is called when we find an icmp instruction
4959/// (a seteq/setne with a constant) as the only instruction in a
4960/// block that ends with an uncond branch. We are looking for a very specific
4961/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4962/// this case, we merge the first two "or's of icmp" into a switch, but then the
4963/// default value goes to an uncond block with a seteq in it, we get something
4964/// like:
4965///
4966/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4967/// DEFAULT:
4968/// %tmp = icmp eq i8 %A, 92
4969/// br label %end
4970/// end:
4971/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4972///
4973/// We prefer to split the edge to 'end' so that there is a true/false entry to
4974/// the PHI, merging the third icmp into the switch.
4975bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4976 ICmpInst *ICI, IRBuilder<> &Builder) {
4977 // Select == nullptr means we assume that there is a hidden no-op select
4978 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
4979 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, Select: nullptr, Builder);
4980}
4981
4982/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
4983/// case. This is called when we find an icmp instruction (a seteq/setne with a
4984/// constant) and its following select instruction as the only TWO instructions
4985/// in a block that ends with an uncond branch. We are looking for a very
4986/// specific pattern that occurs when "
4987/// if (A == 1) return C1;
4988/// if (A == 2) return C2;
4989/// if (A < 3) return C3;
4990/// return C4;
4991/// " gets simplified. In this case, we merge the first two "branches of icmp"
4992/// into a switch, but then the default value goes to an uncond block with a lt
4993/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
4994/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
4995/// get something like:
4996///
4997/// case1:
4998/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
4999/// case2:
5000/// br label %end
5001/// DEFAULT:
5002/// %tmp = icmp eq i8 %A, 2
5003/// %val = select i1 %tmp, i8 C3, i8 C4
5004/// br label %end
5005/// end:
5006/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5007///
5008/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5009/// to the PHI, merging the icmp & select into the switch, as follows:
5010///
5011/// case1:
5012/// switch i8 %A, label %DEFAULT [
5013/// i8 0, label %end
5014/// i8 1, label %case2
5015/// i8 2, label %case3
5016/// ]
5017/// case2:
5018/// br label %end
5019/// case3:
5020/// br label %end
5021/// DEFAULT:
5022/// br label %end
5023/// end:
5024/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
    ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp/select has multiple uses, it is
  // too complex.
  /// TODO: support multi-phis in succ BB of select's BB.
  if (isa<PHINode>(Val: BB->begin()) || !ICI->hasOneUse() ||
      (Select && !Select->hasOneUse()))
    return false;

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Val: Pred->getTerminator()))
    return false;

  Value *IcmpCond;
  ConstantInt *NewCaseVal;
  CmpPredicate Predicate;

  // Match icmp X, C
  if (!match(V: ICI,
             P: m_ICmp(Pred&: Predicate, L: m_Value(V&: IcmpCond), R: m_ConstantInt(CI&: NewCaseVal))))
    return false;

  // Normalize the two forms of the pattern: an explicit select after the icmp,
  // or (Select == nullptr) an implicit `select %icmp, true, false`, in which
  // case the icmp's single user plays the role of the select's user.
  Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
  Instruction *User;
  if (!Select) {
    // If Select == nullptr, we can assume that there is a hidden no-op select
    // just after icmp
    SelectCond = ICI;
    SelectTrueVal = Builder.getTrue();
    SelectFalseVal = Builder.getFalse();
    User = ICI->user_back();
  } else {
    SelectCond = Select->getCondition();
    // Check if the select condition is the same as the icmp condition.
    if (SelectCond != ICI)
      return false;
    SelectTrueVal = Select->getTrueValue();
    SelectFalseVal = Select->getFalseValue();
    User = Select->user_back();
  }

  // The switch must actually be switching on the value the icmp compares.
  SwitchInst *SI = cast<SwitchInst>(Val: Pred->getTerminator());
  if (SI->getCondition() != IcmpCond)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(i_nocapture: 0, Val_nocapture: VVal);

    if (Value *V = simplifyInstruction(I: ICI, Q: {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(C: NewCaseVal) != SI->case_default()) {
    // On the default edge the switch value cannot equal NewCaseVal, so an
    // ICMP_EQ folds to false and anything else (ICMP_NE) folds to true.
    Value *V;
    if (Predicate == ICmpInst::ICMP_EQ)
      V = ConstantInt::getFalse(Context&: BB->getContext());
    else
      V = ConstantInt::getTrue(Context&: BB->getContext());

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the select has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(Idx: 0);
  PHINode *PHIUse = dyn_cast<PHINode>(Val: User);
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
      isa<PHINode>(Val: ++BasicBlock::iterator(PHIUse)))
    return false;

  // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
  // edge gets SelectTrueVal in the PHI.
  Value *DefaultCst = SelectFalseVal;
  Value *NewCst = SelectTrueVal;

  if (ICI->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(a&: DefaultCst, b&: NewCst);

  // Replace Select (which is used by the PHI for the default value) with
  // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
  if (Select) {
    Select->replaceAllUsesWith(V: DefaultCst);
    Select->eraseFromParent();
  } else {
    ICI->replaceAllUsesWith(V: DefaultCst);
  }
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(Context&: BB->getContext(), Name: "switch.edge", Parent: BB->getParent(), InsertBefore: BB);
  {
    SwitchInstProfUpdateWrapper SIW(*SI);
    // Split the old default weight (successor index 0) roughly evenly:
    // the remaining default edge keeps ceil(W0/2) and the new case gets the
    // same, since the peeled value's probability mass came from the default.
    auto W0 = SIW.getSuccessorWeight(idx: 0);
    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(idx: 0, W: *NewW);
    }
    SIW.addCase(OnVal: NewCaseVal, Dest: NewBB, W: NewW);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(Dest: SuccBlock);
  PHIUse->addIncoming(V: NewCst, BB: NewBB);
  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
5163
5164/// Check to see if it is branching on an or/and chain of icmp instructions, and
5165/// fold it into a switch instruction if so.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(CondBrInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  // Sorting also lets the contiguity check below work off the extreme
  // (front/back) values alone.
  array_pod_sort(Start: Values.begin(), End: Values.end(), Compare: constantIntSortPredicate);
  Values.erase(CS: llvm::unique(R&: Values), CE: Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  SmallVector<uint32_t> BranchWeights;
  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(I: *BI, Weights&: BranchWeights);

  // Figure out which block is which destination.
  // EdgeBB is the target taken when CompVal equals one of the Values;
  // DefaultBB is the other target. Keep the weights in sync when swapping.
  BasicBlock *DefaultBB = BI->getSuccessor(i: 1);
  BasicBlock *EdgeBB = BI->getSuccessor(i: 0);
  if (!TrueWhenEqual) {
    std::swap(a&: DefaultBB, b&: EdgeBB);
    if (HasProfile)
      std::swap(a&: BranchWeights[0], b&: BranchWeights[1]);
  }

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH.  BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(Old: BB, SplitPt: BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, BBName: "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(V: ExtraCase, AC, CtxI: BI, DT: nullptr))
      ExtraCase = Builder.CreateFreeze(V: ExtraCase);

    // We don't have any info about this condition.
    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(Cond: ExtraCase, True: EdgeBB, False: NewBB)
                             : Builder.CreateCondBr(Cond: ExtraCase, True: NewBB, False: EdgeBB);
    setExplicitlyUnknownBranchWeightsIfProfiled(I&: *Br, DEBUG_TYPE);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(Succ: EdgeBB, NewPred: BB, ExistPred: NewBB);

    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        V: CompVal, DestTy: DL.getIntPtrType(CompVal->getType()), Name: "magicptr");
  }

  // Check if we can represent the values as a contiguous range. If so, we use a
  // range check + conditional branch instead of a switch.
  // Values is sorted and duplicate-free, and the ConstantRange construction
  // below shows front() is the maximum and back() the minimum, so their
  // difference equals size-1 exactly when the values are contiguous.
  if (Values.front()->getValue() - Values.back()->getValue() ==
      Values.size() - 1) {
    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
        Lower: Values.back()->getValue(), Upper: Values.front()->getValue() + 1);
    // Express "CompVal in range" as a single (possibly offset) compare.
    APInt Offset, RHS;
    ICmpInst::Predicate Pred;
    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
    Value *X = CompVal;
    if (!Offset.isZero())
      X = Builder.CreateAdd(LHS: X, RHS: ConstantInt::get(Ty: CompVal->getType(), V: Offset));
    Value *Cond =
        Builder.CreateICmp(P: Pred, LHS: X, RHS: ConstantInt::get(Ty: CompVal->getType(), V: RHS));
    CondBrInst *NewBI = Builder.CreateCondBr(Cond, True: EdgeBB, False: DefaultBB);
    if (HasProfile)
      setBranchWeights(I&: *NewBI, Weights: BranchWeights, /*IsExpected=*/false);
    // We don't need to update PHI nodes since we don't add any new edges.
  } else {
    // Create the new switch instruction now.
    SwitchInst *New = Builder.CreateSwitch(V: CompVal, Dest: DefaultBB, NumCases: Values.size());
    if (HasProfile) {
      // We know the weight of the default case. We don't know the weight of the
      // other cases, but rather than completely lose profiling info, we split
      // the remaining probability equally over them.
      SmallVector<uint32_t> NewWeights(Values.size() + 1);
      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
                                        // if TrueWhenEqual.
      for (auto &V : drop_begin(RangeOrContainer&: NewWeights))
        V = BranchWeights[0] / Values.size();
      setBranchWeights(I&: *New, Weights: NewWeights, /*IsExpected=*/false);
    }

    // Add all of the 'cases' to the switch instruction.
    for (ConstantInt *Val : Values)
      New->addCase(OnVal: Val, Dest: EdgeBB);

    // We added edges from PI to the EdgeBB. As such, if there were any
    // PHI nodes in EdgeBB, they need entries to be added corresponding to
    // the number of edges added. The PHI already has one entry for BB (from
    // the branch being replaced), so only Values.size()-1 duplicates of its
    // incoming value are needed.
    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(Val: BBI); ++BBI) {
      PHINode *PN = cast<PHINode>(Val&: BBI);
      Value *InVal = PN->getIncomingValueForBlock(BB);
      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
        PN->addIncoming(V: InVal, BB);
    }
  }

  // Erase the old branch instruction.
  eraseTerminatorAndDCECond(TI: BI);
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5330
5331bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5332 if (isa<PHINode>(Val: RI->getValue()))
5333 return simplifyCommonResume(RI);
5334 else if (isa<LandingPadInst>(Val: RI->getParent()->getFirstNonPHIIt()) &&
5335 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5336 // The resume must unwind the exception that caused control to branch here.
5337 return simplifySingleResume(RI);
5338
5339 return false;
5340}
5341
5342// Check if cleanup block is empty
5343static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
5344 for (Instruction &I : R) {
5345 auto *II = dyn_cast<IntrinsicInst>(Val: &I);
5346 if (!II)
5347 return false;
5348
5349 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5350 switch (IntrinsicID) {
5351 case Intrinsic::dbg_declare:
5352 case Intrinsic::dbg_value:
5353 case Intrinsic::dbg_label:
5354 case Intrinsic::lifetime_end:
5355 break;
5356 default:
5357 return false;
5358 }
5359 }
5360 return true;
5361}
5362
5363// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(R: make_range(x: RI->getParent()->getFirstNonPHIIt(),
                                       y: BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(Val: RI->getValue());

  // Check incoming blocks to see if any of them are trivial. A trivial block
  // starts with a landingpad, feeds exactly that landingpad into the phi, and
  // contains nothing else but benign intrinsics before its terminator.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(i: Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(i: Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(Val: IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

    if (isCleanupBlockEmpty(
            R: make_range(x: LandingPad->getNextNode(), y: IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(X: IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(BB: TrivialBB) != -1)
      BB->removePredecessor(Pred: TrivialBB, KeepOneInputPHIs: true);

    // Demote every invoke unwinding into the trivial block to a plain call.
    for (BasicBlock *Pred :
         llvm::make_early_inc_range(Range: predecessors(BB: TrivialBB))) {
      removeUnwindEdge(BB: Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates(Updates: {{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // TrivialUnwindBlocks is non-empty at this point (checked above), so we
  // always report a change here.
  return !TrivialUnwindBlocks.empty();
}
5432
5433// Simplify resume that is only used by a single (non-phi) landing pad.
5434bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5435 BasicBlock *BB = RI->getParent();
5436 auto *LPInst = cast<LandingPadInst>(Val: BB->getFirstNonPHIIt());
5437 assert(RI->getValue() == LPInst &&
5438 "Resume must unwind the exception that caused control to here");
5439
5440 // Check that there are no other instructions except for debug intrinsics.
5441 if (!isCleanupBlockEmpty(
5442 R: make_range<Instruction *>(x: LPInst->getNextNode(), y: RI)))
5443 return false;
5444
5445 // Turn all invokes that unwind here into calls and delete the basic block.
5446 for (BasicBlock *Pred : llvm::make_early_inc_range(Range: predecessors(BB))) {
5447 removeUnwindEdge(BB: Pred, DTU);
5448 ++NumInvokes;
5449 }
5450
5451 // The landingpad is now unreachable. Zap it.
5452 DeleteDeadBlock(BB, DTU);
5453 return true;
5454}
5455
/// Remove the trivially empty cleanup pad terminated by \p RI.
///
/// \returns true if the pad's basic block was deleted.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  if (!isCleanupBlockEmpty(
          R: make_range<Instruction *>(x: CPInst->getNextNode(), y: RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(i: Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(Val: SrcVal);

      // Translate through BB's own PHI when the incoming value is defined
      // inside BB; otherwise the same value serves for every new edge.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(BB: Pred) : SrcVal;
        DestPN.addIncoming(V: Incoming, BB: Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(Range: BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(BB: UnwindDest))
        if (pred != BB)
          PN.addIncoming(V: &PN, BB: pred);
      PN.moveBefore(InsertPos: InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(V: PoisonValue::get(T: PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  for (BasicBlock *PredBB : llvm::make_early_inc_range(Range: predecessors(BB))) {
    if (UnwindDest == nullptr) {
      if (DTU) {
        // removeUnwindEdge applies DTU updates of its own; flush our pending
        // ones first so the update stream stays correctly ordered.
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(BB: PredBB, DTU);
      ++NumInvokes;
    } else {
      // Retarget the predecessor's terminator directly at UnwindDest.
      BB->removePredecessor(Pred: PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(From: BB, To: UnwindDest);
      if (DTU) {
        Updates.push_back(x: {DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back(x: {DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5571
5572// Try to merge two cleanuppads together.
5573static bool mergeCleanupPad(CleanupReturnInst *RI) {
5574 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5575 // with.
5576 BasicBlock *UnwindDest = RI->getUnwindDest();
5577 if (!UnwindDest)
5578 return false;
5579
5580 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5581 // be safe to merge without code duplication.
5582 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5583 return false;
5584
5585 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5586 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(Val: &UnwindDest->front());
5587 if (!SuccessorCleanupPad)
5588 return false;
5589
5590 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5591 // Replace any uses of the successor cleanupad with the predecessor pad
5592 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5593 // funclet bundle operands.
5594 SuccessorCleanupPad->replaceAllUsesWith(V: PredecessorCleanupPad);
5595 // Remove the old cleanuppad.
5596 SuccessorCleanupPad->eraseFromParent();
5597 // Now, we simply replace the cleanupret with a branch to the unwind
5598 // destination.
5599 UncondBrInst::Create(Target: UnwindDest, InsertBefore: RI->getParent());
5600 RI->eraseFromParent();
5601
5602 return true;
5603}
5604
5605bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5606 // It is possible to transiantly have an undef cleanuppad operand because we
5607 // have deleted some, but not all, dead blocks.
5608 // Eventually, this block will be deleted.
5609 if (isa<UndefValue>(Val: RI->getOperand(i_nocapture: 0)))
5610 return false;
5611
5612 if (mergeCleanupPad(RI))
5613 return true;
5614
5615 if (removeEmptyCleanup(RI, DTU))
5616 return true;
5617
5618 return false;
5619}
5620
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
/// Delete instructions that cannot execute before \p UI, and rewrite
/// predecessors whose edge into \p UI's block is provably never taken.
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  BB->flushTerminatorDbgRecords();

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    BasicBlock::iterator BBI = UI->getIterator();
    --BBI;

    if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBI))
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(V: PoisonValue::get(T: BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // A SetVector ensures each predecessor is visited exactly once, even when
  // it has multiple edges into BB (e.g. several switch cases).
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (isa<UncondBrInst>(Val: TI)) {
      // An unconditional branch into `unreachable` means the predecessor's
      // terminator itself can never execute to completion.
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *BI = dyn_cast<CondBrInst>(Val: TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        // One arm leads to `unreachable`: fold to an unconditional branch to
        // the live arm, and record the implied condition as an assumption.
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        if (BI->getSuccessor(i: 0) == BB) {
          Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
          Builder.CreateBr(Dest: BI->getSuccessor(i: 1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(Dest: BI->getSuccessor(i: 0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));

        eraseTerminatorAndDCECond(TI: BI);
        Changed = true;
      }
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(Val: TI)) {
      // Drop every switch case that targets BB; the wrapper keeps profile
      // metadata in sync.
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(Pred: SU->getParent());
        i = SU.removeCase(I: i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(Val: TI)) {
      if (II->getUnwindDest() == BB) {
        // removeUnwindEdge applies DTU updates of its own; flush our pending
        // ones first so the update stream stays correctly ordered.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(Val: removeUnwindEdge(BB: TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(BB: TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // BB may instead be one of the catchswitch's handlers; remove every
      // handler entry that names it.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(HI: I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(BB: Predecessor)) {
              Updates.push_back(x: {DominatorTree::Insert,
                                  PredecessorOfPredecessor,
                                  CSI->getUnwindDest()});
              Updates.push_back(x: {DominatorTree::Delete,
                                  PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(V: CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(BB: Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(BB: EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5808
/// Describes a contiguous range of switch-case values and its destinations.
///
/// Every value in the inclusive range [Min, Max] transfers control to Dest;
/// all remaining case values transfer to OtherDest. Cases/OtherCases point
/// at the corresponding caller-owned case-value lists.
struct ContiguousCasesResult {
  ConstantInt *Min;      ///< Smallest value of the contiguous range.
  ConstantInt *Max;      ///< Largest value of the contiguous range.
  BasicBlock *Dest;      ///< Destination of values inside [Min, Max].
  BasicBlock *OtherDest; ///< Destination of all other values.
  SmallVectorImpl<ConstantInt *> *Cases;      ///< Case values going to Dest.
  SmallVectorImpl<ConstantInt *> *OtherCases; ///< Case values going to OtherDest.
};
5817
/// Look for a contiguous run among the switch's case values.
///
/// Returns a result describing either a directly contiguous \p Cases set
/// (targeting \p Dest), or — when \p Cases wraps around the condition's full
/// unsigned range — the complementary contiguous set, which targets
/// \p OtherDest. Returns std::nullopt when neither form applies.
static std::optional<ContiguousCasesResult>
findContiguousCases(Value *Condition, SmallVectorImpl<ConstantInt *> &Cases,
                    SmallVectorImpl<ConstantInt *> &OtherCases,
                    BasicBlock *Dest, BasicBlock *OtherDest) {
  assert(Cases.size() >= 1);

  // After the sort, the code below relies on Cases.front() holding the
  // largest value and Cases.back() the smallest.
  array_pod_sort(Start: Cases.begin(), End: Cases.end(), Compare: constantIntSortPredicate);
  const APInt &Min = Cases.back()->getValue();
  const APInt &Max = Cases.front()->getValue();
  APInt Offset = Max - Min;
  size_t ContiguousOffset = Cases.size() - 1;
  // N distinct values are contiguous exactly when Max - Min == N - 1.
  if (Offset == ContiguousOffset) {
    return ContiguousCasesResult{
        /*Min=*/Cases.back(),
        /*Max=*/Cases.front(),
        /*Dest=*/Dest,
        /*OtherDest=*/OtherDest,
        /*Cases=*/&Cases,
        /*OtherCases=*/&OtherCases,
    };
  }
  ConstantRange CR = computeConstantRange(V: Condition, /*ForSigned=*/false);
  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
  // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
  // contiguous range for the other destination. N.B. If CR is not a full range,
  // Max+1 is not equal to Min. It's not continuous in arithmetic.
  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
    assert(Cases.size() >= 2);
    // Locate the single gap in the descending sequence of case values.
    auto *It =
        std::adjacent_find(first: Cases.begin(), last: Cases.end(), binary_pred: [](auto L, auto R) {
          return L->getValue() != R->getValue() + 1;
        });
    if (It == Cases.end())
      return std::nullopt;
    auto [OtherMax, OtherMin] = std::make_pair(x&: *It, y&: *std::next(x: It));
    // Both halves must themselves be gap-free: together they then account for
    // every element of Cases except the two gap endpoints.
    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
        Cases.size() - 2) {
      return ContiguousCasesResult{
          /*Min=*/cast<ConstantInt>(
              Val: ConstantInt::get(Ty: OtherMin->getType(), V: OtherMin->getValue() + 1)),
          /*Max=*/
          cast<ConstantInt>(
              Val: ConstantInt::get(Ty: OtherMax->getType(), V: OtherMax->getValue() - 1)),
          /*Dest=*/OtherDest,
          /*OtherDest=*/Dest,
          /*Cases=*/&OtherCases,
          /*OtherCases=*/&Cases,
      };
    }
  }
  return std::nullopt;
}
5870
5871static void createUnreachableSwitchDefault(SwitchInst *Switch,
5872 DomTreeUpdater *DTU,
5873 bool RemoveOrigDefaultBlock = true) {
5874 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5875 auto *BB = Switch->getParent();
5876 auto *OrigDefaultBlock = Switch->getDefaultDest();
5877 if (RemoveOrigDefaultBlock)
5878 OrigDefaultBlock->removePredecessor(Pred: BB);
5879 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5880 Context&: BB->getContext(), Name: BB->getName() + ".unreachabledefault", Parent: BB->getParent(),
5881 InsertBefore: OrigDefaultBlock);
5882 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5883 UI->setDebugLoc(DebugLoc::getTemporary());
5884 Switch->setDefaultDest(&*NewDefaultBlock);
5885 if (DTU) {
5886 SmallVector<DominatorTree::UpdateType, 2> Updates;
5887 Updates.push_back(Elt: {DominatorTree::Insert, BB, &*NewDefaultBlock});
5888 if (RemoveOrigDefaultBlock &&
5889 !is_contained(Range: successors(BB), Element: OrigDefaultBlock))
5890 Updates.push_back(Elt: {DominatorTree::Delete, BB, &*OrigDefaultBlock});
5891 DTU->applyUpdates(Updates);
5892 }
5893}
5894
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();
  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  SmallVector<ConstantInt *, 16> CasesA;
  SmallVector<ConstantInt *, 16> CasesB;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Elt: Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Elt: Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  std::optional<ContiguousCasesResult> ContiguousCases;

  // Only one icmp is needed when there is only one case.
  if (!HasDefault && CasesA.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesA[0],
        /*Max=*/CasesA[0],
        /*Dest=*/DestA,
        /*OtherDest=*/DestB,
        /*Cases=*/&CasesA,
        /*OtherCases=*/&CasesB,
    };
  else if (CasesB.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesB[0],
        /*Max=*/CasesB[0],
        /*Dest=*/DestB,
        /*OtherDest=*/DestA,
        /*Cases=*/&CasesB,
        /*OtherCases=*/&CasesA,
    };
  // Correctness: Cases to the default destination cannot be contiguous cases.
  else if (!HasDefault)
    ContiguousCases =
        findContiguousCases(Condition: SI->getCondition(), Cases&: CasesA, OtherCases&: CasesB, Dest: DestA, OtherDest: DestB);

  if (!ContiguousCases)
    ContiguousCases =
        findContiguousCases(Condition: SI->getCondition(), Cases&: CasesB, OtherCases&: CasesA, Dest: DestB, OtherDest: DestA);

  if (!ContiguousCases)
    return false;

  auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;

  // Start building the compare and branch.
  // The range check is emitted as: (Cond - Min) ult (Max - Min + 1).

  Constant *Offset = ConstantExpr::getNeg(C: Min);
  Constant *NumCases = ConstantInt::get(Ty: Offset->getType(),
                                        V: Max->getValue() - Min->getValue() + 1);
  Instruction *NewBI;
  // A single-value range degenerates to a plain equality test.
  if (NumCases->isOneValue()) {
    assert(Max->getValue() == Min->getValue());
    Value *Cmp = Builder.CreateICmpEQ(LHS: SI->getCondition(), RHS: Min);
    NewBI = Builder.CreateCondBr(Cond: Cmp, True: Dest, False: OtherDest);
  }
  // If NumCases overflowed, then all possible values jump to the successor.
  else if (NumCases->isNullValue() && !Cases->empty()) {
    NewBI = Builder.CreateBr(Dest);
  } else {
    Value *Sub = SI->getCondition();
    if (!Offset->isNullValue())
      Sub = Builder.CreateAdd(LHS: Sub, RHS: Offset, Name: Sub->getName() + ".off");
    Value *Cmp = Builder.CreateICmpULT(LHS: Sub, RHS: NumCases, Name: "switch");
    NewBI = Builder.CreateCondBr(Cond: Cmp, True: Dest, False: OtherDest);
  }

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(I: *SI) && isa<CondBrInst>(Val: NewBI)) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(TI: SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(idx: I) == Dest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Halve both weights together until each fits in 32 bits, preserving
      // their ratio.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setFittedBranchWeights(I&: *NewBI, Weights: {TrueWeight, FalseWeight},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  // BB stays a predecessor through the new branch, so keep exactly one
  // incoming value per PHI.
  for (auto &PHI : make_early_inc_range(Range: Dest->phis())) {
    unsigned PreviousEdges = Cases->size();
    if (Dest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      PHI.removeIncomingValue(BB: SI->getParent());
  }
  for (auto &PHI : make_early_inc_range(Range: OtherDest->phis())) {
    unsigned PreviousEdges = OtherCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    unsigned E = PreviousEdges - 1;
    // Remove all incoming values from OtherDest if OtherDest is unreachable.
    if (isa<UncondBrInst>(Val: NewBI))
      ++E;
    for (unsigned I = 0; I != E; ++I)
      PHI.removeIncomingValue(BB: SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    createUnreachableSwitchDefault(Switch: SI, DTU);

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
6052
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(V: Cond, DL, AC, CxtI: SI);
  // If Cond can only take on a small set of values (at most 4), any case
  // value outside that set is dead.
  SmallPtrSet<const Constant *, 4> KnownValues;
  bool IsKnownValuesValid = collectPossibleValues(V: Cond, Constants&: KnownValues, MaxCount: 4);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Op: Cond, DL, AC, CxtI: SI);

  // Gather dead cases.
  SmallVector<ConstantInt *, 8> DeadCases;
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      // Track per-successor case counts so we later know which CFG edges
      // disappear entirely.
      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Key: Successor);
      if (Inserted)
        UniqueSuccessors.push_back(Elt: Successor);
      ++It->second;
    }
    ConstantInt *CaseC = Case.getCaseValue();
    const APInt &CaseVal = CaseC->getValue();
    // A case is dead if its value conflicts with the known bits, needs more
    // significant bits than the condition can carry, or is absent from the
    // exact value set (when that set is known).
    if (Known.Zero.intersects(RHS: CaseVal) || !Known.One.isSubsetOf(RHS: CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
        (IsKnownValuesValid && !KnownValues.contains(Ptr: CaseC))) {
      DeadCases.push_back(Elt: CaseC);
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    } else if (IsKnownValuesValid)
      // Live case: remove it so KnownValues ends up holding only the
      // possible values not covered by any case.
      KnownValues.erase(Ptr: CaseC);
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault = !SI->defaultDestUnreachable();
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty()) {
    // If every uncovered possible value is undef, the default can never be
    // reached with a meaningful value.
    if (IsKnownValuesValid && all_of(Range&: KnownValues, P: IsaPred<UndefValue>)) {
      createUnreachableSwitchDefault(Switch: SI, DTU);
      return true;
    }

    if (NumUnknownBits < 64 /* avoid overflow */) {
      uint64_t AllNumCases = 1ULL << NumUnknownBits;
      if (SI->getNumCases() == AllNumCases) {
        createUnreachableSwitchDefault(Switch: SI, DTU);
        return true;
      }
      // When only one case value is missing, replace default with that case.
      // Eliminating the default branch will provide more opportunities for
      // optimization, such as lookup tables.
      if (SI->getNumCases() == AllNumCases - 1) {
        assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
        IntegerType *CondTy = cast<IntegerType>(Val: Cond->getType());
        if (CondTy->getIntegerBitWidth() > 64 ||
            !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
          return false;

        // With more than one unknown bit, each free bit is set in an even
        // number of the possible values, so the XOR over all possible values
        // is zero; XOR-ing the present case values therefore isolates the
        // single missing value.
        uint64_t MissingCaseVal = 0;
        for (const auto &Case : SI->cases())
          MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
        auto *MissingCase = cast<ConstantInt>(
            Val: ConstantInt::get(Ty: Cond->getType(), V: MissingCaseVal));
        SwitchInstProfUpdateWrapper SIW(*SI);
        // Give the new case the old default's branch weight, then zero the
        // (now unreachable) default's weight.
        SIW.addCase(OnVal: MissingCase, Dest: SI->getDefaultDest(),
                    W: SIW.getSuccessorWeight(idx: 0));
        createUnreachableSwitchDefault(Switch: SI, DTU,
                                       /*RemoveOrigDefaultBlock*/ false);
        SIW.setSuccessorWeight(idx: 0, W: 0);
        return true;
      }
    }
  }

  if (DeadCases.empty())
    return false;

  SwitchInstProfUpdateWrapper SIW(*SI);
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(C: DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(Pred: SI->getParent());
    SIW.removeCase(I: CaseI);
  }

  if (DTU) {
    // Edges whose successor lost its every case are gone from the CFG.
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back(x: {DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
6164
6165/// If BB would be eligible for simplification by
6166/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6167/// by an unconditional branch), look at the phi node for BB in the successor
6168/// block and see if the incoming value is equal to CaseValue. If so, return
6169/// the phi node, and set PhiIndex to BB's index in the phi node.
6170static PHINode *findPHIForConditionForwarding(ConstantInt *CaseValue,
6171 BasicBlock *BB, int *PhiIndex) {
6172 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6173 return nullptr; // BB must be empty to be a candidate for simplification.
6174 if (!BB->getSinglePredecessor())
6175 return nullptr; // BB must be dominated by the switch.
6176
6177 UncondBrInst *Branch = dyn_cast<UncondBrInst>(Val: BB->getTerminator());
6178 if (!Branch)
6179 return nullptr; // Terminator must be unconditional branch.
6180
6181 BasicBlock *Succ = Branch->getSuccessor();
6182
6183 for (PHINode &PHI : Succ->phis()) {
6184 int Idx = PHI.getBasicBlockIndex(BB);
6185 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6186
6187 Value *InValue = PHI.getIncomingValue(i: Idx);
6188 if (InValue != CaseValue)
6189 continue;
6190
6191 *PhiIndex = Idx;
6192 return &PHI;
6193 }
6194
6195 return nullptr;
6196}
6197
6198/// Try to forward the condition of a switch instruction to a phi node
6199/// dominated by the switch, if that would mean that some of the destination
6200/// blocks of the switch can be folded away. Return true if a change is made.
6201static bool forwardSwitchConditionToPHI(SwitchInst *SI) {
6202 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6203
6204 ForwardingNodesMap ForwardingNodes;
6205 BasicBlock *SwitchBlock = SI->getParent();
6206 bool Changed = false;
6207 for (const auto &Case : SI->cases()) {
6208 ConstantInt *CaseValue = Case.getCaseValue();
6209 BasicBlock *CaseDest = Case.getCaseSuccessor();
6210
6211 // Replace phi operands in successor blocks that are using the constant case
6212 // value rather than the switch condition variable:
6213 // switchbb:
6214 // switch i32 %x, label %default [
6215 // i32 17, label %succ
6216 // ...
6217 // succ:
6218 // %r = phi i32 ... [ 17, %switchbb ] ...
6219 // -->
6220 // %r = phi i32 ... [ %x, %switchbb ] ...
6221
6222 for (PHINode &Phi : CaseDest->phis()) {
6223 // This only works if there is exactly 1 incoming edge from the switch to
6224 // a phi. If there is >1, that means multiple cases of the switch map to 1
6225 // value in the phi, and that phi value is not the switch condition. Thus,
6226 // this transform would not make sense (the phi would be invalid because
6227 // a phi can't have different incoming values from the same block).
6228 int SwitchBBIdx = Phi.getBasicBlockIndex(BB: SwitchBlock);
6229 if (Phi.getIncomingValue(i: SwitchBBIdx) == CaseValue &&
6230 count(Range: Phi.blocks(), Element: SwitchBlock) == 1) {
6231 Phi.setIncomingValue(i: SwitchBBIdx, V: SI->getCondition());
6232 Changed = true;
6233 }
6234 }
6235
6236 // Collect phi nodes that are indirectly using this switch's case constants.
6237 int PhiIdx;
6238 if (auto *Phi = findPHIForConditionForwarding(CaseValue, BB: CaseDest, PhiIndex: &PhiIdx))
6239 ForwardingNodes[Phi].push_back(Elt: PhiIdx);
6240 }
6241
6242 for (auto &ForwardingNode : ForwardingNodes) {
6243 PHINode *Phi = ForwardingNode.first;
6244 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6245 // Check if it helps to fold PHI.
6246 if (Indexes.size() < 2 && !llvm::is_contained(Range: Phi->incoming_values(), Element: SI->getCondition()))
6247 continue;
6248
6249 for (int Index : Indexes)
6250 Phi->setIncomingValue(i: Index, V: SI->getCondition());
6251 Changed = true;
6252 }
6253
6254 return Changed;
6255}
6256
6257/// Return true if the backend will be able to handle
6258/// initializing an array of constants like C.
6259static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
6260 if (C->isThreadDependent())
6261 return false;
6262 if (C->isDLLImportDependent())
6263 return false;
6264
6265 if (!isa<ConstantDataVector, ConstantExpr, ConstantFP, ConstantInt,
6266 ConstantPointerNull, GlobalValue, UndefValue>(Val: C))
6267 return false;
6268
6269 // Globals cannot contain scalable types.
6270 if (C->getType()->isScalableTy())
6271 return false;
6272
6273 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) {
6274 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6275 // materializing the array of constants.
6276 Constant *StrippedC = cast<Constant>(Val: CE->stripInBoundsConstantOffsets());
6277 if (StrippedC == C || !validLookupTableConstant(C: StrippedC, TTI))
6278 return false;
6279 }
6280
6281 if (!TTI.shouldBuildLookupTablesForConstant(C))
6282 return false;
6283
6284 return true;
6285}
6286
6287/// If V is a Constant, return it. Otherwise, try to look up
6288/// its constant value in ConstantPool, returning 0 if it's not there.
6289static Constant *
6290lookupConstant(Value *V,
6291 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
6292 if (Constant *C = dyn_cast<Constant>(Val: V))
6293 return C;
6294 return ConstantPool.lookup(Val: V);
6295}
6296
6297/// Try to fold instruction I into a constant. This works for
6298/// simple instructions such as binary operations where both operands are
6299/// constant or can be replaced by constants from the ConstantPool. Returns the
6300/// resulting constant on success, 0 otherwise.
6301static Constant *
6302constantFold(Instruction *I, const DataLayout &DL,
6303 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
6304 if (SelectInst *Select = dyn_cast<SelectInst>(Val: I)) {
6305 Constant *A = lookupConstant(V: Select->getCondition(), ConstantPool);
6306 if (!A)
6307 return nullptr;
6308 if (A->isAllOnesValue())
6309 return lookupConstant(V: Select->getTrueValue(), ConstantPool);
6310 if (A->isNullValue())
6311 return lookupConstant(V: Select->getFalseValue(), ConstantPool);
6312 return nullptr;
6313 }
6314
6315 SmallVector<Constant *, 4> COps;
6316 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6317 if (Constant *A = lookupConstant(V: I->getOperand(i: N), ConstantPool))
6318 COps.push_back(Elt: A);
6319 else
6320 return nullptr;
6321 }
6322
6323 return ConstantFoldInstOperands(I, Ops: COps, DL);
6324}
6325
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (nullptr for the
/// default case), of a switch instruction SI.
/// On success, Res holds one (phi, constant) pair per phi in *CommonDest that
/// has an incoming edge from this case. Returns false if any phi's value on
/// this edge cannot be proven to be a valid lookup-table constant.
static bool
getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  // Seed the pool with the switch condition, which equals CaseVal along this
  // case edge (CaseVal is nullptr for the default case, making the condition
  // unknown there).
  SmallDenseMap<Value *, Constant *> ConstantPool;
  ConstantPool.insert(KV: std::make_pair(x: SI->getCondition(), y&: CaseVal));
  for (Instruction &I : *CaseDest) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(Idx: 0);
    } else if (Constant *C = constantFold(I: &I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(Val: User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(Val: User))
          if (Phi->getIncomingBlock(U: Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(KV: std::make_pair(x: &I, y&: C));
    } else {
      // Instruction has side effects or non-constant operands: stop scanning
      // and fall through to the CommonDest checks below.
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB: Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        lookupConstant(V: PHI.getIncomingValue(i: Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!validLookupTableConstant(C: ConstVal, TTI))
      return false;

    Res.push_back(Elt: std::make_pair(x: &PHI, y&: ConstVal));
  }

  return Res.size() > 0;
}
6399
6400// Helper function used to add CaseVal to the list of cases that generate
6401// Result. Returns the updated number of cases that generate this result.
6402static size_t mapCaseToResult(ConstantInt *CaseVal,
6403 SwitchCaseResultVectorTy &UniqueResults,
6404 Constant *Result) {
6405 for (auto &I : UniqueResults) {
6406 if (I.first == Result) {
6407 I.second.push_back(Elt: CaseVal);
6408 return I.second.size();
6409 }
6410 }
6411 UniqueResults.push_back(
6412 Elt: std::make_pair(x&: Result, y: SmallVector<ConstantInt *, 4>(1, CaseVal)));
6413 return 1;
6414}
6415
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
// On success: PHI is the single phi fed by the switch, CommonDest its block,
// UniqueResults maps each distinct result constant to the case values that
// produce it, and DefaultResult is the default's value (or nullptr when the
// default destination is unreachable).
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CaseDest: I.getCaseSuccessor(), CommonDest: &CommonDest, Res&: Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Result: Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
  getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest, Res&: DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;

  return DefaultResult || SI->defaultDestUnreachable();
}
6469
6470// Helper function that checks if it is possible to transform a switch with only
6471// two cases (or two cases + default) that produces a result into a select.
6472// TODO: Handle switches with more than 2 cases that map to the same result.
6473// The branch weights correspond to the provided Condition (i.e. if Condition is
6474// modified from the original SwitchInst, the caller must adjust the weights)
6475static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6476 Constant *DefaultResult, Value *Condition,
6477 IRBuilder<> &Builder, const DataLayout &DL,
6478 ArrayRef<uint32_t> BranchWeights) {
6479 // If we are selecting between only two cases transform into a simple
6480 // select or a two-way select if default is possible.
6481 // Example:
6482 // switch (a) { %0 = icmp eq i32 %a, 10
6483 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6484 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6485 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6486 // }
6487
6488 const bool HasBranchWeights =
6489 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6490
6491 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6492 ResultVector[1].second.size() == 1) {
6493 ConstantInt *FirstCase = ResultVector[0].second[0];
6494 ConstantInt *SecondCase = ResultVector[1].second[0];
6495 Value *SelectValue = ResultVector[1].first;
6496 if (DefaultResult) {
6497 Value *ValueCompare =
6498 Builder.CreateICmpEQ(LHS: Condition, RHS: SecondCase, Name: "switch.selectcmp");
6499 SelectValue = Builder.CreateSelect(C: ValueCompare, True: ResultVector[1].first,
6500 False: DefaultResult, Name: "switch.select");
6501 if (auto *SI = dyn_cast<SelectInst>(Val: SelectValue);
6502 SI && HasBranchWeights) {
6503 // We start with 3 probabilities, where the numerator is the
6504 // corresponding BranchWeights[i], and the denominator is the sum over
6505 // BranchWeights. We want the probability and negative probability of
6506 // Condition == SecondCase.
6507 assert(BranchWeights.size() == 3);
6508 setBranchWeights(
6509 I&: *SI, Weights: {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6510 /*IsExpected=*/false, /*ElideAllZero=*/true);
6511 }
6512 }
6513 Value *ValueCompare =
6514 Builder.CreateICmpEQ(LHS: Condition, RHS: FirstCase, Name: "switch.selectcmp");
6515 Value *Ret = Builder.CreateSelect(C: ValueCompare, True: ResultVector[0].first,
6516 False: SelectValue, Name: "switch.select");
6517 if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6518 // We may have had a DefaultResult. Base the position of the first and
6519 // second's branch weights accordingly. Also the proability that Condition
6520 // != FirstCase needs to take that into account.
6521 assert(BranchWeights.size() >= 2);
6522 size_t FirstCasePos = (Condition != nullptr);
6523 size_t SecondCasePos = FirstCasePos + 1;
6524 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
6525 setBranchWeights(I&: *SI,
6526 Weights: {BranchWeights[FirstCasePos],
6527 DefaultCase + BranchWeights[SecondCasePos]},
6528 /*IsExpected=*/false, /*ElideAllZero=*/true);
6529 }
6530 return Ret;
6531 }
6532
6533 // Handle the degenerate case where two cases have the same result value.
6534 if (ResultVector.size() == 1 && DefaultResult) {
6535 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6536 unsigned CaseCount = CaseValues.size();
6537 // n bits group cases map to the same result:
6538 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6539 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6540 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6541 if (isPowerOf2_32(Value: CaseCount)) {
6542 ConstantInt *MinCaseVal = CaseValues[0];
6543 // If there are bits that are set exclusively by CaseValues, we
6544 // can transform the switch into a select if the conjunction of
6545 // all the values uniquely identify CaseValues.
6546 APInt AndMask = APInt::getAllOnes(numBits: MinCaseVal->getBitWidth());
6547
6548 // Find the minimum value and compute the and of all the case values.
6549 for (auto *Case : CaseValues) {
6550 if (Case->getValue().slt(RHS: MinCaseVal->getValue()))
6551 MinCaseVal = Case;
6552 AndMask &= Case->getValue();
6553 }
6554 KnownBits Known = computeKnownBits(V: Condition, DL);
6555
6556 if (!AndMask.isZero() && Known.getMaxValue().uge(RHS: AndMask)) {
6557 // Compute the number of bits that are free to vary.
6558 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6559
6560 // Check if the number of values covered by the mask is equal
6561 // to the number of cases.
6562 if (FreeBits == Log2_32(Value: CaseCount)) {
6563 Value *And = Builder.CreateAnd(LHS: Condition, RHS: AndMask);
6564 Value *Cmp = Builder.CreateICmpEQ(
6565 LHS: And, RHS: Constant::getIntegerValue(Ty: And->getType(), V: AndMask));
6566 Value *Ret =
6567 Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
6568 if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6569 // We know there's a Default case. We base the resulting branch
6570 // weights off its probability.
6571 assert(BranchWeights.size() >= 2);
6572 setBranchWeights(
6573 I&: *SI,
6574 Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: 0U), BranchWeights[0]},
6575 /*IsExpected=*/false, /*ElideAllZero=*/true);
6576 }
6577 return Ret;
6578 }
6579 }
6580
6581 // Mark the bits case number touched.
6582 APInt BitMask = APInt::getZero(numBits: MinCaseVal->getBitWidth());
6583 for (auto *Case : CaseValues)
6584 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6585
6586 // Check if cases with the same result can cover all number
6587 // in touched bits.
6588 if (BitMask.popcount() == Log2_32(Value: CaseCount)) {
6589 if (!MinCaseVal->isNullValue())
6590 Condition = Builder.CreateSub(LHS: Condition, RHS: MinCaseVal);
6591 Value *And = Builder.CreateAnd(LHS: Condition, RHS: ~BitMask, Name: "switch.and");
6592 Value *Cmp = Builder.CreateICmpEQ(
6593 LHS: And, RHS: Constant::getNullValue(Ty: And->getType()), Name: "switch.selectcmp");
6594 Value *Ret =
6595 Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
6596 if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6597 assert(BranchWeights.size() >= 2);
6598 setBranchWeights(
6599 I&: *SI,
6600 Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: 0U), BranchWeights[0]},
6601 /*IsExpected=*/false, /*ElideAllZero=*/true);
6602 }
6603 return Ret;
6604 }
6605 }
6606
6607 // Handle the degenerate case where two cases have the same value.
6608 if (CaseValues.size() == 2) {
6609 Value *Cmp1 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[0],
6610 Name: "switch.selectcmp.case1");
6611 Value *Cmp2 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[1],
6612 Name: "switch.selectcmp.case2");
6613 Value *Cmp = Builder.CreateOr(LHS: Cmp1, RHS: Cmp2, Name: "switch.selectcmp");
6614 Value *Ret =
6615 Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
6616 if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6617 assert(BranchWeights.size() >= 2);
6618 setBranchWeights(
6619 I&: *SI, Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: 0U), BranchWeights[0]},
6620 /*IsExpected=*/false, /*ElideAllZero=*/true);
6621 }
6622 return Ret;
6623 }
6624 }
6625
6626 return nullptr;
6627}
6628
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
// Replaces SI's block terminator with an unconditional branch to the phi's
// block, rewires PHI to take SelectValue from SelectBB, detaches all other
// successors, and records the CFG changes with DTU.
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  // Only record an Insert if the edge does not already exist (the switch may
  // already branch to DestBB for some case).
  if (DTU && !is_contained(Range: predecessors(BB: DestBB), Element: SelectBB))
    Updates.push_back(x: {DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(Dest: DestBB);

  // Remove the switch.

  // Collapse all of SelectBB's incoming phi slots into a single entry that
  // carries the select's result.
  PHI->removeIncomingValueIf(
      Predicate: [&](unsigned Idx) { return PHI->getIncomingBlock(i: Idx) == SelectBB; });
  PHI->addIncoming(V: SelectValue, BB: SelectBB);

  // Detach every former successor except DestBB; deduplicate DTU edges since
  // a switch may list the same successor more than once.
  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(idx: i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(Pred: SelectBB);
    if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
      Updates.push_back(x: {DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}
6664
/// If a switch is only used to initialize one or more phi nodes in a common
/// successor block with only two different constant values, try to replace the
/// switch with a select. Returns true if the fold was made.
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
                              DomTreeUpdater *DTU, const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  Value *const Cond = SI->getCondition();
  PHINode *PHI = nullptr;
  BasicBlock *CommonDest = nullptr;
  Constant *DefaultResult;
  SwitchCaseResultVectorTy UniqueResults;
  // Collect all the cases that will deliver the same value from the switch.
  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
                             DL, TTI, /*MaxUniqueResults*/ 2))
    return false;

  assert(PHI != nullptr && "PHI for value select not found");
  Builder.SetInsertPoint(SI);
  // Extract the switch's branch weights (if present) so that the selects we
  // build can carry equivalent profile metadata.
  SmallVector<uint32_t, 4> BranchWeights;
  if (!ProfcheckDisableMetadataFixes) {
    [[maybe_unused]] auto HasWeights =
        extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights&: BranchWeights);
    assert(!HasWeights == (BranchWeights.empty()));
  }
  assert(BranchWeights.empty() ||
         (BranchWeights.size() >=
          UniqueResults.size() + (DefaultResult != nullptr)));

  Value *SelectValue = foldSwitchToSelect(ResultVector: UniqueResults, DefaultResult, Condition: Cond,
                                          Builder, DL, BranchWeights);
  if (!SelectValue)
    return false;

  // The select fully replaces the switch; rewrite the phi and the CFG.
  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
  return true;
}
6701
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

  /// Return true if the replacement is a bit map.
  bool isBitMap();

private:
  // Depending on the switch, there are different alternatives.
  // The kind is chosen once by the constructor, cheapest-first.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True when the linear map may wrap in the signed sense; used to suppress
  // the nsw flags on the generated mul/add.
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6780
// Analyze the case values and pick the cheapest representation
// (single value, linear map, bitmap, or array), in that order of preference.
SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    // Table slots are indexed relative to the smallest case value (Offset).
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Poison results don't disqualify SingleValue; any other mismatch does.
    if (SingleValue && !isa<PoisonValue>(Val: CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(Val: SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(Val: DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  if (isa<IntegerType>(Val: ValueType)) {
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(Val: TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(Val: TableContents[I])) {
        // This is a poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Val: Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(RHS: PrevVal) : Val.sgt(RHS: PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(Val: TableContents[0]);
      LinearMultiplier = ConstantInt::get(Context&: M.getContext(), V: DistToPrev);
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      // Only prove no-wrap when TableSize - 1 is representable in the
      // multiplier's bit width; otherwise conservatively assume wrapping.
      if (isIntN(N: M.getBitWidth(), x: TableSize - 1))
        (void)M.smul_ov(RHS: APInt(M.getBitWidth(), TableSize - 1), Overflow&: MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ElementType: ValueType)) {
    IntegerType *IT = cast<IntegerType>(Val: ValueType);
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    // Walk backwards so element 0 lands in the lowest bits.
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(Val: TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(Val: TableContents[I - 1]);
        TableInt |= Val->getValue().zext(width: TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(Context&: M.getContext(), V: TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ElementType: ValueType, NumElements: TableSize);
  Initializer = ConstantArray::get(T: TableTy, V: TableContents);

  Kind = LookupTableKind;
}
6910
// Emit the IR that computes the replacement value for a given Index,
// according to the representation Kind chosen by the constructor.
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // Every case yields the same constant; no code needed.
    return SingleValue;
  case LinearMapKind: {
    ++NumLinearMaps;
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(V: Index, DestTy: LinearMultiplier->getType(),
                                          isSigned: false, Name: "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(LHS: Result, RHS: LinearMultiplier, Name: "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(LHS: Result, RHS: LinearOffset, Name: "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    ++NumBitMaps;
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(V: Index, DestTy: MapTy, Name: "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        LHS: ShiftAmt, RHS: ConstantInt::get(Ty: MapTy, V: BitMapElementTy->getBitWidth()),
        Name: "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(LHS: BitMap, RHS: ShiftAmt, Name: "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(V: DownShifted, DestTy: BitMapElementTy, Name: "switch.masked");
  }
  case LookupTableKind: {
    ++NumLookupTables;
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of the array's elements. We will be only
    // loading one value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(Ty: ValueType));
    Type *IndexTy = DL.getIndexType(PtrTy: Table->getType());
    auto *ArrayTy = cast<ArrayType>(Val: Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(V: Index, DestTy: IndexTy);
      // The zext is non-negative if every valid table index fits in the
      // signed range of the old index type.
      if (auto *Zext = dyn_cast<ZExtInst>(Val: Index))
        Zext->setNonNeg(
            isUIntN(N: OldBitWidth - 1, x: ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(Ty: IndexTy, V: 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(Ty: ArrayTy, Ptr: Table, IdxList: GEPIndices, Name: "switch.gep");
    return Builder.CreateLoad(Ty: ArrayTy->getElementType(), Ptr: GEP, Name: "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
6984
6985bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6986 uint64_t TableSize,
6987 Type *ElementType) {
6988 auto *IT = dyn_cast<IntegerType>(Val: ElementType);
6989 if (!IT)
6990 return false;
6991 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6992 // are <= 15, we could try to narrow the type.
6993
6994 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6995 if (TableSize >= UINT_MAX / IT->getBitWidth())
6996 return false;
6997 return DL.fitsInLegalInteger(Width: TableSize * IT->getBitWidth());
6998}
6999
7000static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
7001 const DataLayout &DL) {
7002 // Allow any legal type.
7003 if (TTI.isTypeLegal(Ty))
7004 return true;
7005
7006 auto *IT = dyn_cast<IntegerType>(Val: Ty);
7007 if (!IT)
7008 return false;
7009
7010 // Also allow power of 2 integer types that have at least 8 bits and fit in
7011 // a register. These types are common in frontend languages and targets
7012 // usually support loads of these types.
7013 // TODO: We could relax this to any integer that fits in a register and rely
7014 // on ABI alignment and padding in the table to allow the load to be widened.
7015 // Or we could widen the constants and truncate the load.
7016 unsigned BitWidth = IT->getBitWidth();
7017 return BitWidth >= 8 && isPowerOf2_32(Value: BitWidth) &&
7018 DL.fitsInLegalInteger(Width: IT->getBitWidth());
7019}
7020
/// Return the default value of the switch.
Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7022
/// Return true if the chosen replacement is an array-based lookup table.
bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7024
/// Return true if the chosen replacement is a register-sized bit map.
bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7026
/// Return true if NumCases values spread over CaseRange slots are dense
/// enough (>= 40%) to justify building a table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensity = 40;

  // Bail out before the products below could overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  const uint64_t ScaledCases = NumCases * 100;
  const uint64_t RequiredWeight = CaseRange * MinDensity;
  return ScaledCases >= RequiredWeight;
}
7038
7039static bool isSwitchDense(ArrayRef<int64_t> Values) {
7040 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7041 uint64_t Range = Diff + 1;
7042 if (Range < Diff)
7043 return false; // Overflow.
7044
7045 return isSwitchDense(NumCases: Values.size(), CaseRange: Range);
7046}
7047
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
// TODO: We could support larger than legal types by limiting based on the
// number of loads required and/or table size. If the constants are small we
// could use smaller table entries and extend after the load.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
                                   const TargetTransformInfo &TTI,
                                   const DataLayout &DL,
                                   const SmallVector<Type *> &ResultTypes) {
  if (SI->getNumCases() > TableSize)
    return false; // TableSize overflowed.

  bool AllTablesFitInRegister = true;
  bool HasIllegalType = false;
  for (const auto &Ty : ResultTypes) {
    // Saturate this flag to true.
    HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);

    // Saturate this flag to false.
    AllTablesFitInRegister =
        AllTablesFitInRegister &&
        SwitchReplacement::wouldFitInRegister(DL, TableSize, ElementType: Ty);

    // If both flags saturate, we're done. NOTE: This *only* works with
    // saturating flags, and all flags have to saturate first due to the
    // non-deterministic behavior of iterating over a dense map.
    if (HasIllegalType && !AllTablesFitInRegister)
      break;
  }

  // If each table would fit in a register, we should build it anyway.
  if (AllTablesFitInRegister)
    return true;

  // Don't build a table that doesn't fit in-register if it has illegal types.
  if (HasIllegalType)
    return false;

  // Otherwise fall back to a density heuristic.
  return isSwitchDense(NumCases: SI->getNumCases(), CaseRange: TableSize);
}
7088
7089static bool shouldUseSwitchConditionAsTableIndex(
7090 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7091 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7092 const DataLayout &DL, const TargetTransformInfo &TTI) {
7093 if (MinCaseVal.isNullValue())
7094 return true;
7095 if (MinCaseVal.isNegative() ||
7096 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7097 !HasDefaultResults)
7098 return false;
7099 return all_of(Range: ResultTypes, P: [&](const auto &ResultType) {
7100 return SwitchReplacement::wouldFitInRegister(
7101 DL, TableSize: MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ElementType: ResultType);
7102 });
7103}
7104
/// Try to reuse the switch table index compare. Following pattern:
/// \code
/// if (idx < tablesize)
///    r = table[idx]; // table does not contain default_value
/// else
///    r = default_value;
/// if (r != default_value)
///    ...
/// \endcode
/// Is optimized to:
/// \code
/// cond = idx < tablesize;
/// if (cond)
///    r = table[idx];
/// else
///    r = default_value;
/// if (cond)
///    ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
///
/// \p PhiUser is one user of the lookup-result phi; the rewrite only applies
/// when it is an icmp against a constant that lives in \p PhiBlock.
/// \p RangeCheckBranch is the conditional branch guarding the table lookup,
/// \p DefaultValue is the phi's incoming value on the default path, and
/// \p Values holds the (case value, result) pairs stored in the table.
static void reuseTableCompare(
    User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  ICmpInst *CmpInst = dyn_cast<ICmpInst>(Val: PhiUser);
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // Only compares against a constant can be folded per table entry below.
  Constant *CmpOp1 = dyn_cast<Constant>(Val: CmpInst->getOperand(i_nocapture: 1));
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(Ty: RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(Ty: RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  const DataLayout &DL = PhiBlock->getDataLayout();
  Constant *DefaultConst = ConstantFoldCompareInstOperands(
      Predicate: CmpInst->getPredicate(), LHS: DefaultValue, RHS: CmpOp1, DL);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result. If every table entry folds to the opposite boolean of
  // the default result, the compare is equivalent to "came through the
  // table" vs. "came through the default path" — exactly what the range
  // check already computes.
  for (auto ValuePair : Values) {
    Constant *CaseConst = ConstantFoldCompareInstOperands(
        Predicate: CmpInst->getPredicate(), LHS: ValuePair.second, RHS: CmpOp1, DL);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(BB: PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(V: RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        V1: RangeCmp, V2: ConstantInt::get(Ty: RangeCmp->getType(), V: 1), Name: "inverted.cmp",
        InsertBefore: RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(V: InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
7186
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
///
/// \p ConvertSwitchToLookupTable gates whether lookup tables, bit maps and
/// hole-check masks may actually be materialized; when false (early in the
/// pipeline) such replacements are analyzed but rejected so that they do not
/// inhibit further optimization.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
                                 DomTreeUpdater *DTU, const DataLayout &DL,
                                 const TargetTransformInfo &TTI,
                                 bool ConvertSwitchToLookupTable) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  // Per-phi list of (case value, incoming constant) pairs.
  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
  SmallDenseMap<PHINode *, ResultListTy> ResultLists;

  SmallDenseMap<PHINode *, Constant *> DefaultResults;
  SmallVector<Type *> ResultTypes;
  // PHIs preserves a deterministic iteration order over ResultLists' keys.
  SmallVector<PHINode *, 4> PHIs;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    if (CaseVal->getValue().slt(RHS: MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(RHS: MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
    ResultsTy Results;
    // Give up if any case does not feed the common destination's phis with
    // constants.
    if (!getCaseResults(SI, CaseVal, CaseDest: CI->getCaseSuccessor(), CommonDest: &CommonDest,
                        Res&: Results, DL, TTI))
      return false;

    // Append the result and result types from this case to the list for each
    // phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(Key: PHI);
      if (Inserted)
        PHIs.push_back(Elt: PHI);
      It->second.push_back(Elt: std::make_pair(x&: CaseVal, y&: Value));
      ResultTypes.push_back(Elt: PHI->getType());
    }
  }

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest,
                     Res&: DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      MinCaseVal&: *MinCaseVal, MaxCaseVal: *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  // Number of table slots, and the value subtracted from the switch condition
  // to form the table index.
  uint64_t TableSize;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    TableSize = MaxCaseVal->getLimitedValue() + 1;
    TableIndexOffset = ConstantInt::get(Ty: MaxCaseVal->getIntegerType(), V: 0);
  } else {
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

    TableIndexOffset = MinCaseVal;
  }

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  uint64_t NumResults = ResultLists[PHIs[0]].size();
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(Width: TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  // Compute the table index value.
  Value *TableIndex;
  if (UseSwitchConditionAsTableIndex) {
    TableIndex = SI->getCondition();
    if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. It will use the default result to fill in the table hole later,
      // so make sure it exists.
      ConstantRange CR =
          computeConstantRange(V: TableIndex, /* ForSigned */ false);
      // Growing the table shouldn't have any size impact, as checked by
      // wouldFitInRegister.
      // TODO: Consider growing the table also when it doesn't fit in a register
      // if no optsize is specified.
      const uint64_t UpperBound = CR.getUpper().getLimitedValue();
      if (!CR.isUpperWrapped() &&
          all_of(Range&: ResultTypes, P: [&](const auto &ResultType) {
            return SwitchReplacement::wouldFitInRegister(DL, TableSize: UpperBound,
                                                         ElementType: ResultType);
          })) {
        // There may be some case index larger than the UpperBound (unreachable
        // case), so make sure the table size does not get smaller.
        TableSize = std::max(a: UpperBound, b: TableSize);
        // The default branch is unreachable after we enlarge the lookup table.
        // Adjust DefaultIsReachable to reuse code path.
        DefaultIsReachable = false;
      }
    }
  }

  // Keep track of the switch replacement for each phi
  SmallDenseMap<PHINode *, SwitchReplacement> PhiToReplacementMap;
  for (PHINode *PHI : PHIs) {
    const auto &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();
    // Use any value to fill the lookup table holes.
    Constant *DefaultVal =
        AllHolesArePoison ? PoisonValue::get(T: ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
                                  ResultList, DefaultVal, DL, FuncName);
    PhiToReplacementMap.insert(KV: {PHI, Replacement});
  }

  bool AnyLookupTables = any_of(
      Range&: PhiToReplacementMap, P: [](auto &KV) { return KV.second.isLookupTable(); });
  bool AnyBitMaps = any_of(Range&: PhiToReplacementMap,
                           P: [](auto &KV) { return KV.second.isBitMap(); });

  // A few conditions prevent the generation of lookup tables:
  // 1. The target does not support lookup tables.
  // 2. The "no-jump-tables" function attribute is set.
  // However, these objections do not apply to other switch replacements, like
  // the bitmap, so we only stop here if any of these conditions are met and we
  // want to create a LUT. Otherwise, continue with the switch replacement.
  if (AnyLookupTables &&
      (!TTI.shouldBuildLookupTables() ||
       Fn->getFnAttribute(Kind: "no-jump-tables").getValueAsBool()))
    return false;

  // In the early optimization pipeline, disable formation of lookup tables,
  // bit maps and mask checks, as they may inhibit further optimization.
  if (!ConvertSwitchToLookupTable &&
      (AnyLookupTables || AnyBitMaps || NeedMask))
    return false;

  Builder.SetInsertPoint(SI);
  // TableIndex is the switch condition - TableIndexOffset if we don't
  // use the condition directly
  if (!UseSwitchConditionAsTableIndex) {
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res =
          MaxCaseVal->getValue().ssub_ov(RHS: MinCaseVal->getValue(), Overflow&: MayWrap);
      (void)Res;
    }
    TableIndex = Builder.CreateSub(LHS: SI->getCondition(), RHS: TableIndexOffset,
                                   Name: "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  // Dominator tree updates are batched up and applied once at the end.
  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Context&: Mod.getContext(), Name: "switch.lookup", Parent: CommonDest->getParent(), InsertBefore: CommonDest);

  CondBrInst *RangeCheckBranch = nullptr;
  // CondBranch receives the branch weights of the original switch below.
  CondBrInst *CondBranch = nullptr;

  Builder.SetInsertPoint(SI);
  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // Every possible index hits the table: branch to it unconditionally.
    Builder.CreateBr(Dest: LookupBB);
    if (DTU)
      Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
    // Note: We call removePredecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    // Guard the lookup with an in-range check; out-of-range indices take the
    // original default destination.
    Value *Cmp = Builder.CreateICmpULT(
        LHS: TableIndex, RHS: ConstantInt::get(Ty: MinCaseVal->getType(), V: TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cond: Cmp, True: LookupBB, False: SI->getDefaultDest());
    CondBranch = RangeCheckBranch;
    if (DTU)
      Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Context&: Mod.getContext(), Name: "switch.lookup",
                                  Parent: CommonDest->getParent(), InsertBefore: CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(A: std::max(a: 7ULL, b: TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Context&: Mod.getContext(), V: MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(V: TableIndex, DestTy: MapTy, Name: "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(LHS: TableMask, RHS: MaskIndex, Name: "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        V: Shifted, DestTy: Type::getInt1Ty(C&: Mod.getContext()), Name: "switch.lobit");
    CondBranch = Builder.CreateCondBr(Cond: LoBit, True: LookupBB, False: SI->getDefaultDest());
    if (DTU) {
      Updates.push_back(x: {DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back(x: {DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(Succ: SI->getDefaultDest(), NewPred: MaskBB, ExistPred: BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(Pred: BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back(x: {DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];
    auto Replacement = PhiToReplacementMap.at(Val: PHI);
    auto *Result = Replacement.replaceSwitch(Index: TableIndex, Builder, DL, Func: Fn);
    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(PhiUser: User, PhiBlock, RangeCheckBranch,
                          DefaultValue: Replacement.getDefaultValue(), Values: ResultList);
      }
    }

    PHI->addIncoming(V: Result, BB: LookupBB);
  }

  Builder.CreateBr(Dest: CommonDest);
  if (DTU)
    Updates.push_back(x: {DominatorTree::Insert, LookupBB, CommonDest});

  // Redistribute the switch's branch weights onto the new conditional branch:
  // all case weights flow to the lookup side, the default weight stays with
  // the default side.
  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(I: *SI, Weights&: BranchWeights);
  uint64_t ToLookupWeight = 0;
  uint64_t ToDefaultWeight = 0;

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
    BasicBlock *Succ = SI->getSuccessor(idx: I);

    if (Succ == SI->getDefaultDest()) {
      if (HasBranchWeights)
        ToDefaultWeight += BranchWeights[I];
      continue;
    }
    Succ->removePredecessor(Pred: BB);
    if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
      Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
    if (HasBranchWeights)
      ToLookupWeight += BranchWeights[I];
  }
  SI->eraseFromParent();
  if (HasBranchWeights)
    setFittedBranchWeights(I&: *CondBranch, Weights: {ToLookupWeight, ToDefaultWeight},
                           /*IsExpected=*/false);
  if (DTU)
    DTU->applyUpdates(Updates);

  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7538
7539/// Try to transform a switch that has "holes" in it to a contiguous sequence
7540/// of cases.
7541///
7542/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7543/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7544///
7545/// This converts a sparse switch into a dense switch which allows better
7546/// lowering and could also allow transforming into a lookup table.
7547static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7548 const DataLayout &DL,
7549 const TargetTransformInfo &TTI) {
7550 auto *CondTy = cast<IntegerType>(Val: SI->getCondition()->getType());
7551 if (CondTy->getIntegerBitWidth() > 64 ||
7552 !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
7553 return false;
7554 // Only bother with this optimization if there are more than 3 switch cases;
7555 // SDAG will only bother creating jump tables for 4 or more cases.
7556 if (SI->getNumCases() < 4)
7557 return false;
7558
7559 // This transform is agnostic to the signedness of the input or case values. We
7560 // can treat the case values as signed or unsigned. We can optimize more common
7561 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7562 // as signed.
7563 SmallVector<int64_t,4> Values;
7564 for (const auto &C : SI->cases())
7565 Values.push_back(Elt: C.getCaseValue()->getValue().getSExtValue());
7566 llvm::sort(C&: Values);
7567
7568 // If the switch is already dense, there's nothing useful to do here.
7569 if (isSwitchDense(Values))
7570 return false;
7571
7572 // First, transform the values such that they start at zero and ascend.
7573 int64_t Base = Values[0];
7574 for (auto &V : Values)
7575 V -= (uint64_t)(Base);
7576
7577 // Now we have signed numbers that have been shifted so that, given enough
7578 // precision, there are no negative values. Since the rest of the transform
7579 // is bitwise only, we switch now to an unsigned representation.
7580
7581 // This transform can be done speculatively because it is so cheap - it
7582 // results in a single rotate operation being inserted.
7583
7584 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7585 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7586 // less than 64.
7587 unsigned Shift = 64;
7588 for (auto &V : Values)
7589 Shift = std::min(a: Shift, b: (unsigned)llvm::countr_zero(Val: (uint64_t)V));
7590 assert(Shift < 64);
7591 if (Shift > 0)
7592 for (auto &V : Values)
7593 V = (int64_t)((uint64_t)V >> Shift);
7594
7595 if (!isSwitchDense(Values))
7596 // Transform didn't create a dense switch.
7597 return false;
7598
7599 // The obvious transform is to shift the switch condition right and emit a
7600 // check that the condition actually cleanly divided by GCD, i.e.
7601 // C & (1 << Shift - 1) == 0
7602 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7603 //
7604 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7605 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7606 // are nonzero then the switch condition will be very large and will hit the
7607 // default case.
7608
7609 auto *Ty = cast<IntegerType>(Val: SI->getCondition()->getType());
7610 Builder.SetInsertPoint(SI);
7611 Value *Sub =
7612 Builder.CreateSub(LHS: SI->getCondition(), RHS: ConstantInt::getSigned(Ty, V: Base));
7613 Value *Rot = Builder.CreateIntrinsic(
7614 RetTy: Ty, ID: Intrinsic::fshl,
7615 Args: {Sub, Sub, ConstantInt::get(Ty, V: Ty->getBitWidth() - Shift)});
7616 SI->replaceUsesOfWith(From: SI->getCondition(), To: Rot);
7617
7618 for (auto Case : SI->cases()) {
7619 auto *Orig = Case.getCaseValue();
7620 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7621 Case.setValue(cast<ConstantInt>(Val: ConstantInt::get(Ty, V: Sub.lshr(shiftAmt: Shift))));
7622 }
7623 return true;
7624}
7625
/// Tries to transform the switch when the condition is umin with a constant.
/// In that case, the default branch can be replaced by the constant's branch.
/// This method also removes dead cases when the simplification cannot replace
/// the default branch.
///
/// For example:
/// switch(umin(a, 3)) {
/// case 0:
/// case 1:
/// case 2:
/// case 3:
/// case 4:
/// // ...
/// default:
/// unreachable
/// }
///
/// Transforms into:
///
/// switch(a) {
/// case 0:
/// case 1:
/// case 2:
/// default:
/// // This is case 3
/// }
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU) {
  Value *A;
  ConstantInt *Constant;

  if (!match(V: SI->getCondition(), P: m_UMin(L: m_Value(V&: A), R: m_ConstantInt(CI&: Constant))))
    return false;

  SmallVector<DominatorTree::UpdateType> Updates;
  // The wrapper keeps branch-weight metadata consistent while cases are
  // removed and the default destination is replaced.
  SwitchInstProfUpdateWrapper SIW(*SI);
  BasicBlock *BB = SIW->getParent();

  // Dead cases are removed even when the simplification fails.
  // A case is dead when its value is higher than the Constant, since
  // umin(a, Constant) can never produce such a value.
  for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
    if (!I->getCaseValue()->getValue().ugt(RHS: Constant->getValue())) {
      ++I;
      continue;
    }
    BasicBlock *DeadCaseBB = I->getCaseSuccessor();
    DeadCaseBB->removePredecessor(Pred: BB);
    Updates.push_back(Elt: {DominatorTree::Delete, BB, DeadCaseBB});
    I = SIW.removeCase(I);
    // removeCase invalidates iterators; refresh the end iterator.
    E = SIW->case_end();
  }

  auto Case = SI->findCaseValue(C: Constant);
  // If the case value is not found, `findCaseValue` returns the default case.
  // In this scenario, since there is no explicit `case 3:`, the simplification
  // fails. The simplification also fails when the switch's default destination
  // is reachable.
  if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
    if (DTU)
      DTU->applyUpdates(Updates);
    // Report whether the dead-case cleanup above changed anything.
    return !Updates.empty();
  }

  // All values >= Constant are collapsed onto Constant by the umin, so the
  // Constant's destination becomes the new default and the umin itself can
  // be bypassed.
  BasicBlock *Unreachable = SI->getDefaultDest();
  SIW.replaceDefaultDest(I: Case);
  SIW.removeCase(I: Case);
  SIW->setCondition(A);

  Updates.push_back(Elt: {DominatorTree::Delete, BB, Unreachable});

  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
7700
/// Tries to transform switch of powers of two to reduce switch range.
/// For example, switch like:
/// switch (C) { case 1: case 2: case 64: case 128: }
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
/// This transformation allows better lowering and may transform the switch
/// instruction into a sequence of bit manipulation and a smaller
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU,
                                        const DataLayout &DL,
                                        const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Val: Condition->getType());

  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
    return false;

  // Ensure trailing zeroes count intrinsic emission is not too expensive.
  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
                                {Condition, ConstantInt::getTrue(Context)});
  if (TTI.getIntrinsicInstrCost(ICA: Attrs, CostKind: TTI::TCK_SizeAndLatency) >
      TTI::TCC_Basic * 2)
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will start emitting jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // Check that switch cases are powers of two.
  SmallVector<uint64_t, 4> Values;
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(Value: CaseValue))
      Values.push_back(Elt: CaseValue);
    else
      return false;
  }

  // isSwitchDense requires case values to be sorted.
  llvm::sort(C&: Values);
  if (!isSwitchDense(NumCases: Values.size(), CaseRange: llvm::countr_zero(Val: Values.back()) -
                                           llvm::countr_zero(Val: Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  if (!SI->defaultDestUnreachable()) {
    // Let non-power-of-two inputs jump to the default case, when the latter is
    // reachable. The cttz of such inputs would otherwise collide with a real
    // case, so filter them out with a ctpop == 1 check first.
    auto *PopC = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: Condition);
    auto *IsPow2 = Builder.CreateICmpEQ(LHS: PopC, RHS: ConstantInt::get(Ty: CondTy, V: 1));

    auto *OrigBB = SI->getParent();
    auto *DefaultCaseBB = SI->getDefaultDest();
    // SplitBlock moves SI into SplitBB and leaves an unconditional branch in
    // OrigBB, which we replace with the IsPow2 conditional branch below.
    BasicBlock *SplitBB = SplitBlock(Old: OrigBB, SplitPt: SI, DTU);
    auto It = OrigBB->getTerminator()->getIterator();
    SmallVector<uint32_t> Weights;
    auto HasWeights =
        !ProfcheckDisableMetadataFixes && extractBranchWeights(I: *SI, Weights);
    auto *BI = CondBrInst::Create(Cond: IsPow2, IfTrue: SplitBB, IfFalse: DefaultCaseBB, InsertBefore: It);
    if (HasWeights && any_of(Range&: Weights, P: not_equal_to(Arg: 0))) {
      // IsPow2 covers a subset of the cases in which we'd go to the default
      // label. The other is those powers of 2 that don't appear in the case
      // statement. We don't know the distribution of the values coming in, so
      // the safest is to split 50-50 the original probability to `default`.
      uint64_t OrigDenominator =
          sum_of(Range: map_range(C&: Weights, F: StaticCastTo<uint64_t>));
      SmallVector<uint64_t> NewWeights(2);
      NewWeights[1] = Weights[0] / 2;
      NewWeights[0] = OrigDenominator - NewWeights[1];
      setFittedBranchWeights(I&: *BI, Weights: NewWeights, /*IsExpected=*/false);
      // The probability of executing the default block stays constant. It was
      // p_d = Weights[0] / OrigDenominator
      // we rewrite as W/D
      // We want to find the probability of the default branch of the switch
      // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
      // i.e. the original probability is the probability we go to the default
      // branch from the BI branch, or we take the default branch on the SI.
      // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
      // This matches using W/2 for the default branch probability numerator and
      // D-W/2 as the denominator.
      Weights[0] = NewWeights[1];
      uint64_t CasesDenominator = OrigDenominator - Weights[0];
      for (auto &W : drop_begin(RangeOrContainer&: Weights))
        W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;

      setBranchWeights(I&: *SI, Weights, /*IsExpected=*/false);
    }
    // BI is handling the default case for SI, and so should share its DebugLoc.
    BI->setDebugLoc(SI->getDebugLoc());
    // Erase the unconditional branch SplitBlock created; BI replaces it.
    It->eraseFromParent();

    addPredecessorToBlock(Succ: DefaultCaseBB, NewPred: OrigBB, ExistPred: SplitBB);
    if (DTU)
      DTU->applyUpdates(Updates: {{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
  }

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(Ty: OrigValue->getIntegerType(),
                                   V: OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      ID: Intrinsic::cttz, Types: {CondTy}, Args: {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
7820
/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
/// the same destination.
///
/// A [us]cmp produces one of {-1, 0, 1}. If the switch routes at most two
/// distinct destinations for those three results, the switch collapses to a
/// single icmp + conditional branch that isolates the "odd one out" result
/// (Res) into Succ and sends the other two results to OtherSucc.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
                                         DomTreeUpdater *DTU) {
  auto *Cmp = dyn_cast<CmpIntrinsic>(Val: SI->getCondition());
  if (!Cmp || !Cmp->hasOneUse())
    return false;

  SmallVector<uint32_t, 4> Weights;
  bool HasWeights = extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights);
  if (!HasWeights)
    Weights.resize(N: 4); // Avoid checking HasWeights everywhere.

  // Normalize to [us]cmp == Res ? Succ : OtherSucc.
  int64_t Res;
  BasicBlock *Succ, *OtherSucc;
  uint32_t SuccWeight = 0, OtherSuccWeight = 0;
  BasicBlock *Unreachable = nullptr;

  if (SI->getNumCases() == 2) {
    // Two explicit cases: the third cmp result falls through to the default
    // destination, which becomes Succ.
    // Find which of 1, 0 or -1 is missing (handled by default dest).
    SmallSet<int64_t, 3> Missing;
    Missing.insert(V: 1);
    Missing.insert(V: 0);
    Missing.insert(V: -1);

    Succ = SI->getDefaultDest();
    SuccWeight = Weights[0]; // Weight index 0 is the default destination.
    OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val)
        return false;
      // Reject duplicate case values or values outside {-1, 0, 1}.
      if (!Missing.erase(V: *Val))
        return false;
      // Both explicit cases must share a single destination.
      if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
        return false;
      OtherSucc = Case.getCaseSuccessor();
      OtherSuccWeight += Weights[Case.getSuccessorIndex()];
    }

    assert(Missing.size() == 1 && "Should have one case left");
    Res = *Missing.begin();
  } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
    // Three explicit cases with an unreachable default: exactly one case must
    // go to a unique destination (Succ) and the other two share OtherSucc.
    // Normalize so that Succ is taken once and OtherSucc twice.
    Unreachable = SI->getDefaultDest();
    Succ = OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      BasicBlock *NewSucc = Case.getCaseSuccessor();
      uint32_t Weight = Weights[Case.getSuccessorIndex()];
      if (!OtherSucc || OtherSucc == NewSucc) {
        OtherSucc = NewSucc;
        OtherSuccWeight += Weight;
      } else if (!Succ) {
        Succ = NewSucc;
        SuccWeight = Weight;
      } else if (Succ == NewSucc) {
        // Succ was seen twice; swap so the twice-taken block is OtherSucc.
        std::swap(a&: Succ, b&: OtherSucc);
        std::swap(a&: SuccWeight, b&: OtherSuccWeight);
      } else
        return false;
    }
    // Validate case values and recover Res, the cmp result routed to Succ.
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val || (Val != 1 && Val != 0 && Val != -1))
        return false;
      if (Case.getCaseSuccessor() == Succ) {
        Res = *Val;
        break;
      }
    }
  } else {
    return false;
  }

  // Determine predicate for the missing case. Res is guaranteed to be one of
  // {1, 0, -1} by the validation above, so Pred is always initialized.
  ICmpInst::Predicate Pred;
  switch (Res) {
  case 1:
    Pred = ICmpInst::ICMP_UGT;
    break;
  case 0:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case -1:
    Pred = ICmpInst::ICMP_ULT;
    break;
  }
  if (Cmp->isSigned())
    Pred = ICmpInst::getSignedPredicate(Pred);

  MDNode *NewWeights = nullptr;
  if (HasWeights)
    NewWeights = MDBuilder(SI->getContext())
                     .createBranchWeights(TrueWeight: SuccWeight, FalseWeight: OtherSuccWeight);

  BasicBlock *BB = SI->getParent();
  Builder.SetInsertPoint(SI->getIterator());
  Value *ICmp = Builder.CreateICmp(P: Pred, LHS: Cmp->getLHS(), RHS: Cmp->getRHS());
  Builder.CreateCondBr(Cond: ICmp, True: Succ, False: OtherSucc, BranchWeights: NewWeights,
                       Unpredictable: SI->getMetadata(KindID: LLVMContext::MD_unpredictable));
  // The switch had multiple edges to OtherSucc (and possibly Unreachable);
  // drop the PHI entries for the edges that no longer exist before erasing.
  OtherSucc->removePredecessor(Pred: BB);
  if (Unreachable)
    Unreachable->removePredecessor(Pred: BB);
  SI->eraseFromParent();
  Cmp->eraseFromParent();
  // BB still branches to Succ and OtherSucc; only the edge to the unreachable
  // default (if any) disappears from the CFG.
  if (DTU && Unreachable)
    DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, Unreachable}});
  return true;
}
7933
/// Checking whether two BBs are equal depends on the contents of the
/// BasicBlock and the incoming values of their successor PHINodes.
/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
/// calling this function on each BasicBlock every time isEqual is called,
/// especially since the same BasicBlock may be passed as an argument multiple
/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
/// of the incoming values.
struct EqualBBWrapper {
  // The block being compared for structural equality.
  BasicBlock *BB;

  // One Phi usually has < 8 incoming values.
  using BB2ValueMap = SmallDenseMap<BasicBlock *, Value *, 8>;
  using Phi2IVsMap = DenseMap<PHINode *, BB2ValueMap>;
  // Shared, precomputed PHI -> (pred -> incoming value) cache; not owned.
  Phi2IVsMap *PhiPredIVs;

  // We only merge the identical non-entry BBs with
  // - terminator unconditional br to Succ (pending relaxation),
  // - does not have address taken / weird control.
  static bool canBeMerged(const BasicBlock *BB) {
    assert(BB && "Expected non-null BB");
    // Entry block cannot be eliminated or have predecessors.
    if (BB->isEntryBlock())
      return false;

    // Single successor and must be Succ.
    // FIXME: Relax that the terminator is a BranchInst by checking for equality
    // on other kinds of terminators. We decide to only support unconditional
    // branches for now for compile time reasons.
    auto *BI = dyn_cast<UncondBrInst>(Val: BB->getTerminator());
    if (!BI)
      return false;

    // Avoid blocks that are "address-taken" (blockaddress) or have unusual
    // uses.
    if (BB->hasAddressTaken() || BB->isEHPad())
      return false;

    // TODO: relax this condition to merge equal blocks with >1 instructions?
    // Here, we use a O(1) form of the O(n) comparison of `size() != 1`.
    // front() == back() iff the block holds exactly one instruction (the
    // unconditional branch checked above).
    if (&BB->front() != &BB->back())
      return false;

    // The BB must have at least one predecessor.
    if (pred_empty(BB))
      return false;

    return true;
  }
};
7984
// DenseMapInfo specialization that hashes/compares EqualBBWrapper pointers by
// the *structure* of the wrapped block (successor + PHI incoming values)
// rather than by pointer identity, so a DenseSet of these groups duplicates.
template <> struct llvm::DenseMapInfo<const EqualBBWrapper *> {
  static const EqualBBWrapper *getEmptyKey() {
    return static_cast<EqualBBWrapper *>(DenseMapInfo<void *>::getEmptyKey());
  }
  static const EqualBBWrapper *getTombstoneKey() {
    return static_cast<EqualBBWrapper *>(
        DenseMapInfo<void *>::getTombstoneKey());
  }
  static unsigned getHashValue(const EqualBBWrapper *EBW) {
    BasicBlock *BB = EBW->BB;
    UncondBrInst *BI = cast<UncondBrInst>(Val: BB->getTerminator());
    assert(BB->size() == 1 && "Expected just a single branch in the BB");

    // Since we assume the BB is just a single UncondBrInst with a single
    // successor, we hash as the BB and the incoming Values of its successor
    // PHIs. Initially, we tried to just use the successor BB as the hash, but
    // including the incoming PHI values leads to better performance.
    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
    // time and passing it in EqualBBWrapper, but this slowed down the average
    // compile time without having any impact on the worst case compile time.
    BasicBlock *Succ = BI->getSuccessor();
    auto PhiValsForBB = map_range(C: Succ->phis(), F: [&](PHINode &Phi) {
      return (*EBW->PhiPredIVs)[&Phi][BB];
    });
    return hash_combine(args: Succ, args: hash_combine_range(R&: PhiValsForBB));
  }
  static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
    // Sentinel keys must compare by pointer identity only; dereferencing them
    // would be invalid.
    auto *EKey = DenseMapInfo<EqualBBWrapper *>::getEmptyKey();
    auto *TKey = DenseMapInfo<EqualBBWrapper *>::getTombstoneKey();
    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
      return LHS == RHS;

    BasicBlock *A = LHS->BB;
    BasicBlock *B = RHS->BB;

    // FIXME: we checked that the size of A and B are both 1 in
    // mergeIdenticalUncondBBs to make the Case list smaller to
    // improve performance. If we decide to support BasicBlocks with more
    // than just a single instruction, we need to check that A.size() ==
    // B.size() here, and we need to check more than just the BranchInsts
    // for equality.

    UncondBrInst *ABI = cast<UncondBrInst>(Val: A->getTerminator());
    UncondBrInst *BBI = cast<UncondBrInst>(Val: B->getTerminator());
    if (ABI->getSuccessor() != BBI->getSuccessor())
      return false;

    // Need to check that PHIs in successor have matching values.
    BasicBlock *Succ = ABI->getSuccessor();
    auto IfPhiIVMatch = [&](PHINode &Phi) {
      // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
      // query.
      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
      return PredIVs[A] == PredIVs[B];
    };
    return all_of(Range: Succ->phis(), P: IfPhiIVMatch);
  }
};
8043
// Merge identical BBs into one of them.
//
// Candidates must already satisfy EqualBBWrapper::canBeMerged (single
// unconditional branch, non-entry, not address-taken). Duplicates are
// detected via a DenseSet keyed on block structure; for each duplicate, all
// incoming edges are redirected to the first ("kept") equivalent block and
// the dead block is left unreachable for later DCE. Returns true if any
// blocks were merged.
static bool mergeIdenticalBBs(ArrayRef<BasicBlock *> Candidates,
                              DomTreeUpdater *DTU) {
  if (Candidates.size() < 2)
    return false;

  // Build Cases. Skip BBs that are not candidates for simplification. Mark
  // PHINodes which need to be processed into PhiPredIVs. We decide to process
  // an entire PHI at once after the loop, opposed to calling
  // getIncomingValueForBlock inside this loop, since each call to
  // getIncomingValueForBlock is O(|Preds|).
  EqualBBWrapper::Phi2IVsMap PhiPredIVs;
  SmallVector<EqualBBWrapper> BBs2Merge;
  BBs2Merge.reserve(N: Candidates.size());
  SmallSetVector<PHINode *, 8> Phis;

  for (BasicBlock *BB : Candidates) {
    BasicBlock *Succ = BB->getSingleSuccessor();
    assert(Succ && "Expected unconditional BB");
    BBs2Merge.emplace_back(Args: EqualBBWrapper{.BB: BB, .PhiPredIVs: &PhiPredIVs});
    Phis.insert_range(R: make_pointer_range(Range: Succ->phis()));
  }

  // Precompute a data structure to improve performance of isEqual for
  // EqualBBWrapper.
  PhiPredIVs.reserve(NumEntries: Phis.size());
  for (PHINode *Phi : Phis) {
    auto &IVs =
        PhiPredIVs.try_emplace(Key: Phi, Args: Phi->getNumIncomingValues()).first->second;
    // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
    // O(|Pred|).
    for (auto &IV : Phi->incoming_values())
      IVs.insert(KV: {Phi->getIncomingBlock(U: IV), IV.get()});
  }

  // Group duplicates using DenseSet with custom equality/hashing.
  // Build a set such that if the EqualBBWrapper exists in the set and another
  // EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
  // the set should be replaced with the one in the set. If the EqualBBWrapper
  // is not in the set, then it should be added to the set so other
  // EqualBBWrapper can check against it in the same manner. We use
  // EqualBBWrapper instead of just BasicBlock because we'd like to pass around
  // information to isEquality, getHashValue, and when doing the replacement
  // with better performance.
  DenseSet<const EqualBBWrapper *> Keep;
  Keep.reserve(Size: BBs2Merge.size());

  // Accumulated DT updates; applied in one batch at the end.
  SmallVector<DominatorTree::UpdateType> Updates;
  Updates.reserve(N: BBs2Merge.size() * 2);

  bool MadeChange = false;

  // Helper: redirect all edges X -> DeadPred to X -> LivePred.
  auto RedirectIncomingEdges = [&](BasicBlock *Dead, BasicBlock *Live) {
    // Use a SetVector so each unique predecessor is processed exactly once,
    // in a deterministic order.
    SmallSetVector<BasicBlock *, 8> DeadPreds(llvm::from_range,
                                              predecessors(BB: Dead));
    if (DTU) {
      // All predecessors of DeadPred (except the common predecessor) will be
      // moved to LivePred.
      Updates.reserve(N: Updates.size() + DeadPreds.size() * 2);
      SmallPtrSet<BasicBlock *, 16> LivePreds(llvm::from_range,
                                              predecessors(BB: Live));
      for (BasicBlock *PredOfDead : DeadPreds) {
        // Do not modify those common predecessors of DeadPred and LivePred.
        if (!LivePreds.contains(Ptr: PredOfDead))
          Updates.push_back(Elt: {DominatorTree::Insert, PredOfDead, Live});
        Updates.push_back(Elt: {DominatorTree::Delete, PredOfDead, Dead});
      }
    }
    LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
               Dead->printAsOperand(dbgs()); dbgs() << " with pred ";
               Live->printAsOperand(dbgs()); dbgs() << " for ";
               Live->getSingleSuccessor()->printAsOperand(dbgs());
               dbgs() << "\n");
    // Replace successors in all predecessors of DeadPred.
    for (BasicBlock *PredOfDead : DeadPreds) {
      Instruction *T = PredOfDead->getTerminator();
      T->replaceSuccessorWith(OldBB: Dead, NewBB: Live);
    }
  };

  // Try to eliminate duplicate predecessors.
  for (const auto &EBW : BBs2Merge) {
    // EBW is a candidate for simplification. If we find a duplicate BB,
    // replace it.
    const auto &[It, Inserted] = Keep.insert(V: &EBW);
    if (Inserted)
      continue;

    // Found duplicate: merge P into canonical predecessor It->Pred.
    BasicBlock *KeepBB = (*It)->BB;
    BasicBlock *DeadBB = EBW.BB;

    // Avoid merging a BB with itself.
    if (KeepBB == DeadBB)
      continue;

    // Redirect all edges into DeadPred to KeepPred.
    RedirectIncomingEdges(DeadBB, KeepBB);

    // Now DeadBB should become unreachable; leave DCE to later,
    // but we can try to simplify it if it only branches to Succ.
    // (We won't erase here to keep the routine simple and DT-safe.)
    assert(pred_empty(DeadBB) && "DeadBB should be unreachable.");
    MadeChange = true;
  }

  if (DTU && !Updates.empty())
    DTU->applyUpdates(Updates);

  return MadeChange;
}
8156
/// Merge structurally identical switch arms (single-branch successor blocks
/// of \p SI) into one representative block. Returns true if anything changed.
bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
                                                 DomTreeUpdater *DTU) {
  // Collect candidate switch-arms top-down.
  SmallSetVector<BasicBlock *, 16> FilteredArms(
      llvm::from_range,
      make_filter_range(Range: successors(I: SI), Pred: EqualBBWrapper::canBeMerged));
  return mergeIdenticalBBs(Candidates: FilteredArms.getArrayRef(), DTU);
}
8165
/// Merge structurally identical predecessors of \p BB (each a single
/// unconditional branch into BB) into one representative block. Returns true
/// if anything changed.
bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
                                                   DomTreeUpdater *DTU) {
  // Need at least 2 predecessors to do anything.
  if (!BB || !BB->hasNPredecessorsOrMore(N: 2))
    return false;

  // Compilation time consideration: retain the canonical loop, otherwise, we
  // require more time in the later loop canonicalization.
  if (Options.NeedCanonicalLoop && is_contained(Range&: LoopHeaders, Element: BB))
    return false;

  // Collect candidate predecessors bottom-up.
  SmallSetVector<BasicBlock *, 8> FilteredPreds(
      llvm::from_range,
      make_filter_range(Range: predecessors(BB), Pred: EqualBBWrapper::canBeMerged));
  return mergeIdenticalBBs(Candidates: FilteredPreds.getArrayRef(), DTU);
}
8183
/// Top-level driver for switch simplification. Tries a fixed sequence of
/// transforms; the first one that fires requests re-simplification of the
/// block. The ordering is deliberate (e.g. range-to-icmp runs early, lookup
/// tables late) — see the per-transform comments below.
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(TI: SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(TI: SI, Pred: OnlyPred, Builder))
        return requestResimplify();

    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Val: Cond))
      if (simplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->begin())
      if (foldValueComparisonIntoPredecessors(TI: SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, AC: Options.AC, DL))
    return requestResimplify();

  // Fold switch-over-[us]cmp into a single icmp + branch where possible.
  if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
    return requestResimplify();

  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion of switches to arithmetic or lookup table is disabled in
  // the early optimization pipeline, as it may lose information or make the
  // resulting code harder to analyze.
  if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
    if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
                             ConvertSwitchToLookupTable: Options.ConvertSwitchToLookupTable))
      return requestResimplify();

  if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (reduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(TI: SI, AllInstsEqOnly: !Options.HoistCommonInsts))
    return requestResimplify();

  // We can merge identical switch arms early to enhance more aggressive
  // optimization on switch.
  if (simplifyDuplicateSwitchArms(SI, DTU))
    return requestResimplify();

  if (simplifySwitchWhenUMin(SI, DTU))
    return requestResimplify();

  return false;
}
8253
/// Simplify an indirectbr: drop duplicate and non-address-taken destinations,
/// degrade to unreachable (0 dests) or an unconditional branch (1 dest), and
/// re-fit branch-weight metadata to the surviving destinations.
bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
  BasicBlock *BB = IBI->getParent();
  bool Changed = false;
  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(I: *IBI, Weights&: BranchWeights);

  // Pre-aggregate weights per destination so duplicates' weights are summed
  // before any destinations are removed below.
  DenseMap<const BasicBlock *, uint64_t> TargetWeight;
  if (HasBranchWeights)
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      TargetWeight[IBI->getDestination(i: I)] += BranchWeights[I];

  // Eliminate redundant destinations.
  SmallPtrSet<Value *, 8> Succs;
  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
  for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
    BasicBlock *Dest = IBI->getDestination(i: I);
    // A destination is removable if its address is never taken (it can never
    // be the runtime target) or if it already appears earlier in the list.
    if (!Dest->hasAddressTaken() || !Succs.insert(Ptr: Dest).second) {
      if (!Dest->hasAddressTaken())
        RemovedSuccs.insert(X: Dest);
      Dest->removePredecessor(Pred: BB);
      IBI->removeDestination(i: I);
      // removeDestination shifts the list down; re-examine index I.
      --I;
      --E;
      Changed = true;
    }
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve(n: RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back(x: {DominatorTree::Delete, BB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }

  if (IBI->getNumDestinations() == 0) {
    // If the indirectbr has no successors, change it to unreachable.
    new UnreachableInst(IBI->getContext(), IBI->getIterator());
    eraseTerminatorAndDCECond(TI: IBI);
    return true;
  }

  if (IBI->getNumDestinations() == 1) {
    // If the indirectbr has one successor, change it to a direct branch.
    UncondBrInst::Create(Target: IBI->getDestination(i: 0), InsertBefore: IBI->getIterator());
    eraseTerminatorAndDCECond(TI: IBI);
    return true;
  }
  if (HasBranchWeights) {
    // Rebuild weights for the surviving destinations from the pre-aggregated
    // per-target totals computed before removal.
    SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      NewBranchWeights[I] += TargetWeight.find(Val: IBI->getDestination(i: I))->second;
    setFittedBranchWeights(I&: *IBI, Weights: NewBranchWeights, /*IsExpected=*/false);
  }
  if (SelectInst *SI = dyn_cast<SelectInst>(Val: IBI->getAddress())) {
    if (simplifyIndirectBrOnSelect(IBI, SI))
      return requestResimplify();
  }
  return Changed;
}
8315
/// Given an block with only a single landing pad and a unconditional branch
/// try to find another basic block which this one can be merged with. This
/// handles cases where we have multiple invokes with unique landing pads, but
/// a shared handler.
///
/// We specifically choose to not worry about merging non-empty blocks
/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
/// practice, the optimizer produces empty landing pad blocks quite frequently
/// when dealing with exception dense code. (see: instcombine, gvn, if-else
/// sinking in this file)
///
/// This is primarily a code size optimization. We need to avoid performing
/// any transform which might inhibit optimization (such as our ability to
/// specialize a particular handler via tail commoning). We do this by not
/// merging any blocks which require us to introduce a phi. Since the same
/// values are flowing through both blocks, we don't lose any ability to
/// specialize. If anything, we make such specialization more likely.
///
/// TODO - This transformation could remove entries from a phi in the target
/// block when the inputs in the phi are the same for the two blocks being
/// merged. In some cases, this could result in removal of the PHI entirely.
static bool tryToMergeLandingPad(LandingPadInst *LPad, UncondBrInst *BI,
                                 BasicBlock *BB, DomTreeUpdater *DTU) {
  auto Succ = BB->getUniqueSuccessor();
  assert(Succ);
  // If there's a phi in the successor block, we'd likely have to introduce
  // a phi into the merged landing pad block.
  if (isa<PHINode>(Val: *Succ->begin()))
    return false;

  // Look for a sibling predecessor of Succ that is an identical landing pad
  // block (same landingpad clauses, same branch).
  for (BasicBlock *OtherPred : predecessors(BB: Succ)) {
    if (BB == OtherPred)
      continue;
    BasicBlock::iterator I = OtherPred->begin();
    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(Val&: I);
    if (!LPad2 || !LPad2->isIdenticalTo(I: LPad))
      continue;
    ++I;
    UncondBrInst *BI2 = dyn_cast<UncondBrInst>(Val&: I);
    if (!BI2 || !BI2->isIdenticalTo(I: BI))
      continue;

    std::vector<DominatorTree::UpdateType> Updates;

    // We've found an identical block. Update our predecessors to take that
    // path instead and make ourselves dead.
    SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
    for (BasicBlock *Pred : UniquePreds) {
      // Every predecessor of a landing pad is an invoke whose unwind edge
      // targets BB; retarget that edge at the identical block.
      InvokeInst *II = cast<InvokeInst>(Val: Pred->getTerminator());
      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
             "unexpected successor");
      II->setUnwindDest(OtherPred);
      if (DTU) {
        Updates.push_back(x: {DominatorTree::Insert, Pred, OtherPred});
        Updates.push_back(x: {DominatorTree::Delete, Pred, BB});
      }
    }

    // BB is now dead; detach it from its successors' PHIs and the CFG.
    SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
    for (BasicBlock *Succ : UniqueSuccs) {
      Succ->removePredecessor(Pred: BB);
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
    }

    // Replace BB's terminator with unreachable; actual deletion of the dead
    // block is left to later cleanup.
    IRBuilder<> Builder(BI);
    Builder.CreateUnreachable();
    BI->eraseFromParent();
    if (DTU)
      DTU->applyUpdates(Updates);
    return true;
  }
  return false;
}
8390
/// Top-level driver for simplifying a block ending in an unconditional
/// branch: fold away empty blocks, blocks containing only an icmp (or
/// icmp+select) feeding a value-equality chain, and empty landing pads that
/// duplicate a sibling handler.
bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
                                          IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *Succ = BI->getSuccessor(i: 0);

  // If the Terminator is the only non-phi instruction, simplify the block.
  // If LoopHeader is provided, check if the block or its successor is a loop
  // header. (This is for early invocations before loop simplify and
  // vectorization to keep canonical loop forms for nested loops. These blocks
  // can be eliminated when the pass is invoked later in the back-end.)
  // Note that if BB has only one predecessor then we do not introduce new
  // backedge, so we can eliminate BB.
  bool NeedCanonicalLoop =
      Options.NeedCanonicalLoop &&
      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(N: 2) &&
       (is_contained(Range&: LoopHeaders, Element: BB) || is_contained(Range&: LoopHeaders, Element: Succ)));
  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
    return true;

  // If the only instruction in the block is a seteq/setne comparison against a
  // constant, try to simplify the block.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val&: I)) {
    if (ICI->isEquality() && isa<ConstantInt>(Val: ICI->getOperand(i_nocapture: 1))) {
      ++I;
      // Pattern: [phis] icmp; br  — block holds only the compare.
      if (I->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
        return true;
      // Pattern: [phis] icmp; select; br — compare feeding a select.
      if (isa<SelectInst>(Val: I) && I->getNextNode()->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, Select: cast<SelectInst>(Val&: I),
                                                      Builder))
        return true;
    }
  }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(Val&: I)) {
    ++I;
    if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
      return true;
  }

  return false;
}
8437
8438static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
8439 BasicBlock *PredPred = nullptr;
8440 for (auto *P : predecessors(BB)) {
8441 BasicBlock *PPred = P->getSinglePredecessor();
8442 if (!PPred || (PredPred && PredPred != PPred))
8443 return nullptr;
8444 PredPred = PPred;
8445 }
8446 return PredPred;
8447}
8448
/// Fold the following pattern:
/// bb0:
///   br i1 %cond1, label %bb1, label %bb2
/// bb1:
///   br i1 %cond2, label %bb3, label %bb4
/// bb2:
///   br i1 %cond2, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// into
/// bb0:
///   %cond = xor i1 %cond1, %cond2
///   br i1 %cond, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// NOTE: %cond2 always dominates the terminator of bb0.
static bool mergeNestedCondBranch(CondBrInst *BI, DomTreeUpdater *DTU) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *BB1 = BI->getSuccessor(i: 0);
  BasicBlock *BB2 = BI->getSuccessor(i: 1);
  // A "simple" successor contains only a conditional branch, does not loop
  // back into the pattern, and its targets start with no PHIs (so no PHI
  // rewiring is needed when edges are redirected).
  auto IsSimpleSuccessor = [BB](BasicBlock *Succ, CondBrInst *&SuccBI) {
    if (Succ == BB)
      return false;
    if (&Succ->front() != Succ->getTerminator())
      return false;
    SuccBI = dyn_cast<CondBrInst>(Val: Succ->getTerminator());
    if (!SuccBI)
      return false;
    BasicBlock *Succ1 = SuccBI->getSuccessor(i: 0);
    BasicBlock *Succ2 = SuccBI->getSuccessor(i: 1);
    return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
           !isa<PHINode>(Val: Succ1->front()) && !isa<PHINode>(Val: Succ2->front());
  };
  CondBrInst *BB1BI, *BB2BI;
  if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
    return false;

  // The inner branches must test the same condition with swapped targets —
  // that is exactly the xor relationship the fold exploits.
  if (BB1BI->getCondition() != BB2BI->getCondition() ||
      BB1BI->getSuccessor(i: 0) != BB2BI->getSuccessor(i: 1) ||
      BB1BI->getSuccessor(i: 1) != BB2BI->getSuccessor(i: 0))
    return false;

  BasicBlock *BB3 = BB1BI->getSuccessor(i: 0);
  BasicBlock *BB4 = BB1BI->getSuccessor(i: 1);
  IRBuilder<> Builder(BI);
  // cond1 xor cond2 is true exactly when control would have reached bb4.
  BI->setCondition(
      Builder.CreateXor(LHS: BI->getCondition(), RHS: BB1BI->getCondition()));
  BB1->removePredecessor(Pred: BB);
  BI->setSuccessor(idx: 0, NewSucc: BB4);
  BB2->removePredecessor(Pred: BB);
  BI->setSuccessor(idx: 1, NewSucc: BB3);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 4> Updates;
    Updates.push_back(Elt: {DominatorTree::Delete, BB, BB1});
    Updates.push_back(Elt: {DominatorTree::Insert, BB, BB4});
    Updates.push_back(Elt: {DominatorTree::Delete, BB, BB2});
    Updates.push_back(Elt: {DominatorTree::Insert, BB, BB3});

    DTU->applyUpdates(Updates);
  }
  // Combine profile data: if any of the three branches carried weights, treat
  // the missing ones as 1:1 and compose path probabilities.
  bool HasWeight = false;
  uint64_t BBTWeight, BBFWeight;
  if (extractBranchWeights(I: *BI, TrueVal&: BBTWeight, FalseVal&: BBFWeight))
    HasWeight = true;
  else
    BBTWeight = BBFWeight = 1;
  uint64_t BB1TWeight, BB1FWeight;
  if (extractBranchWeights(I: *BB1BI, TrueVal&: BB1TWeight, FalseVal&: BB1FWeight))
    HasWeight = true;
  else
    BB1TWeight = BB1FWeight = 1;
  uint64_t BB2TWeight, BB2FWeight;
  if (extractBranchWeights(I: *BB2BI, TrueVal&: BB2TWeight, FalseVal&: BB2FWeight))
    HasWeight = true;
  else
    BB2TWeight = BB2FWeight = 1;
  if (HasWeight) {
    // New true edge (-> bb4): via bb1's false edge or bb2's true edge.
    // New false edge (-> bb3): via bb1's true edge or bb2's false edge.
    uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
                           BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
    setFittedBranchWeights(I&: *BI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
  }
  return true;
}
8537
8538bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
8539 assert(
8540 !isa<ConstantInt>(BI->getCondition()) &&
8541 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8542 "Tautological conditional branch should have been eliminated already.");
8543
8544 BasicBlock *BB = BI->getParent();
8545 if (!Options.SimplifyCondBranch ||
8546 BI->getFunction()->hasFnAttribute(Kind: Attribute::OptForFuzzing))
8547 return false;
8548
8549 // Conditional branch
8550 if (isValueEqualityComparison(TI: BI)) {
8551 // If we only have one predecessor, and if it is a branch on this value,
8552 // see if that predecessor totally determines the outcome of this
8553 // switch.
8554 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8555 if (simplifyEqualityComparisonWithOnlyPredecessor(TI: BI, Pred: OnlyPred, Builder))
8556 return requestResimplify();
8557
8558 // This block must be empty, except for the setcond inst, if it exists.
8559 // Ignore pseudo intrinsics.
8560 for (auto &I : *BB) {
8561 if (isa<PseudoProbeInst>(Val: I) ||
8562 &I == cast<Instruction>(Val: BI->getCondition()))
8563 continue;
8564 if (&I == BI)
8565 if (foldValueComparisonIntoPredecessors(TI: BI, Builder))
8566 return requestResimplify();
8567 break;
8568 }
8569 }
8570
8571 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8572 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8573 return true;
8574
8575 // If this basic block has dominating predecessor blocks and the dominating
8576 // blocks' conditions imply BI's condition, we know the direction of BI.
8577 std::optional<bool> Imp = isImpliedByDomCondition(Cond: BI->getCondition(), ContextI: BI, DL);
8578 if (Imp) {
8579 // Turn this into a branch on constant.
8580 auto *OldCond = BI->getCondition();
8581 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(Context&: BB->getContext())
8582 : ConstantInt::getFalse(Context&: BB->getContext());
8583 BI->setCondition(TorF);
8584 RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
8585 return requestResimplify();
8586 }
8587
8588 // If this basic block is ONLY a compare and a branch, and if a predecessor
8589 // branches to us and one of our successors, fold the comparison into the
8590 // predecessor and use logical operations to pick the right destination.
8591 if (Options.SpeculateBlocks &&
8592 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, TTI: &TTI,
8593 BonusInstThreshold: Options.BonusInstThreshold))
8594 return requestResimplify();
8595
8596 // We have a conditional branch to two blocks that are only reachable
8597 // from BI. We know that the condbr dominates the two blocks, so see if
8598 // there is any identical code in the "then" and "else" blocks. If so, we
8599 // can hoist it up to the branching block.
8600 if (BI->getSuccessor(i: 0)->getSinglePredecessor()) {
8601 if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
8602 if (HoistCommon &&
8603 hoistCommonCodeFromSuccessors(TI: BI, AllInstsEqOnly: !Options.HoistCommonInsts))
8604 return requestResimplify();
8605
8606 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8607 isProfitableToSpeculate(BI, Invert: std::nullopt, TTI)) {
8608 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8609 auto CanSpeculateConditionalLoadsStores = [&]() {
8610 for (auto *Succ : successors(BB)) {
8611 for (Instruction &I : *Succ) {
8612 if (I.isTerminator()) {
8613 if (I.getNumSuccessors() > 1)
8614 return false;
8615 continue;
8616 } else if (!isSafeCheapLoadStore(I: &I, TTI) ||
8617 SpeculatedConditionalLoadsStores.size() ==
8618 HoistLoadsStoresWithCondFaultingThreshold) {
8619 return false;
8620 }
8621 SpeculatedConditionalLoadsStores.push_back(Elt: &I);
8622 }
8623 }
8624 return !SpeculatedConditionalLoadsStores.empty();
8625 };
8626
8627 if (CanSpeculateConditionalLoadsStores()) {
8628 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8629 Invert: std::nullopt, Sel: nullptr);
8630 return requestResimplify();
8631 }
8632 }
8633 } else {
8634 // If Successor #1 has multiple preds, we may be able to conditionally
8635 // execute Successor #0 if it branches to Successor #1.
8636 Instruction *Succ0TI = BI->getSuccessor(i: 0)->getTerminator();
8637 if (Succ0TI->getNumSuccessors() == 1 &&
8638 Succ0TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 1))
8639 if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 0)))
8640 return requestResimplify();
8641 }
8642 } else if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
8643 // If Successor #0 has multiple preds, we may be able to conditionally
8644 // execute Successor #1 if it branches to Successor #0.
8645 Instruction *Succ1TI = BI->getSuccessor(i: 1)->getTerminator();
8646 if (Succ1TI->getNumSuccessors() == 1 &&
8647 Succ1TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 0))
8648 if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 1)))
8649 return requestResimplify();
8650 }
8651
8652 // If this is a branch on something for which we know the constant value in
8653 // predecessors (e.g. a phi node in the current block), thread control
8654 // through this block.
8655 if (foldCondBranchOnValueKnownInPredecessor(BI))
8656 return requestResimplify();
8657
8658 // Scan predecessor blocks for conditional branches.
8659 for (BasicBlock *Pred : predecessors(BB))
8660 if (CondBrInst *PBI = dyn_cast<CondBrInst>(Val: Pred->getTerminator()))
8661 if (PBI != BI)
8662 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8663 return requestResimplify();
8664
8665 // Look for diamond patterns.
8666 if (MergeCondStores)
8667 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8668 if (CondBrInst *PBI = dyn_cast<CondBrInst>(Val: PrevBB->getTerminator()))
8669 if (PBI != BI)
8670 if (mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
8671 return requestResimplify();
8672
8673 // Look for nested conditional branches.
8674 if (mergeNestedCondBranch(BI, DTU))
8675 return requestResimplify();
8676
8677 return false;
8678}
8679
/// Check if passing a value to an instruction will cause undefined behavior.
///
/// \p V is a constant that would stand in for the result of \p I (e.g. a PHI
/// incoming value); only null and undef constants are analyzed. The routine
/// inspects the first handled use of \p I within the same basic block and
/// returns true when consuming \p V there is guaranteed immediate UB:
/// load/store through null, calling null, div/rem by zero, assume(false/undef),
/// or returning/passing null/undef where nonnull/noundef is required.
/// \p PtrValueMayBeModified records that the pointer may have been altered on
/// the way to the use (e.g. by a non-trivial GEP), which suppresses
/// conclusions that rely on the value still being exactly null.
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
  assert(V->getType() == I->getType() && "Mismatched types");
  Constant *C = dyn_cast<Constant>(Val: V);
  if (!C)
    return false;

  // With no uses, nothing can consume the bad value.
  if (I->use_empty())
    return false;

  if (C->isNullValue() || isa<UndefValue>(Val: C)) {
    // Only look at the first use we can handle, avoid hurting compile time with
    // long uselists
    auto FindUse = llvm::find_if(Range: I->uses(), P: [](auto &U) {
      auto *Use = cast<Instruction>(U.getUser());
      // Change this list when we want to add new instructions.
      switch (Use->getOpcode()) {
      default:
        return false;
      case Instruction::GetElementPtr:
      case Instruction::Ret:
      case Instruction::BitCast:
      case Instruction::Load:
      case Instruction::Store:
      case Instruction::Call:
      case Instruction::CallBr:
      case Instruction::Invoke:
      case Instruction::UDiv:
      case Instruction::URem:
      // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
      // implemented to avoid code complexity as it is unclear how useful such
      // logic is.
      case Instruction::SDiv:
      case Instruction::SRem:
        return true;
      }
    });
    if (FindUse == I->use_end())
      return false;
    auto &Use = *FindUse;
    auto *User = cast<Instruction>(Val: Use.getUser());
    // Bail out if User is not in the same BB as I or User == I or User comes
    // before I in the block. The latter two can be the case if User is a
    // PHI node.
    if (User->getParent() != I->getParent() || User == I ||
        User->comesBefore(Other: I))
      return false;

    // Now make sure that there are no instructions in between that can alter
    // control flow (eg. calls)
    auto InstrRange =
        make_range(x: std::next(x: I->getIterator()), y: User->getIterator());
    if (any_of(Range&: InstrRange, P: [](Instruction &I) {
          return !isGuaranteedToTransferExecutionToSuccessor(I: &I);
        }))
      return false;

    // Look through GEPs. A load from a GEP derived from NULL is still undefined
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: User))
      if (GEP->getPointerOperand() == I) {
        // The type of GEP may differ from the type of base pointer.
        // Bail out on vector GEPs, as they are not handled by other checks.
        if (GEP->getType()->isVectorTy())
          return false;
        // The current base address is null, there are four cases to consider:
        // getelementptr (TY, null, 0) -> null
        // getelementptr (TY, null, not zero) -> may be modified
        // getelementptr inbounds (TY, null, 0) -> null
        // getelementptr inbounds (TY, null, not zero) -> poison iff null is
        // undefined?
        if (!GEP->hasAllZeroIndices() &&
            (!GEP->isInBounds() ||
             NullPointerIsDefined(F: GEP->getFunction(),
                                  AS: GEP->getPointerAddressSpace())))
          PtrValueMayBeModified = true;
        // Recurse through the GEP, which forwards the (possibly modified)
        // pointer to its own users.
        return passingValueIsAlwaysUndefined(V, I: GEP, PtrValueMayBeModified);
      }

    // Look through return.
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: User)) {
      bool HasNoUndefAttr =
          Ret->getFunction()->hasRetAttribute(Kind: Attribute::NoUndef);
      // Return undefined to a noundef return value is undefined.
      if (isa<UndefValue>(Val: C) && HasNoUndefAttr)
        return true;
      // Return null to a nonnull+noundef return value is undefined.
      if (C->isNullValue() && HasNoUndefAttr &&
          Ret->getFunction()->hasRetAttribute(Kind: Attribute::NonNull)) {
        // Only conclusive if the pointer is still known to be exactly null.
        return !PtrValueMayBeModified;
      }
    }

    // Load from null is undefined.
    if (LoadInst *LI = dyn_cast<LoadInst>(Val: User))
      if (!LI->isVolatile())
        return !NullPointerIsDefined(F: LI->getFunction(),
                                     AS: LI->getPointerAddressSpace());

    // Store to null is undefined.
    if (StoreInst *SI = dyn_cast<StoreInst>(Val: User))
      if (!SI->isVolatile())
        return (!NullPointerIsDefined(F: SI->getFunction(),
                                      AS: SI->getPointerAddressSpace())) &&
               SI->getPointerOperand() == I;

    // llvm.assume(false/undef) always triggers immediate UB.
    if (auto *Assume = dyn_cast<AssumeInst>(Val: User)) {
      // Ignore assume operand bundles.
      if (I == Assume->getArgOperand(i: 0))
        return true;
    }

    if (auto *CB = dyn_cast<CallBase>(Val: User)) {
      // In functions where null is a valid pointer, calling/passing null is
      // not UB.
      if (C->isNullValue() && NullPointerIsDefined(F: CB->getFunction()))
        return false;
      // A call to null is undefined.
      if (CB->getCalledOperand() == I)
        return true;

      if (CB->isArgOperand(U: &Use)) {
        unsigned ArgIdx = CB->getArgOperandNo(U: &Use);
        // Passing null to a nonnnull+noundef argument is undefined.
        if (isa<ConstantPointerNull>(Val: C) &&
            CB->paramHasNonNullAttr(ArgNo: ArgIdx, /*AllowUndefOrPoison=*/false))
          return !PtrValueMayBeModified;
        // Passing undef to a noundef argument is undefined.
        if (isa<UndefValue>(Val: C) && CB->isPassingUndefUB(ArgNo: ArgIdx))
          return true;
      }
    }
    // Div/Rem by zero is immediate UB
    if (match(V: User, P: m_BinOp(L: m_Value(), R: m_Specific(V: I))) && User->isIntDivRem())
      return true;
  }
  return false;
}
8816
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
///
/// For each PHI incoming value that passingValueIsAlwaysUndefined proves to be
/// immediate UB, the edge from the corresponding predecessor is severed:
/// unconditional branches are replaced by `unreachable`, conditional branches
/// are folded to their other successor (with the branch condition preserved as
/// an llvm.assume), and switch edges are redirected to a fresh unreachable
/// block. Returns true as soon as one edge has been removed; \p DTU and \p AC
/// are updated when non-null.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
                                              DomTreeUpdater *DTU,
                                              AssumptionCache *AC) {
  for (PHINode &PHI : BB->phis())
    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
      if (passingValueIsAlwaysUndefined(V: PHI.getIncomingValue(i), I: &PHI)) {
        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
        Instruction *T = Predecessor->getTerminator();
        IRBuilder<> Builder(T);
        if (isa<UncondBrInst>(Val: T)) {
          BB->removePredecessor(Pred: Predecessor);
          // Turn unconditional branches into unreachables.
          Builder.CreateUnreachable();
          T->eraseFromParent();
          if (DTU)
            DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
          return true;
        } else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: T)) {
          BB->removePredecessor(Pred: Predecessor);
          // Preserve guarding condition in assume, because it might not be
          // inferrable from any dominating condition.
          Value *Cond = BI->getCondition();
          CallInst *Assumption;
          // If the UB edge is the taken-on-true edge, the surviving path
          // implies the condition is false (and vice versa).
          if (BI->getSuccessor(i: 0) == BB)
            Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
          else
            Assumption = Builder.CreateAssumption(Cond);
          if (AC)
            AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
          // Replace the conditional branch with a jump to the other successor.
          Builder.CreateBr(Dest: BI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 1)
                                                       : BI->getSuccessor(i: 0));
          BI->eraseFromParent();
          if (DTU)
            DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
          return true;
        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: T)) {
          // Redirect all branches leading to UB into
          // a newly created unreachable block.
          BasicBlock *Unreachable = BasicBlock::Create(
              Context&: Predecessor->getContext(), Name: "unreachable", Parent: BB->getParent(), InsertBefore: BB);
          Builder.SetInsertPoint(Unreachable);
          // The new block contains only one instruction: Unreachable
          Builder.CreateUnreachable();
          // Retarget every case (and the default) that leads to BB.
          for (const auto &Case : SI->cases())
            if (Case.getCaseSuccessor() == BB) {
              BB->removePredecessor(Pred: Predecessor);
              Case.setSuccessor(Unreachable);
            }
          if (SI->getDefaultDest() == BB) {
            BB->removePredecessor(Pred: Predecessor);
            SI->setDefaultDest(Unreachable);
          }

          if (DTU)
            DTU->applyUpdates(
                Updates: { { DominatorTree::Insert, Predecessor, Unreachable },
                  { DominatorTree::Delete, Predecessor, BB } });
          return true;
        }
      }

  return false;
}
8882
/// Run a single round of CFG simplification over \p BB.
///
/// Structural cleanups (dead-block removal, terminator constant folding,
/// duplicate-PHI elimination, predecessor merging/sinking, two-entry PHI
/// folding) are tried first; each transformation that restructures the CFG
/// returns immediately. Finally the terminator-specific simplification
/// routine is dispatched on the terminator opcode. Returns true if anything
/// changed; some paths also request another round via requestResimplify().
bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
  bool Changed = false;

  assert(BB && BB->getParent() && "Block not embedded in function!");
  assert(BB->getTerminator() && "Degenerate basic block encountered!");

  // Remove basic blocks that have no predecessors (except the entry block)...
  // or that just have themselves as a predecessor. These are unreachable.
  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
      BB->getSinglePredecessor() == BB) {
    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  // Check to see if we can constant propagate this terminator instruction
  // away...
  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
                                    /*TLI=*/nullptr, DTU);

  // Check for and eliminate duplicate PHI nodes in this block.
  Changed |= EliminateDuplicatePHINodes(BB);

  // Check for and remove branches that will always cause undefined behavior.
  if (removeUndefIntroducingPredecessor(BB, DTU, AC: Options.AC))
    return requestResimplify();

  // Merge basic blocks into their predecessor if there is only one distinct
  // pred, and if there is only one distinct successor of the predecessor, and
  // if there are no PHI nodes.
  if (MergeBlockIntoPredecessor(BB, DTU))
    return true;

  if (SinkCommon && Options.SinkCommonInsts) {
    if (sinkCommonCodeFromPredecessors(BB, DTU) ||
        mergeCompatibleInvokes(BB, DTU)) {
      // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
      // so we may now have duplicate PHI's.
      // Let's rerun EliminateDuplicatePHINodes() first,
      // before foldTwoEntryPHINode() potentially converts them into select's,
      // after which we'd need a whole EarlyCSE pass run to cleanup them.
      return true;
    }
    // Merge identical predecessors of this block.
    if (simplifyDuplicatePredecessors(BB, DTU))
      return true;
  }

  if (Options.SpeculateBlocks &&
      !BB->getParent()->hasFnAttribute(Kind: Attribute::OptForFuzzing)) {
    // If there is a trivial two-entry PHI node in this basic block, and we can
    // eliminate it, do so now.
    if (auto *PN = dyn_cast<PHINode>(Val: BB->begin()))
      if (PN->getNumIncomingValues() == 2)
        if (foldTwoEntryPHINode(PN, TTI, DTU, AC: Options.AC, DL,
                                SpeculateUnpredictables: Options.SpeculateUnpredictables))
          return true;
  }

  // Dispatch to the terminator-kind-specific simplification routine.
  IRBuilder<> Builder(BB);
  Instruction *Terminator = BB->getTerminator();
  Builder.SetInsertPoint(Terminator);
  switch (Terminator->getOpcode()) {
  case Instruction::UncondBr:
    Changed |= simplifyUncondBranch(BI: cast<UncondBrInst>(Val: Terminator), Builder);
    break;
  case Instruction::CondBr:
    Changed |= simplifyCondBranch(BI: cast<CondBrInst>(Val: Terminator), Builder);
    break;
  case Instruction::Resume:
    Changed |= simplifyResume(RI: cast<ResumeInst>(Val: Terminator), Builder);
    break;
  case Instruction::CleanupRet:
    Changed |= simplifyCleanupReturn(RI: cast<CleanupReturnInst>(Val: Terminator));
    break;
  case Instruction::Switch:
    Changed |= simplifySwitch(SI: cast<SwitchInst>(Val: Terminator), Builder);
    break;
  case Instruction::Unreachable:
    Changed |= simplifyUnreachable(UI: cast<UnreachableInst>(Val: Terminator));
    break;
  case Instruction::IndirectBr:
    Changed |= simplifyIndirectBr(IBI: cast<IndirectBrInst>(Val: Terminator));
    break;
  }

  return Changed;
}
8971
8972bool SimplifyCFGOpt::run(BasicBlock *BB) {
8973 bool Changed = false;
8974
8975 // Repeated simplify BB as long as resimplification is requested.
8976 do {
8977 Resimplify = false;
8978
8979 // Perform one round of simplifcation. Resimplify flag will be set if
8980 // another iteration is requested.
8981 Changed |= simplifyOnce(BB);
8982 } while (Resimplify);
8983
8984 return Changed;
8985}
8986
8987bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
8988 DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
8989 ArrayRef<WeakVH> LoopHeaders) {
8990 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8991 Options)
8992 .run(BB);
8993}
8994