1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
19#include "llvm/ADT/SetOperations.h"
20#include "llvm/ADT/SetVector.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/Analysis/AssumptionCache.h"
26#include "llvm/Analysis/CaptureTracking.h"
27#include "llvm/Analysis/ConstantFolding.h"
28#include "llvm/Analysis/DomTreeUpdater.h"
29#include "llvm/Analysis/GuardUtils.h"
30#include "llvm/Analysis/InstructionSimplify.h"
31#include "llvm/Analysis/Loads.h"
32#include "llvm/Analysis/MemorySSA.h"
33#include "llvm/Analysis/MemorySSAUpdater.h"
34#include "llvm/Analysis/TargetTransformInfo.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
40#include "llvm/IR/ConstantRange.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
51#include "llvm/IR/Instructions.h"
52#include "llvm/IR/IntrinsicInst.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/PatternMatch.h"
61#include "llvm/IR/ProfDataUtils.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
67#include "llvm/Support/BranchProbability.h"
68#include "llvm/Support/Casting.h"
69#include "llvm/Support/CommandLine.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/ErrorHandling.h"
72#include "llvm/Support/KnownBits.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Transforms/Utils/BasicBlockUtils.h"
76#include "llvm/Transforms/Utils/Cloning.h"
77#include "llvm/Transforms/Utils/Local.h"
78#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
79#include "llvm/Transforms/Utils/ValueMapper.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
101cl::opt<bool> RequireAndPreserveDomTree(
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
112static cl::opt<unsigned> PHINodeFoldingThreshold(
113 "phi-node-folding-threshold", cl::Hidden, cl::init(Val: 2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
117static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(Val: 4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(Val: true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
127static cl::opt<bool> HoistLoadsWithCondFaulting(
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(Val: true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
131static cl::opt<bool> HoistStoresWithCondFaulting(
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(Val: true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
135static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(Val: 6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
141static cl::opt<unsigned>
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(Val: 20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(Val: true),
149 cl::desc("Sink common instructions down to the end block"));
150
151static cl::opt<bool> HoistCondStores(
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(Val: true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
155static cl::opt<bool> MergeCondStores(
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(Val: true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
161static cl::opt<bool> MergeCondStoresAggressively(
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(Val: false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
166static cl::opt<bool> SpeculateOneExpensiveInst(
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(Val: true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
171static cl::opt<unsigned> MaxSpeculationDepth(
172 "max-speculation-depth", cl::Hidden, cl::init(Val: 10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(Val: 10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
183static cl::opt<unsigned>
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(Val: 2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
189static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(Val: 2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
196static cl::opt<bool> EnableMergeCompatibleInvokes(
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(Val: true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
200static cl::opt<unsigned> MaxSwitchCasesPerResult(
201 "max-switch-cases-per-result", cl::Hidden, cl::init(Val: 16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
204static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(Val: 24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
209extern cl::opt<bool> ProfcheckDisableMetadataFixes;
210
211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
218STATISTIC(
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
226STATISTIC(
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
247 SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
256 ConstantInt *Value;
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
270class SimplifyCFGOpt {
271 const TargetTransformInfo &TTI;
272 DomTreeUpdater *DTU;
273 const DataLayout &DL;
274 ArrayRef<WeakVH> LoopHeaders;
275 const SimplifyCFGOptions &Options;
276 bool Resimplify;
277
278 Value *isValueEqualityComparison(Instruction *TI);
279 BasicBlock *getValueEqualityComparisonCases(
280 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
281 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
282 BasicBlock *Pred,
283 IRBuilder<> &Builder);
284 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
285 Instruction *PTI,
286 IRBuilder<> &Builder);
287 bool foldValueComparisonIntoPredecessors(Instruction *TI,
288 IRBuilder<> &Builder);
289
290 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
291 bool simplifySingleResume(ResumeInst *RI);
292 bool simplifyCommonResume(ResumeInst *RI);
293 bool simplifyCleanupReturn(CleanupReturnInst *RI);
294 bool simplifyUnreachable(UnreachableInst *UI);
295 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
296 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
297 bool simplifyIndirectBr(IndirectBrInst *IBI);
298 bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
299 bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
300 bool foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI);
301
302 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
303 IRBuilder<> &Builder);
304 bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
305 SelectInst *Select,
306 IRBuilder<> &Builder);
307 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
308 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
309 Instruction *TI, Instruction *I1,
310 SmallVectorImpl<Instruction *> &OtherSuccTIs,
311 ArrayRef<BasicBlock *> UniqueSuccessors);
312 bool speculativelyExecuteBB(CondBrInst *BI, BasicBlock *ThenBB);
313 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
314 BasicBlock *TrueBB, BasicBlock *FalseBB,
315 uint32_t TrueWeight, uint32_t FalseWeight);
316 bool simplifyBranchOnICmpChain(CondBrInst *BI, IRBuilder<> &Builder,
317 const DataLayout &DL);
318 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
319 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
320 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
321 bool simplifyDuplicatePredecessors(BasicBlock *Succ, DomTreeUpdater *DTU);
322
323public:
324 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
325 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
326 const SimplifyCFGOptions &Opts)
327 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
328 assert((!DTU || !DTU->hasPostDomTree()) &&
329 "SimplifyCFG is not yet capable of maintaining validity of a "
330 "PostDomTree, so don't ask for it.");
331 }
332
333 bool simplifyOnce(BasicBlock *BB);
334 bool run(BasicBlock *BB);
335
336 // Helper to set Resimplify and return change indication.
337 bool requestResimplify() {
338 Resimplify = true;
339 return true;
340 }
341};
342
343// we synthesize a || b as select a, true, b
344// we synthesize a && b as select a, b, false
345// this function determines if SI is playing one of those roles.
346[[maybe_unused]] bool
347isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
348 return ((isa<ConstantInt>(Val: SI->getTrueValue()) &&
349 (dyn_cast<ConstantInt>(Val: SI->getTrueValue())->isOne())) ||
350 (isa<ConstantInt>(Val: SI->getFalseValue()) &&
351 (dyn_cast<ConstantInt>(Val: SI->getFalseValue())->isNullValue())));
352}
353
354} // end anonymous namespace
355
356/// Return true if all the PHI nodes in the basic block \p BB
357/// receive compatible (identical) incoming values when coming from
358/// all of the predecessor blocks that are specified in \p IncomingBlocks.
359///
360/// Note that if the values aren't exactly identical, but \p EquivalenceSet
361/// is provided, and *both* of the values are present in the set,
362/// then they are considered equal.
363static bool incomingValuesAreCompatible(
364 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
365 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
366 assert(IncomingBlocks.size() == 2 &&
367 "Only for a pair of incoming blocks at the time!");
368
369 // FIXME: it is okay if one of the incoming values is an `undef` value,
370 // iff the other incoming value is guaranteed to be a non-poison value.
371 // FIXME: it is okay if one of the incoming values is a `poison` value.
372 return all_of(Range: BB->phis(), P: [IncomingBlocks, EquivalenceSet](PHINode &PN) {
373 Value *IV0 = PN.getIncomingValueForBlock(BB: IncomingBlocks[0]);
374 Value *IV1 = PN.getIncomingValueForBlock(BB: IncomingBlocks[1]);
375 if (IV0 == IV1)
376 return true;
377 if (EquivalenceSet && EquivalenceSet->contains(Ptr: IV0) &&
378 EquivalenceSet->contains(Ptr: IV1))
379 return true;
380 return false;
381 });
382}
383
384/// Return true if it is safe to merge these two
385/// terminator instructions together.
386static bool
387safeToMergeTerminators(Instruction *SI1, Instruction *SI2,
388 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
389 if (SI1 == SI2)
390 return false; // Can't merge with self!
391
392 // It is not safe to merge these two switch instructions if they have a common
393 // successor, and if that successor has a PHI node, and if *that* PHI node has
394 // conflicting incoming values from the two switch blocks.
395 BasicBlock *SI1BB = SI1->getParent();
396 BasicBlock *SI2BB = SI2->getParent();
397
398 SmallPtrSet<BasicBlock *, 16> SI1Succs(llvm::from_range, successors(BB: SI1BB));
399 bool Fail = false;
400 for (BasicBlock *Succ : successors(BB: SI2BB)) {
401 if (!SI1Succs.count(Ptr: Succ))
402 continue;
403 if (incomingValuesAreCompatible(BB: Succ, IncomingBlocks: {SI1BB, SI2BB}))
404 continue;
405 Fail = true;
406 if (FailBlocks)
407 FailBlocks->insert(X: Succ);
408 else
409 break;
410 }
411
412 return !Fail;
413}
414
415/// Update PHI nodes in Succ to indicate that there will now be entries in it
416/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
417/// will be the same as those coming in from ExistPred, an existing predecessor
418/// of Succ.
419static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
420 BasicBlock *ExistPred,
421 MemorySSAUpdater *MSSAU = nullptr) {
422 for (PHINode &PN : Succ->phis())
423 PN.addIncoming(V: PN.getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
424 if (MSSAU)
425 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(BB: Succ))
426 MPhi->addIncoming(V: MPhi->getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
427}
428
429/// Compute an abstract "cost" of speculating the given instruction,
430/// which is assumed to be safe to speculate. TCC_Free means cheap,
431/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
432/// expensive.
433static InstructionCost computeSpeculationCost(const User *I,
434 const TargetTransformInfo &TTI) {
435 return TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
436}
437
438/// If we have a merge point of an "if condition" as accepted above,
439/// return true if the specified value dominates the block. We don't handle
440/// the true generality of domination here, just a special case which works
441/// well enough for us.
442///
443/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
444/// see if V (which must be an instruction) and its recursive operands
445/// that do not dominate BB have a combined cost lower than Budget and
446/// are non-trapping. If both are true, the instruction is inserted into the
447/// set and true is returned.
448///
449/// The cost for most non-trapping instructions is defined as 1 except for
450/// Select whose cost is 2.
451///
452/// After this function returns, Cost is increased by the cost of
453/// V plus its non-dominating operands. If that cost is greater than
454/// Budget, false is returned and Cost is undefined.
455static bool dominatesMergePoint(
456 Value *V, BasicBlock *BB, Instruction *InsertPt,
457 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
458 InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
459 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
460 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
461 // so limit the recursion depth.
462 // TODO: While this recursion limit does prevent pathological behavior, it
463 // would be better to track visited instructions to avoid cycles.
464 if (Depth == MaxSpeculationDepth)
465 return false;
466
467 Instruction *I = dyn_cast<Instruction>(Val: V);
468 if (!I) {
469 // Non-instructions dominate all instructions and can be executed
470 // unconditionally.
471 return true;
472 }
473 BasicBlock *PBB = I->getParent();
474
475 // We don't want to allow weird loops that might have the "if condition" in
476 // the bottom of this block.
477 if (PBB == BB)
478 return false;
479
480 // If this instruction is defined in a block that contains an unconditional
481 // branch to BB, then it must be in the 'conditional' part of the "if
482 // statement". If not, it definitely dominates the region.
483 UncondBrInst *BI = dyn_cast<UncondBrInst>(Val: PBB->getTerminator());
484 if (!BI || BI->getSuccessor() != BB)
485 return true;
486
487 // If we have seen this instruction before, don't count it again.
488 if (AggressiveInsts.count(Ptr: I))
489 return true;
490
491 // Okay, it looks like the instruction IS in the "condition". Check to
492 // see if it's a cheap instruction to unconditionally compute, and if it
493 // only uses stuff defined outside of the condition. If so, hoist it out.
494 if (!isSafeToSpeculativelyExecute(I, CtxI: InsertPt, AC))
495 return false;
496
497 // Overflow arithmetic instruction plus extract value are usually generated
498 // when a division is being replaced. But, in this case, the zero check may
499 // still be kept in the code. In that case it would be worth to hoist these
500 // two instruction out of the basic block. Let's treat this pattern as one
501 // single cheap instruction here!
502 WithOverflowInst *OverflowInst;
503 if (match(V: I, P: m_ExtractValue<1>(V: m_OneUse(SubPattern: m_WithOverflowInst(I&: OverflowInst))))) {
504 ZeroCostInstructions.insert(Ptr: OverflowInst);
505 Cost += 1;
506 } else if (!ZeroCostInstructions.contains(Ptr: I))
507 Cost += computeSpeculationCost(I, TTI);
508
509 // Allow exactly one instruction to be speculated regardless of its cost
510 // (as long as it is safe to do so).
511 // This is intended to flatten the CFG even if the instruction is a division
512 // or other expensive operation. The speculation of an expensive instruction
513 // is expected to be undone in CodeGenPrepare if the speculation has not
514 // enabled further IR optimizations.
515 if (Cost > Budget &&
516 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
517 !Cost.isValid()))
518 return false;
519
520 // Okay, we can only really hoist these out if their operands do
521 // not take us over the cost threshold.
522 for (Use &Op : I->operands())
523 if (!dominatesMergePoint(V: Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
524 TTI, AC, ZeroCostInstructions, Depth: Depth + 1))
525 return false;
526 // Okay, it's safe to do this! Remember this instruction.
527 AggressiveInsts.insert(Ptr: I);
528 return true;
529}
530
531/// Extract ConstantInt from value, looking through IntToPtr
532/// and PointerNullValue. Return NULL if value is not a constant int.
533static ConstantInt *getConstantInt(Value *V, const DataLayout &DL) {
534 // Normal constant int.
535 ConstantInt *CI = dyn_cast<ConstantInt>(Val: V);
536 if (CI || !isa<Constant>(Val: V) || !V->getType()->isPointerTy())
537 return CI;
538
539 // It is not safe to look through inttoptr or ptrtoint when using unstable
540 // pointer types.
541 if (DL.hasUnstableRepresentation(Ty: V->getType()))
542 return nullptr;
543
544 // This is some kind of pointer constant. Turn it into a pointer-sized
545 // ConstantInt if possible.
546 IntegerType *IntPtrTy = cast<IntegerType>(Val: DL.getIntPtrType(V->getType()));
547
548 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
549 if (isa<ConstantPointerNull>(Val: V))
550 return ConstantInt::get(Ty: IntPtrTy, V: 0);
551
552 // IntToPtr const int, we can look through this if the semantics of
553 // inttoptr for this address space are a simple (truncating) bitcast.
554 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
555 if (CE->getOpcode() == Instruction::IntToPtr)
556 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CE->getOperand(i_nocapture: 0))) {
557 // The constant is very likely to have the right type already.
558 if (CI->getType() == IntPtrTy)
559 return CI;
560 else
561 return cast<ConstantInt>(
562 Val: ConstantFoldIntegerCast(C: CI, DestTy: IntPtrTy, /*isSigned=*/IsSigned: false, DL));
563 }
564 return nullptr;
565}
566
567namespace {
568
569/// Given a chain of or (||) or and (&&) comparison of a value against a
570/// constant, this will try to recover the information required for a switch
571/// structure.
572/// It will depth-first traverse the chain of comparison, seeking for patterns
573/// like %a == 12 or %a < 4 and combine them to produce a set of integer
574/// representing the different cases for the switch.
575/// Note that if the chain is composed of '||' it will build the set of elements
576/// that matches the comparisons (i.e. any of this value validate the chain)
577/// while for a chain of '&&' it will build the set elements that make the test
578/// fail.
579struct ConstantComparesGatherer {
580 const DataLayout &DL;
581
582 /// Value found for the switch comparison
583 Value *CompValue = nullptr;
584
585 /// Extra clause to be checked before the switch
586 Value *Extra = nullptr;
587
588 /// Set of integers to match in switch
589 SmallVector<ConstantInt *, 8> Vals;
590
591 /// Number of comparisons matched in the and/or chain
592 unsigned UsedICmps = 0;
593
594 /// If the elements in Vals matches the comparisons
595 bool IsEq = false;
596
597 // Used to check if the first matched CompValue shall be the Extra check.
598 bool IgnoreFirstMatch = false;
599 bool MultipleMatches = false;
600
601 /// Construct and compute the result for the comparison instruction Cond
602 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
603 gather(V: Cond);
604 if (CompValue || !MultipleMatches)
605 return;
606 Extra = nullptr;
607 Vals.clear();
608 UsedICmps = 0;
609 IgnoreFirstMatch = true;
610 gather(V: Cond);
611 }
612
613 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
614 ConstantComparesGatherer &
615 operator=(const ConstantComparesGatherer &) = delete;
616
617private:
618 /// Try to set the current value used for the comparison, it succeeds only if
619 /// it wasn't set before or if the new value is the same as the old one
620 bool setValueOnce(Value *NewVal) {
621 if (IgnoreFirstMatch) {
622 IgnoreFirstMatch = false;
623 return false;
624 }
625 if (CompValue && CompValue != NewVal) {
626 MultipleMatches = true;
627 return false;
628 }
629 CompValue = NewVal;
630 return true;
631 }
632
633 /// Try to match Instruction "I" as a comparison against a constant and
634 /// populates the array Vals with the set of values that match (or do not
635 /// match depending on isEQ).
636 /// Return false on failure. On success, the Value the comparison matched
637 /// against is placed in CompValue.
638 /// If CompValue is already set, the function is expected to fail if a match
639 /// is found but the value compared to is different.
640 bool matchInstruction(Instruction *I, bool isEQ) {
641 if (match(V: I, P: m_Not(V: m_Instruction(I))))
642 isEQ = !isEQ;
643
644 Value *Val;
645 if (match(V: I, P: m_NUWTrunc(Op: m_Value(V&: Val)))) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(Val))
648 return false;
649 UsedICmps++;
650 Vals.push_back(Elt: ConstantInt::get(Ty: cast<IntegerType>(Val: Val->getType()), V: isEQ));
651 return true;
652 }
653 // If this is an icmp against a constant, handle this as one of the cases.
654 ICmpInst *ICI;
655 ConstantInt *C;
656 if (!((ICI = dyn_cast<ICmpInst>(Val: I)) &&
657 (C = getConstantInt(V: I->getOperand(i: 1), DL)))) {
658 return false;
659 }
660
661 Value *RHSVal;
662 const APInt *RHSC;
663
664 // Pattern match a special case
665 // (x & ~2^z) == y --> x == y || x == y|2^z
666 // This undoes a transformation done by instcombine to fuse 2 compares.
667 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
668 // It's a little bit hard to see why the following transformations are
669 // correct. Here is a CVC3 program to verify them for 64-bit values:
670
671 /*
672 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
673 x : BITVECTOR(64);
674 y : BITVECTOR(64);
675 z : BITVECTOR(64);
676 mask : BITVECTOR(64) = BVSHL(ONE, z);
677 QUERY( (y & ~mask = y) =>
678 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
679 );
680 QUERY( (y | mask = y) =>
681 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
682 );
683 */
684
685 // Please note that each pattern must be a dual implication (<--> or
686 // iff). One directional implication can create spurious matches. If the
687 // implication is only one-way, an unsatisfiable condition on the left
688 // side can imply a satisfiable condition on the right side. Dual
689 // implication ensures that satisfiable conditions are transformed to
690 // other satisfiable conditions and unsatisfiable conditions are
691 // transformed to other unsatisfiable conditions.
692
693 // Here is a concrete example of a unsatisfiable condition on the left
694 // implying a satisfiable condition on the right:
695 //
696 // mask = (1 << z)
697 // (x & ~mask) == y --> (x == y || x == (y | mask))
698 //
699 // Substituting y = 3, z = 0 yields:
700 // (x & -2) == 3 --> (x == 3 || x == 2)
701
702 // Pattern match a special case:
703 /*
704 QUERY( (y & ~mask = y) =>
705 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
706 );
707 */
708 if (match(V: ICI->getOperand(i_nocapture: 0),
709 P: m_And(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
710 APInt Mask = ~*RHSC;
711 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
712 // If we already have a value for the switch, it has to match!
713 if (!setValueOnce(RHSVal))
714 return false;
715
716 Vals.push_back(Elt: C);
717 Vals.push_back(
718 Elt: ConstantInt::get(Context&: C->getContext(),
719 V: C->getValue() | Mask));
720 UsedICmps++;
721 return true;
722 }
723 }
724
725 // Pattern match a special case:
726 /*
727 QUERY( (y | mask = y) =>
728 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
729 );
730 */
731 if (match(V: ICI->getOperand(i_nocapture: 0),
732 P: m_Or(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
733 APInt Mask = *RHSC;
734 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
735 // If we already have a value for the switch, it has to match!
736 if (!setValueOnce(RHSVal))
737 return false;
738
739 Vals.push_back(Elt: C);
740 Vals.push_back(Elt: ConstantInt::get(Context&: C->getContext(),
741 V: C->getValue() & ~Mask));
742 UsedICmps++;
743 return true;
744 }
745 }
746
747 // If we already have a value for the switch, it has to match!
748 if (!setValueOnce(ICI->getOperand(i_nocapture: 0)))
749 return false;
750
751 UsedICmps++;
752 Vals.push_back(Elt: C);
753 return true;
754 }
755
756 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
757 ConstantRange Span =
758 ConstantRange::makeExactICmpRegion(Pred: ICI->getPredicate(), Other: C->getValue());
759
760 // Shift the range if the compare is fed by an add. This is the range
761 // compare idiom as emitted by instcombine.
762 Value *CandidateVal = I->getOperand(i: 0);
763 if (match(V: I->getOperand(i: 0), P: m_Add(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
764 Span = Span.subtract(CI: *RHSC);
765 CandidateVal = RHSVal;
766 }
767
768 // If this is an and/!= check, then we are looking to build the set of
769 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
770 // x != 0 && x != 1.
771 if (!isEQ)
772 Span = Span.inverse();
773
774 // If there are a ton of values, we don't want to make a ginormous switch.
775 if (Span.isSizeLargerThan(MaxSize: 8) || Span.isEmptySet()) {
776 return false;
777 }
778
779 // If we already have a value for the switch, it has to match!
780 if (!setValueOnce(CandidateVal))
781 return false;
782
783 // Add all values from the range to the set
784 APInt Tmp = Span.getLower();
785 do
786 Vals.push_back(Elt: ConstantInt::get(Context&: I->getContext(), V: Tmp));
787 while (++Tmp != Span.getUpper());
788
789 UsedICmps++;
790 return true;
791 }
792
793 /// Given a potentially 'or'd or 'and'd together collection of icmp
794 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
795 /// the value being compared, and stick the list constants into the Vals
796 /// vector.
797 /// One "Extra" case is allowed to differ from the other.
798 void gather(Value *V) {
799 Value *Op0, *Op1;
800 if (match(V, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1))))
801 IsEq = true;
802 else if (match(V, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1))))
803 IsEq = false;
804 else
805 return;
806 // Keep a stack (SmallVector for efficiency) for depth-first traversal
807 SmallVector<Value *, 8> DFT{Op0, Op1};
808 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
809
810 while (!DFT.empty()) {
811 V = DFT.pop_back_val();
812
813 if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
814 // If it is a || (or && depending on isEQ), process the operands.
815 if (IsEq ? match(V: I, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))
816 : match(V: I, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) {
817 if (Visited.insert(Ptr: Op1).second)
818 DFT.push_back(Elt: Op1);
819 if (Visited.insert(Ptr: Op0).second)
820 DFT.push_back(Elt: Op0);
821
822 continue;
823 }
824
825 // Try to match the current instruction
826 if (matchInstruction(I, isEQ: IsEq))
827 // Match succeed, continue the loop
828 continue;
829 }
830
831 // One element of the sequence of || (or &&) could not be match as a
832 // comparison against the same value as the others.
833 // We allow only one "Extra" case to be checked before the switch
834 if (!Extra) {
835 Extra = V;
836 continue;
837 }
838 // Failed to parse a proper sequence, abort now
839 CompValue = nullptr;
840 break;
841 }
842 }
843};
844
845} // end anonymous namespace
846
847static void eraseTerminatorAndDCECond(Instruction *TI,
848 MemorySSAUpdater *MSSAU = nullptr) {
849 Instruction *Cond = nullptr;
850 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
851 Cond = dyn_cast<Instruction>(Val: SI->getCondition());
852 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
853 Cond = dyn_cast<Instruction>(Val: BI->getCondition());
854 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(Val: TI)) {
855 Cond = dyn_cast<Instruction>(Val: IBI->getAddress());
856 }
857
858 TI->eraseFromParent();
859 if (Cond)
860 RecursivelyDeleteTriviallyDeadInstructions(V: Cond, TLI: nullptr, MSSAU);
861}
862
863/// Return true if the specified terminator checks
864/// to see if a value is equal to constant integer value.
865Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
866 Value *CV = nullptr;
867 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
868 // Do not permit merging of large switch instructions into their
869 // predecessors unless there is only one predecessor.
870 if (!SI->getParent()->hasNPredecessorsOrMore(N: 128 / SI->getNumSuccessors()))
871 CV = SI->getCondition();
872 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI))
873 if (BI->getCondition()->hasOneUse()) {
874 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition())) {
875 if (ICI->isEquality() && getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL))
876 CV = ICI->getOperand(i_nocapture: 0);
877 } else if (auto *Trunc = dyn_cast<TruncInst>(Val: BI->getCondition())) {
878 if (Trunc->hasNoUnsignedWrap())
879 CV = Trunc->getOperand(i_nocapture: 0);
880 }
881 }
882
883 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
884 if (CV) {
885 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(Val: CV)) {
886 Value *Ptr = PTII->getPointerOperand();
887 if (DL.hasUnstableRepresentation(Ty: Ptr->getType()))
888 return CV;
889 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
890 CV = Ptr;
891 }
892 }
893 return CV;
894}
895
896/// Given a value comparison instruction,
897/// decode all of the 'cases' that it represents and return the 'default' block.
898BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
899 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
900 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
901 Cases.reserve(n: SI->getNumCases());
902 for (auto Case : SI->cases())
903 Cases.push_back(x: ValueEqualityComparisonCase(Case.getCaseValue(),
904 Case.getCaseSuccessor()));
905 return SI->getDefaultDest();
906 }
907
908 CondBrInst *BI = cast<CondBrInst>(Val: TI);
909 Value *Cond = BI->getCondition();
910 ICmpInst::Predicate Pred;
911 ConstantInt *C;
912 if (auto *ICI = dyn_cast<ICmpInst>(Val: Cond)) {
913 Pred = ICI->getPredicate();
914 C = getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL);
915 } else {
916 Pred = ICmpInst::ICMP_NE;
917 auto *Trunc = cast<TruncInst>(Val: Cond);
918 C = ConstantInt::get(Ty: cast<IntegerType>(Val: Trunc->getOperand(i_nocapture: 0)->getType()), V: 0);
919 }
920 BasicBlock *Succ = BI->getSuccessor(i: Pred == ICmpInst::ICMP_NE);
921 Cases.push_back(x: ValueEqualityComparisonCase(C, Succ));
922 return BI->getSuccessor(i: Pred == ICmpInst::ICMP_EQ);
923}
924
925/// Given a vector of bb/value pairs, remove any entries
926/// in the list that match the specified block.
927static void
928eliminateBlockCases(BasicBlock *BB,
929 std::vector<ValueEqualityComparisonCase> &Cases) {
930 llvm::erase(C&: Cases, V: BB);
931}
932
933/// Return true if there are any keys in C1 that exist in C2 as well.
934static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
935 std::vector<ValueEqualityComparisonCase> &C2) {
936 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
937
938 // Make V1 be smaller than V2.
939 if (V1->size() > V2->size())
940 std::swap(a&: V1, b&: V2);
941
942 if (V1->empty())
943 return false;
944 if (V1->size() == 1) {
945 // Just scan V2.
946 ConstantInt *TheVal = (*V1)[0].Value;
947 for (const ValueEqualityComparisonCase &VECC : *V2)
948 if (TheVal == VECC.Value)
949 return true;
950 }
951
952 // Otherwise, just sort both lists and compare element by element.
953 array_pod_sort(Start: V1->begin(), End: V1->end());
954 array_pod_sort(Start: V2->begin(), End: V2->end());
955 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
956 while (i1 != e1 && i2 != e2) {
957 if ((*V1)[i1].Value == (*V2)[i2].Value)
958 return true;
959 if ((*V1)[i1].Value < (*V2)[i2].Value)
960 ++i1;
961 else
962 ++i2;
963 }
964 return false;
965}
966
967/// If TI is known to be a terminator instruction and its block is known to
968/// only have a single predecessor block, check to see if that predecessor is
969/// also a value comparison with the same value, and if that comparison
970/// determines the outcome of this comparison. If so, simplify TI. This does a
971/// very limited form of jump threading.
972bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
973 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
974 Value *PredVal = isValueEqualityComparison(TI: Pred->getTerminator());
975 if (!PredVal)
976 return false; // Not a value comparison in predecessor.
977
978 Value *ThisVal = isValueEqualityComparison(TI);
979 assert(ThisVal && "This isn't a value comparison!!");
980 if (ThisVal != PredVal)
981 return false; // Different predicates.
982
983 // TODO: Preserve branch weight metadata, similarly to how
984 // foldValueComparisonIntoPredecessors preserves it.
985
986 // Find out information about when control will move from Pred to TI's block.
987 std::vector<ValueEqualityComparisonCase> PredCases;
988 BasicBlock *PredDef =
989 getValueEqualityComparisonCases(TI: Pred->getTerminator(), Cases&: PredCases);
990 eliminateBlockCases(BB: PredDef, Cases&: PredCases); // Remove default from cases.
991
992 // Find information about how control leaves this block.
993 std::vector<ValueEqualityComparisonCase> ThisCases;
994 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, Cases&: ThisCases);
995 eliminateBlockCases(BB: ThisDef, Cases&: ThisCases); // Remove default from cases.
996
997 // If TI's block is the default block from Pred's comparison, potentially
998 // simplify TI based on this knowledge.
999 if (PredDef == TI->getParent()) {
1000 // If we are here, we know that the value is none of those cases listed in
1001 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1002 // can simplify TI.
1003 if (!valuesOverlap(C1&: PredCases, C2&: ThisCases))
1004 return false;
1005
1006 if (isa<CondBrInst>(Val: TI)) {
1007 // Okay, one of the successors of this condbr is dead. Convert it to a
1008 // uncond br.
1009 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1010 // Insert the new branch.
1011 Instruction *NI = Builder.CreateBr(Dest: ThisDef);
1012 (void)NI;
1013
1014 // Remove PHI node entries for the dead edge.
1015 ThisCases[0].Dest->removePredecessor(Pred: PredDef);
1016
1017 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1018 << "Through successor TI: " << *TI << "Leaving: " << *NI
1019 << "\n");
1020
1021 eraseTerminatorAndDCECond(TI);
1022
1023 if (DTU)
1024 DTU->applyUpdates(
1025 Updates: {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1026
1027 return true;
1028 }
1029
1030 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(Val: TI);
1031 // Okay, TI has cases that are statically dead, prune them away.
1032 SmallPtrSet<Constant *, 16> DeadCases;
1033 for (const ValueEqualityComparisonCase &Case : PredCases)
1034 DeadCases.insert(Ptr: Case.Value);
1035
1036 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1037 << "Through successor TI: " << *TI);
1038
1039 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1040 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1041 --i;
1042 auto *Successor = i->getCaseSuccessor();
1043 if (DTU)
1044 ++NumPerSuccessorCases[Successor];
1045 if (DeadCases.count(Ptr: i->getCaseValue())) {
1046 Successor->removePredecessor(Pred: PredDef);
1047 SI.removeCase(I: i);
1048 if (DTU)
1049 --NumPerSuccessorCases[Successor];
1050 }
1051 }
1052
1053 if (DTU) {
1054 std::vector<DominatorTree::UpdateType> Updates;
1055 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1056 if (I.second == 0)
1057 Updates.push_back(x: {DominatorTree::Delete, PredDef, I.first});
1058 DTU->applyUpdates(Updates);
1059 }
1060
1061 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1062 return true;
1063 }
1064
1065 // Otherwise, TI's block must correspond to some matched value. Find out
1066 // which value (or set of values) this is.
1067 ConstantInt *TIV = nullptr;
1068 BasicBlock *TIBB = TI->getParent();
1069 for (const auto &[Value, Dest] : PredCases)
1070 if (Dest == TIBB) {
1071 if (TIV)
1072 return false; // Cannot handle multiple values coming to this block.
1073 TIV = Value;
1074 }
1075 assert(TIV && "No edge from pred to succ?");
1076
1077 // Okay, we found the one constant that our value can be if we get into TI's
1078 // BB. Find out which successor will unconditionally be branched to.
1079 BasicBlock *TheRealDest = nullptr;
1080 for (const auto &[Value, Dest] : ThisCases)
1081 if (Value == TIV) {
1082 TheRealDest = Dest;
1083 break;
1084 }
1085
1086 // If not handled by any explicit cases, it is handled by the default case.
1087 if (!TheRealDest)
1088 TheRealDest = ThisDef;
1089
1090 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1091
1092 // Remove PHI node entries for dead edges.
1093 BasicBlock *CheckEdge = TheRealDest;
1094 for (BasicBlock *Succ : successors(BB: TIBB))
1095 if (Succ != CheckEdge) {
1096 if (Succ != TheRealDest)
1097 RemovedSuccs.insert(Ptr: Succ);
1098 Succ->removePredecessor(Pred: TIBB);
1099 } else
1100 CheckEdge = nullptr;
1101
1102 // Insert the new branch.
1103 Instruction *NI = Builder.CreateBr(Dest: TheRealDest);
1104 (void)NI;
1105
1106 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1107 << "Through successor TI: " << *TI << "Leaving: " << *NI
1108 << "\n");
1109
1110 eraseTerminatorAndDCECond(TI);
1111 if (DTU) {
1112 SmallVector<DominatorTree::UpdateType, 2> Updates;
1113 Updates.reserve(N: RemovedSuccs.size());
1114 for (auto *RemovedSucc : RemovedSuccs)
1115 Updates.push_back(Elt: {DominatorTree::Delete, TIBB, RemovedSucc});
1116 DTU->applyUpdates(Updates);
1117 }
1118 return true;
1119}
1120
1121namespace {
1122
1123/// This class implements a stable ordering of constant
1124/// integers that does not depend on their address. This is important for
1125/// applications that sort ConstantInt's to ensure uniqueness.
1126struct ConstantIntOrdering {
1127 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1128 return LHS->getValue().ult(RHS: RHS->getValue());
1129 }
1130};
1131
1132} // end anonymous namespace
1133
1134static int constantIntSortPredicate(ConstantInt *const *P1,
1135 ConstantInt *const *P2) {
1136 const ConstantInt *LHS = *P1;
1137 const ConstantInt *RHS = *P2;
1138 if (LHS == RHS)
1139 return 0;
1140 return LHS->getValue().ult(RHS: RHS->getValue()) ? 1 : -1;
1141}
1142
1143/// Get Weights of a given terminator, the default weight is at the front
1144/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1145/// metadata.
1146static void getBranchWeights(Instruction *TI,
1147 SmallVectorImpl<uint64_t> &Weights) {
1148 MDNode *MD = TI->getMetadata(KindID: LLVMContext::MD_prof);
1149 assert(MD && "Invalid branch-weight metadata");
1150 extractFromBranchWeightMD64(ProfileData: MD, Weights);
1151
1152 // If TI is a conditional eq, the default case is the false case,
1153 // and the corresponding branch-weight data is at index 2. We swap the
1154 // default weight to be the first entry.
1155 if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
1156 assert(Weights.size() == 2);
1157 auto *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition());
1158 if (!ICI)
1159 return;
1160
1161 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1162 std::swap(a&: Weights.front(), b&: Weights.back());
1163 }
1164}
1165
1166static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
1167 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1168 Instruction *PTI = PredBlock->getTerminator();
1169
1170 // If we have bonus instructions, clone them into the predecessor block.
1171 // Note that there may be multiple predecessor blocks, so we cannot move
1172 // bonus instructions to a predecessor block.
1173 for (Instruction &BonusInst : *BB) {
1174 if (BonusInst.isTerminator())
1175 continue;
1176
1177 // Skip cloning pseudo probes into the predecessor, as it would overcount
1178 // otherwise.
1179 if (isa<PseudoProbeInst>(Val: BonusInst))
1180 continue;
1181
1182 Instruction *NewBonusInst = BonusInst.clone();
1183
1184 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(Other: PTI->getDebugLoc())) {
1185 // Unless the instruction has the same !dbg location as the original
1186 // branch, drop it. When we fold the bonus instructions we want to make
1187 // sure we reset their debug locations in order to avoid stepping on
1188 // dead code caused by folding dead branches.
1189 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1190 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1191 mapAtomInstance(DL, VMap);
1192 }
1193
1194 RemapInstruction(I: NewBonusInst, VM&: VMap,
1195 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1196
1197 // If we speculated an instruction, we need to drop any metadata that may
1198 // result in undefined behavior, as the metadata might have been valid
1199 // only given the branch precondition.
1200 // Similarly strip attributes on call parameters that may cause UB in
1201 // location the call is moved to.
1202 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1203
1204 NewBonusInst->insertInto(ParentBB: PredBlock, It: PTI->getIterator());
1205 auto Range = NewBonusInst->cloneDebugInfoFrom(From: &BonusInst);
1206 RemapDbgRecordRange(M: NewBonusInst->getModule(), Range, VM&: VMap,
1207 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1208
1209 NewBonusInst->takeName(V: &BonusInst);
1210 BonusInst.setName(NewBonusInst->getName() + ".old");
1211 VMap[&BonusInst] = NewBonusInst;
1212
1213 // Update (liveout) uses of bonus instructions,
1214 // now that the bonus instruction has been cloned into predecessor.
1215 // Note that we expect to be in a block-closed SSA form for this to work!
1216 for (Use &U : make_early_inc_range(Range: BonusInst.uses())) {
1217 auto *UI = cast<Instruction>(Val: U.getUser());
1218 auto *PN = dyn_cast<PHINode>(Val: UI);
1219 if (!PN) {
1220 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1221 "If the user is not a PHI node, then it should be in the same "
1222 "block as, and come after, the original bonus instruction.");
1223 continue; // Keep using the original bonus instruction.
1224 }
1225 // Is this the block-closed SSA form PHI node?
1226 if (PN->getIncomingBlock(U) == BB)
1227 continue; // Great, keep using the original bonus instruction.
1228 // The only other alternative is an "use" when coming from
1229 // the predecessor block - here we should refer to the cloned bonus instr.
1230 assert(PN->getIncomingBlock(U) == PredBlock &&
1231 "Not in block-closed SSA form?");
1232 U.set(NewBonusInst);
1233 }
1234 }
1235
1236 // Key Instructions: We may have propagated atom info into the pred. If the
1237 // pred's terminator already has atom info do nothing as merging would drop
1238 // one atom group anyway. If it doesn't, propagte the remapped atom group
1239 // from BB's terminator.
1240 if (auto &PredDL = PTI->getDebugLoc()) {
1241 auto &DL = BB->getTerminator()->getDebugLoc();
1242 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1243 PredDL.isSameSourceLocation(Other: DL)) {
1244 PTI->setDebugLoc(DL);
1245 RemapSourceAtom(I: PTI, VM&: VMap);
1246 }
1247 }
1248}
1249
1250bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1251 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1252 BasicBlock *BB = TI->getParent();
1253 BasicBlock *Pred = PTI->getParent();
1254
1255 SmallVector<DominatorTree::UpdateType, 32> Updates;
1256
1257 // Figure out which 'cases' to copy from SI to PSI.
1258 std::vector<ValueEqualityComparisonCase> BBCases;
1259 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, Cases&: BBCases);
1260
1261 std::vector<ValueEqualityComparisonCase> PredCases;
1262 BasicBlock *PredDefault = getValueEqualityComparisonCases(TI: PTI, Cases&: PredCases);
1263
1264 // Based on whether the default edge from PTI goes to BB or not, fill in
1265 // PredCases and PredDefault with the new switch cases we would like to
1266 // build.
1267 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1268
1269 // Update the branch weight metadata along the way
1270 SmallVector<uint64_t, 8> Weights;
1271 bool PredHasWeights = hasBranchWeightMD(I: *PTI);
1272 bool SuccHasWeights = hasBranchWeightMD(I: *TI);
1273
1274 if (PredHasWeights) {
1275 getBranchWeights(TI: PTI, Weights);
1276 // branch-weight metadata is inconsistent here.
1277 if (Weights.size() != 1 + PredCases.size())
1278 PredHasWeights = SuccHasWeights = false;
1279 } else if (SuccHasWeights)
1280 // If there are no predecessor weights but there are successor weights,
1281 // populate Weights with 1, which will later be scaled to the sum of
1282 // successor's weights
1283 Weights.assign(NumElts: 1 + PredCases.size(), Elt: 1);
1284
1285 SmallVector<uint64_t, 8> SuccWeights;
1286 if (SuccHasWeights) {
1287 getBranchWeights(TI, Weights&: SuccWeights);
1288 // branch-weight metadata is inconsistent here.
1289 if (SuccWeights.size() != 1 + BBCases.size())
1290 PredHasWeights = SuccHasWeights = false;
1291 } else if (PredHasWeights)
1292 SuccWeights.assign(NumElts: 1 + BBCases.size(), Elt: 1);
1293
1294 if (PredDefault == BB) {
1295 // If this is the default destination from PTI, only the edges in TI
1296 // that don't occur in PTI, or that branch to BB will be activated.
1297 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1298 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1299 if (PredCases[i].Dest != BB)
1300 PTIHandled.insert(x: PredCases[i].Value);
1301 else {
1302 // The default destination is BB, we don't need explicit targets.
1303 std::swap(a&: PredCases[i], b&: PredCases.back());
1304
1305 if (PredHasWeights || SuccHasWeights) {
1306 // Increase weight for the default case.
1307 Weights[0] += Weights[i + 1];
1308 std::swap(a&: Weights[i + 1], b&: Weights.back());
1309 Weights.pop_back();
1310 }
1311
1312 PredCases.pop_back();
1313 --i;
1314 --e;
1315 }
1316
1317 // Reconstruct the new switch statement we will be building.
1318 if (PredDefault != BBDefault) {
1319 PredDefault->removePredecessor(Pred);
1320 if (DTU && PredDefault != BB)
1321 Updates.push_back(Elt: {DominatorTree::Delete, Pred, PredDefault});
1322 PredDefault = BBDefault;
1323 ++NewSuccessors[BBDefault];
1324 }
1325
1326 unsigned CasesFromPred = Weights.size();
1327 uint64_t ValidTotalSuccWeight = 0;
1328 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1329 if (!PTIHandled.count(x: BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1330 PredCases.push_back(x: BBCases[i]);
1331 ++NewSuccessors[BBCases[i].Dest];
1332 if (SuccHasWeights || PredHasWeights) {
1333 // The default weight is at index 0, so weight for the ith case
1334 // should be at index i+1. Scale the cases from successor by
1335 // PredDefaultWeight (Weights[0]).
1336 Weights.push_back(Elt: Weights[0] * SuccWeights[i + 1]);
1337 ValidTotalSuccWeight += SuccWeights[i + 1];
1338 }
1339 }
1340
1341 if (SuccHasWeights || PredHasWeights) {
1342 ValidTotalSuccWeight += SuccWeights[0];
1343 // Scale the cases from predecessor by ValidTotalSuccWeight.
1344 for (unsigned i = 1; i < CasesFromPred; ++i)
1345 Weights[i] *= ValidTotalSuccWeight;
1346 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1347 Weights[0] *= SuccWeights[0];
1348 }
1349 } else {
1350 // If this is not the default destination from PSI, only the edges
1351 // in SI that occur in PSI with a destination of BB will be
1352 // activated.
1353 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1354 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1355 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1356 if (PredCases[i].Dest == BB) {
1357 PTIHandled.insert(x: PredCases[i].Value);
1358
1359 if (PredHasWeights || SuccHasWeights) {
1360 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1361 std::swap(a&: Weights[i + 1], b&: Weights.back());
1362 Weights.pop_back();
1363 }
1364
1365 std::swap(a&: PredCases[i], b&: PredCases.back());
1366 PredCases.pop_back();
1367 --i;
1368 --e;
1369 }
1370
1371 // Okay, now we know which constants were sent to BB from the
1372 // predecessor. Figure out where they will all go now.
1373 for (const ValueEqualityComparisonCase &Case : BBCases)
1374 if (PTIHandled.count(x: Case.Value)) {
1375 // If this is one we are capable of getting...
1376 if (PredHasWeights || SuccHasWeights)
1377 Weights.push_back(Elt: WeightsForHandled[Case.Value]);
1378 PredCases.push_back(x: Case);
1379 ++NewSuccessors[Case.Dest];
1380 PTIHandled.erase(x: Case.Value); // This constant is taken care of
1381 }
1382
1383 // If there are any constants vectored to BB that TI doesn't handle,
1384 // they must go to the default destination of TI.
1385 for (ConstantInt *I : PTIHandled) {
1386 if (PredHasWeights || SuccHasWeights)
1387 Weights.push_back(Elt: WeightsForHandled[I]);
1388 PredCases.push_back(x: ValueEqualityComparisonCase(I, BBDefault));
1389 ++NewSuccessors[BBDefault];
1390 }
1391 }
1392
1393 // Okay, at this point, we know which new successor Pred will get. Make
1394 // sure we update the number of entries in the PHI nodes for these
1395 // successors.
1396 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1397 if (DTU) {
1398 SuccsOfPred = {llvm::from_range, successors(BB: Pred)};
1399 Updates.reserve(N: Updates.size() + NewSuccessors.size());
1400 }
1401 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1402 NewSuccessors) {
1403 for (auto I : seq(Size: NewSuccessor.second)) {
1404 (void)I;
1405 addPredecessorToBlock(Succ: NewSuccessor.first, NewPred: Pred, ExistPred: BB);
1406 }
1407 if (DTU && !SuccsOfPred.contains(Ptr: NewSuccessor.first))
1408 Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewSuccessor.first});
1409 }
1410
1411 Builder.SetInsertPoint(PTI);
1412 // Convert pointer to int before we switch.
1413 if (CV->getType()->isPointerTy()) {
1414 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1415 "Should not end up here with unstable pointers");
1416 CV =
1417 Builder.CreatePtrToInt(V: CV, DestTy: DL.getIntPtrType(CV->getType()), Name: "magicptr");
1418 }
1419
1420 // Now that the successors are updated, create the new Switch instruction.
1421 SwitchInst *NewSI = Builder.CreateSwitch(V: CV, Dest: PredDefault, NumCases: PredCases.size());
1422 NewSI->setDebugLoc(PTI->getDebugLoc());
1423 for (ValueEqualityComparisonCase &V : PredCases)
1424 NewSI->addCase(OnVal: V.Value, Dest: V.Dest);
1425
1426 if (PredHasWeights || SuccHasWeights)
1427 setFittedBranchWeights(I&: *NewSI, Weights, /*IsExpected=*/false,
1428 /*ElideAllZero=*/true);
1429
1430 eraseTerminatorAndDCECond(TI: PTI);
1431
1432 // Okay, last check. If BB is still a successor of PSI, then we must
1433 // have an infinite loop case. If so, add an infinitely looping block
1434 // to handle the case to preserve the behavior of the code.
1435 BasicBlock *InfLoopBlock = nullptr;
1436 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1437 if (NewSI->getSuccessor(idx: i) == BB) {
1438 if (!InfLoopBlock) {
1439 // Insert it at the end of the function, because it's either code,
1440 // or it won't matter if it's hot. :)
1441 InfLoopBlock =
1442 BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
1443 UncondBrInst::Create(Target: InfLoopBlock, InsertBefore: InfLoopBlock);
1444 if (DTU)
1445 Updates.push_back(
1446 Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1447 }
1448 NewSI->setSuccessor(idx: i, NewSucc: InfLoopBlock);
1449 }
1450
1451 if (DTU) {
1452 if (InfLoopBlock)
1453 Updates.push_back(Elt: {DominatorTree::Insert, Pred, InfLoopBlock});
1454
1455 Updates.push_back(Elt: {DominatorTree::Delete, Pred, BB});
1456
1457 DTU->applyUpdates(Updates);
1458 }
1459
1460 ++NumFoldValueComparisonIntoPredecessors;
1461 return true;
1462}
1463
1464/// The specified terminator is a value equality comparison instruction
1465/// (either a switch or a branch on "X == c").
1466/// See if any of the predecessors of the terminator block are value comparisons
1467/// on the same value. If so, and if safe to do so, fold them together.
1468bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1469 IRBuilder<> &Builder) {
1470 BasicBlock *BB = TI->getParent();
1471 Value *CV = isValueEqualityComparison(TI); // CondVal
1472 assert(CV && "Not a comparison?");
1473
1474 bool Changed = false;
1475
1476 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1477 while (!Preds.empty()) {
1478 BasicBlock *Pred = Preds.pop_back_val();
1479 Instruction *PTI = Pred->getTerminator();
1480
1481 // Don't try to fold into itself.
1482 if (Pred == BB)
1483 continue;
1484
1485 // See if the predecessor is a comparison with the same value.
1486 Value *PCV = isValueEqualityComparison(TI: PTI); // PredCondVal
1487 if (PCV != CV)
1488 continue;
1489
1490 SmallSetVector<BasicBlock *, 4> FailBlocks;
1491 if (!safeToMergeTerminators(SI1: TI, SI2: PTI, FailBlocks: &FailBlocks)) {
1492 for (auto *Succ : FailBlocks) {
1493 if (!SplitBlockPredecessors(BB: Succ, Preds: TI->getParent(), Suffix: ".fold.split", DTU))
1494 return false;
1495 }
1496 }
1497
1498 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1499 Changed = true;
1500 }
1501 return Changed;
1502}
1503
1504// If we would need to insert a select that uses the value of this invoke
1505// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1506// need to do this), we can't hoist the invoke, as there is nowhere to put the
1507// select in this case.
1508static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
1509 Instruction *I1, Instruction *I2) {
1510 for (BasicBlock *Succ : successors(BB: BB1)) {
1511 for (const PHINode &PN : Succ->phis()) {
1512 Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
1513 Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
1514 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1515 return false;
1516 }
1517 }
1518 }
1519 return true;
1520}
1521
// Bit flags describing interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across the skipped ones.
enum SkipFlags {
  SkipReadMem = 1 << 0,            // A skipped instruction may read memory.
  SkipSideEffect = 1 << 1,         // A skipped instruction has side effects.
  SkipImplicitControlFlow = 1 << 2 // A skipped instruction may not fall through.
};
1530
1531static unsigned skippedInstrFlags(Instruction *I) {
1532 // Pseudo probes don't constrain reordering of other instructions.
1533 if (isa<PseudoProbeInst>(Val: I))
1534 return 0;
1535 unsigned Flags = 0;
1536 if (I->mayReadFromMemory())
1537 Flags |= SkipReadMem;
1538 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1539 // inalloca) across stacksave/stackrestore boundaries.
1540 if (I->mayHaveSideEffects() || isa<AllocaInst>(Val: I))
1541 Flags |= SkipSideEffect;
1542 if (!isGuaranteedToTransferExecutionToSuccessor(I))
1543 Flags |= SkipImplicitControlFlow;
1544 return Flags;
1545}
1546
1547// Returns true if it is safe to reorder an instruction across preceding
1548// instructions in a basic block.
1549static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1550 // Don't reorder a store over a load.
1551 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1552 return false;
1553
1554 // If we have seen an instruction with side effects, it's unsafe to reorder an
1555 // instruction which reads memory or itself has side effects.
1556 if ((Flags & SkipSideEffect) &&
1557 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(Val: I)))
1558 return false;
1559
1560 // Reordering across an instruction which does not necessarily transfer
1561 // control to the next instruction is speculation.
1562 if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I))
1563 return false;
1564
1565 // Hoisting of llvm.deoptimize is only legal together with the next return
1566 // instruction, which this pass is not always able to do.
1567 if (auto *CB = dyn_cast<CallBase>(Val: I))
1568 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1569 return false;
1570
1571 // It's also unsafe/illegal to hoist an instruction above its instruction
1572 // operands
1573 BasicBlock *BB = I->getParent();
1574 for (Value *Op : I->operands()) {
1575 if (auto *J = dyn_cast<Instruction>(Val: Op))
1576 if (J->getParent() == BB)
1577 return false;
1578 }
1579
1580 return true;
1581}
1582
1583static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1584
1585/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1586/// instructions \p I1 and \p I2 can and should be hoisted.
1587static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
1588 const TargetTransformInfo &TTI) {
1589 // If we're going to hoist a call, make sure that the two instructions
1590 // we're commoning/hoisting are both marked with musttail, or neither of
1591 // them is marked as such. Otherwise, we might end up in a situation where
1592 // we hoist from a block where the terminator is a `ret` to a block where
1593 // the terminator is a `br`, and `musttail` calls expect to be followed by
1594 // a return.
1595 auto *C1 = dyn_cast<CallInst>(Val: I1);
1596 auto *C2 = dyn_cast<CallInst>(Val: I2);
1597 if (C1 && C2)
1598 if (C1->isMustTailCall() != C2->isMustTailCall())
1599 return false;
1600
1601 if (!TTI.isProfitableToHoist(I: I1) || !TTI.isProfitableToHoist(I: I2))
1602 return false;
1603
1604 // If any of the two call sites has nomerge or convergent attribute, stop
1605 // hoisting.
1606 if (const auto *CB1 = dyn_cast<CallBase>(Val: I1))
1607 if (CB1->cannotMerge() || CB1->isConvergent())
1608 return false;
1609 if (const auto *CB2 = dyn_cast<CallBase>(Val: I2))
1610 if (CB2->cannotMerge() || CB2->isConvergent())
1611 return false;
1612
1613 return true;
1614}
1615
1616/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1617/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1618/// hoistCommonCodeFromSuccessors. e.g. The input:
1619/// I1 DVRs: { x, z },
1620/// OtherInsts: { I2 DVRs: { x, y, z } }
1621/// would result in hoisting only DbgVariableRecord x.
1622static void hoistLockstepIdenticalDbgVariableRecords(
1623 Instruction *TI, Instruction *I1,
1624 SmallVectorImpl<Instruction *> &OtherInsts) {
1625 if (!I1->hasDbgRecords())
1626 return;
1627 using CurrentAndEndIt =
1628 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1629 // Vector of {Current, End} iterators.
1630 SmallVector<CurrentAndEndIt> Itrs;
1631 Itrs.reserve(N: OtherInsts.size() + 1);
1632 // Helper lambdas for lock-step checks:
1633 // Return true if this Current == End.
1634 auto atEnd = [](const CurrentAndEndIt &Pair) {
1635 return Pair.first == Pair.second;
1636 };
1637 // Return true if all Current are identical.
1638 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1639 return all_of(Range: make_first_range(c: ArrayRef(Itrs).drop_front()),
1640 P: [&](DbgRecord::self_iterator I) {
1641 return Itrs[0].first->isIdenticalToWhenDefined(R: *I);
1642 });
1643 };
1644
1645 // Collect the iterators.
1646 Itrs.push_back(
1647 Elt: {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1648 for (Instruction *Other : OtherInsts) {
1649 if (!Other->hasDbgRecords())
1650 return;
1651 Itrs.push_back(
1652 Elt: {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1653 }
1654
1655 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1656 // the lock-step DbgRecord are identical, hoist all of them to TI.
1657 // This replicates the dbg.* intrinsic behaviour in
1658 // hoistCommonCodeFromSuccessors.
1659 while (none_of(Range&: Itrs, P: atEnd)) {
1660 bool HoistDVRs = allIdentical(Itrs);
1661 for (CurrentAndEndIt &Pair : Itrs) {
1662 // Increment Current iterator now as we may be about to move the
1663 // DbgRecord.
1664 DbgRecord &DR = *Pair.first++;
1665 if (HoistDVRs) {
1666 DR.removeFromParent();
1667 TI->getParent()->insertDbgRecordBefore(DR: &DR, Here: TI->getIterator());
1668 }
1669 }
1670 }
1671}
1672
1673static bool areIdenticalUpToCommutativity(const Instruction *I1,
1674 const Instruction *I2) {
1675 if (I1->isIdenticalToWhenDefined(I: I2, /*IntersectAttrs=*/true))
1676 return true;
1677
1678 if (auto *Cmp1 = dyn_cast<CmpInst>(Val: I1))
1679 if (auto *Cmp2 = dyn_cast<CmpInst>(Val: I2))
1680 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1681 Cmp1->getOperand(i_nocapture: 0) == Cmp2->getOperand(i_nocapture: 1) &&
1682 Cmp1->getOperand(i_nocapture: 1) == Cmp2->getOperand(i_nocapture: 0);
1683
1684 if (I1->isCommutative() && I1->isSameOperationAs(I: I2)) {
1685 return I1->getOperand(i: 0) == I2->getOperand(i: 1) &&
1686 I1->getOperand(i: 1) == I2->getOperand(i: 0) &&
1687 equal(LRange: drop_begin(RangeOrContainer: I1->operands(), N: 2), RRange: drop_begin(RangeOrContainer: I2->operands(), N: 2));
1688 }
1689
1690 return false;
1691}
1692
/// If the target supports conditional faulting,
/// we look for the following pattern:
/// \code
///   BB:
///     ...
///     %cond = icmp ult %x, %y
///     br i1 %cond, label %TrueBB, label %FalseBB
///   FalseBB:
///     store i32 1, ptr %q, align 4
///     ...
///   TrueBB:
///     %maskedloadstore = load i32, ptr %b, align 4
///     store i32 %maskedloadstore, ptr %p, align 4
///     ...
/// \endcode
///
/// and transform it into:
///
/// \code
///   BB:
///     ...
///     %cond = icmp ult %x, %y
///     %maskedloadstore = cload i32, ptr %b, %cond
///     cstore i32 %maskedloadstore, ptr %p, %cond
///     cstore i32 1, ptr %q, ~%cond
///     br i1 %cond, label %TrueBB, label %FalseBB
///   FalseBB:
///     ...
///   TrueBB:
///     ...
/// \endcode
///
/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
/// e.g.
///
/// \code
///   %vcond = bitcast i1 %cond to <1 x i1>
///   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
///                         (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
///   %maskedloadstore = bitcast <1 x i32> %v0 to i32
///   call void @llvm.masked.store.v1i32.p0
///                          (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
///   %cond.not = xor i1 %cond, true
///   %vcond.not = bitcast i1 %cond.not to <1 x i1>
///   call void @llvm.masked.store.v1i32.p0
///              (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
/// \endcode
///
/// So we need to turn hoisted load/store into cload/cstore.
///
/// \param BI The branch instruction. Its condition is the source of every
///        mask that is built.
/// \param SpeculatedConditionalLoadsStores The load/store instructions that
///        will be speculated. Each one is replaced by a masked intrinsic call
///        and erased.
/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
///        When it holds a value a single mask is built (the negated condition
///        if the value is true, the condition itself otherwise). When it is
///        empty both masks are built before \p BI and each access picks one
///        depending on whether its parent block is successor 0 of \p BI.
/// \param Sel May be null. Only consulted when \p Invert holds a value: the
///        mask is then emitted before \p Sel rather than before the last
///        speculated access, and a masked load may be re-positioned before a
///        non-PHI user located in the block of \p BI.
static void hoistConditionalLoadsStores(
    CondBrInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  // Masks are single-element vectors of i1.
  auto *VCondTy = FixedVectorType::get(ElementType: Type::getInt1Ty(C&: Context), NumElts: 1);
  auto *Cond = BI->getCondition();
  BasicBlock *BB = BI->getParent();
  // Construct the condition if needed.
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // One mask serves every access; it is emitted before Sel if given,
    // otherwise before the last speculated load/store.
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        V: *Invert ? Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)) : Cond,
        DestTy: VCondTy);
  } else {
    // Both polarities are needed; the choice is made per access in the loop
    // below.
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        V: Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)), DestTy: VCondTy);
    MaskTrue = Builder.CreateBitCast(V: Cond, DestTy: VCondTy);
  }
  // Strip any chain of bitcast instructions and return the underlying value.
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(Val: V))
      V = BitCast->getOperand(i_nocapture: 0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    // With Invert set the replacement is emitted at the access itself,
    // otherwise right before the branch.
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(i: 0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(i: 0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          // NOTE(review): PN is reassigned for every user, so after the loop
          // it reflects only the last user visited - confirm this is intended
          // when the load has both PHI and non-PHI users.
          if ((PN = dyn_cast<PHINode>(Val: U))) {
            // The value the PHI receives from BB becomes the pass-through of
            // the masked load.
            PassThru = Builder.CreateBitCast(
                V: PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                DestTy: FixedVectorType::get(ElementType: Ty, NumElts: 1));
          } else if (auto *Ins = cast<Instruction>(Val: U);
                     Sel && Ins->getParent() == BB) {
            // This happens when store or/and a speculative instruction between
            // load and store were hoisted to the BB. Make sure the masked load
            // inserted before its use.
            // We assume there's one of such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          Ty: FixedVectorType::get(ElementType: Ty, NumElts: 1), Ptr: Op0, Alignment: LI->getAlign(), Mask, PassThru);
      // Cast the one-element vector result back to the original scalar type.
      Value *NewLoadStore = Builder.CreateBitCast(V: MaskedLoadStore, DestTy: Ty);
      if (PN)
        PN->setIncomingValue(i: PN->getBasicBlockIndex(BB), V: NewLoadStore);
      I->replaceAllUsesWith(V: NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          V: PeekThroughBitcasts(Op0), DestTy: FixedVectorType::get(ElementType: Op0->getType(), NumElts: 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          Val: StoredVal, Ptr: I->getOperand(i: 1), Alignment: cast<StoreInst>(Val: I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Pointer types are not supported, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Does not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(KindID: LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(CR: getConstantRangeFromMetadata(RangeMD: *Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata(KnownIDs: {LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    at::deleteAssignmentMarkers(Inst: I);
    I->eraseMetadataIf(Pred: [](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    // Whatever metadata is still left on the original access is carried over
    // before the access is removed.
    MaskedLoadStore->copyMetadata(SrcInst: *I);
    I->eraseFromParent();
  }
}
1839
1840static bool isSafeCheapLoadStore(const Instruction *I,
1841 const TargetTransformInfo &TTI) {
1842 // Not handle volatile or atomic.
1843 bool IsStore = false;
1844 if (auto *L = dyn_cast<LoadInst>(Val: I)) {
1845 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1846 return false;
1847 } else if (auto *S = dyn_cast<StoreInst>(Val: I)) {
1848 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1849 return false;
1850 IsStore = true;
1851 } else
1852 return false;
1853
1854 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1855 // That's why we have the alignment limitation.
1856 // FIXME: Update the prototype of the intrinsics?
1857 return TTI.hasConditionalLoadStoreForType(Ty: getLoadStoreType(I), IsStore) &&
1858 getLoadStoreAlignment(I) < Value::MaximumAlignment;
1859}
1860
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matching
/// instructions only. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
///
/// \param TI The terminator of the block whose successors are scanned.
///        Hoisted instructions are placed in front of it.
/// \param AllInstsEqOnly If true, bail out unless all successors have the same
///        size, identical terminators and pairwise matching instructions.
/// \returns true if an instruction or the common terminator was hoisted.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
  for (auto *Succ : UniqueSuccessors) {
    if (Succ->hasAddressTaken())
      return false;
    // Use getUniquePredecessor instead of getSinglePredecessor to support
    // multi-cases successors in switch.
    if (Succ->getUniquePredecessor())
      continue;
    // If Succ has >1 predecessors, continue to check if the Succ contains only
    // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
    // can relax the condition based on the assumption that the program would
    // never enter Succ and trigger such an UB.
    if (isa<UnreachableInst>(Val: *Succ->begin()))
      continue;
    return false;
  }
  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : UniqueSuccessors) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // A successor that starts with a PHI node is not handled.
    if (isa<PHINode>(Val: *SuccItr))
      return false;
    SuccIterPairs.push_back(Elt: SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.

    // Check if sizes and terminators of all successors match.
    unsigned Size0 = UniqueSuccessors[0]->size();
    Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
    bool AllSame =
        all_of(Range: drop_begin(RangeOrContainer&: UniqueSuccessors), P: [Term0, Size0](BasicBlock *Succ) {
          return Succ->getTerminator()->isIdenticalTo(I: Term0) &&
                 Succ->size() == Size0;
        });
    if (!AllSame)
      return false;
    // Compare the successors instruction by instruction, stepping all of them
    // together with the lock-step reverse iterator.
    LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
    while (LRI.isValid()) {
      Instruction *I0 = (*LRI)[0];
      if (any_of(Range: *LRI, P: [I0](Instruction *I) {
            return !areIdenticalUpToCommutativity(I1: I0, I2: I);
          })) {
        return false;
      }
      --LRI;
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(C&: SuccIterPairs,
             P: [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  // Each iteration looks at the current instruction of every remaining
  // successor and either hoists them, skips them, or stops.
  for (;;) {
    // Split the cursors into the first successor and all the others.
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(c&: OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      // Instructions whose MMRA metadata differs are treated as not identical.
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(Elt: &*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(
                 TI, I1, OtherSuccTIs&: OtherInsts, UniqueSuccessors: UniqueSuccessors.getArrayRef()) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I: I1, Flags: SkipFlagsBB1) &&
          all_of(Range&: OtherSuccIterPairRange, P: [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I: I2, Flags: SkipFlagsBB2) &&
                   shouldHoistCommonInstructions(I1, I2, TTI);
          });
    }

    if (AllInstsAreIdentical) {
      // Step the cursor past I1 before I1 is moved out of its block.
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(InsertPos: TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        // Likewise step the cursor past I2 before I2 is erased below.
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(V: I1);
        I1->andIRFlags(V: I2);
        if (auto *CB = dyn_cast<CallBase>(Val: I1)) {
          bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG Compile.
          (void)Success;
        }

        combineMetadataForCSE(K: I1, J: I2, DoesKMove: true);
        // I1 and I2 are being combined into a single instruction.  Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(LocA: I1->getDebugLoc(), LocB: I2->getDebugLoc());
        I2->eraseFromParent();
      }
      // NumHoistCommonCode is bumped only by the first hoist of this call;
      // NumHoistCommonInstrs is bumped for every hoisted instruction group.
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
2057
/// Hoist the terminator shared by the successors of \p TI into the block of
/// \p TI: a clone of \p I1 is inserted before \p TI, PHI nodes in the new
/// successors are updated (with selects on the branch condition when \p TI is
/// a conditional branch and incoming values disagree), and \p TI is erased.
///
/// \param TI The terminator to replace.
/// \param I1 The terminator of the first successor; this is what gets cloned.
/// \param OtherSuccTIs The terminators of the remaining successors.
/// \param UniqueSuccessors The distinct successors of \p TI, used to queue the
///        dominator tree edge deletions.
/// \returns true if the terminator was hoisted, false if the transform was
///          rejected before changing anything.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs,
    ArrayRef<BasicBlock *> UniqueSuccessors) {

  // Non-null only when TI is a conditional branch (the "if statement" case).
  auto *BI = dyn_cast<CondBrInst>(Val: TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(Val: I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(Val: I1))
    return false;

  // Legality check: look at every PHI in the blocks the hoisted terminator
  // will branch to and compare what it receives from the successors of TI.
  for (BasicBlock *Succ : successors(BB: BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(BB: OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow than converting it to a select.
        // Without a conditional branch, disagreeing values cannot be
        // reconciled at all, so give up.
        if (!BI || passingValueIsAlwaysUndefined(V: BB1V, I: &PN) ||
            passingValueIsAlwaysUndefined(V: BB2V, I: &PN))
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts&: OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(ParentBB: TIParent, It: TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    // The clone produces a value: redirect all users of the original
    // terminators to it and let it take over the name of I1.
    I1->replaceAllUsesWith(V: NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(V: NT);
    NT->takeName(V: I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  SmallVector<DebugLoc, 4> Locs;
  Locs.push_back(Elt: I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(Elt: OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing.  Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them.  If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // One select per distinct (BB1 value, BB2 value) pair; reused across PHIs.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB: BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree.  Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(x&: BB1V, y&: BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          SI = cast<SelectInst>(Val: Builder.CreateSelectFMF(
              C: BI->getCondition(), True: BB1V, False: BB2V,
              FMFSource: isa<FPMathOperator>(Val: PN) ? &PN : nullptr,
              Name: BB1V->getName() + "." + BB2V->getName(), MDFrom: BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, V: SI);
      }
    }
  }

  SmallVector<DominatorTree::UpdateType, 4> Updates;

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB: BB1)) {
    addPredecessorToBlock(Succ, NewPred: TIParent, ExistPred: BB1);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU) {
    // TI might be a switch with multi-cases destination, so we need to care for
    // the duplication of successors.
    for (BasicBlock *Succ : UniqueSuccessors)
      Updates.push_back(Elt: {DominatorTree::Delete, TIParent, Succ});
  }

  // The old terminator goes away; the queued dominator tree updates are
  // applied only afterwards.
  eraseTerminatorAndDCECond(TI);
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
2187
2188// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2189// into variables.
2190static bool replacingOperandWithVariableIsCheap(const Instruction *I,
2191 int OpIdx) {
2192 // Divide/Remainder by constant is typically much cheaper than by variable.
2193 if (I->isIntDivRem())
2194 return OpIdx != 1;
2195 return !isa<IntrinsicInst>(Val: I);
2196}
2197
2198// All instructions in Insts belong to different blocks that all unconditionally
2199// branch to a common successor. Analyze each instruction and return true if it
2200// would be possible to sink them into their successor, creating one common
2201// instruction instead. For every value that would be required to be provided by
2202// PHI node (because an operand varies in each input block), add to PHIOperands.
2203static bool canSinkInstructions(
2204 ArrayRef<Instruction *> Insts,
2205 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2206 // Prune out obviously bad instructions to move. Each instruction must have
2207 // the same number of uses, and we check later that the uses are consistent.
2208 std::optional<unsigned> NumUses;
2209 for (auto *I : Insts) {
2210 // These instructions may change or break semantics if moved.
2211 if (isa<PHINode>(Val: I) || I->isEHPad() || isa<AllocaInst>(Val: I) ||
2212 I->getType()->isTokenTy())
2213 return false;
2214
2215 // Do not try to sink an instruction in an infinite loop - it can cause
2216 // this algorithm to infinite loop.
2217 if (I->getParent()->getSingleSuccessor() == I->getParent())
2218 return false;
2219
2220 // Conservatively return false if I is an inline-asm instruction. Sinking
2221 // and merging inline-asm instructions can potentially create arguments
2222 // that cannot satisfy the inline-asm constraints.
2223 // If the instruction has nomerge or convergent attribute, return false.
2224 if (const auto *C = dyn_cast<CallBase>(Val: I))
2225 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2226 return false;
2227
2228 if (!NumUses)
2229 NumUses = I->getNumUses();
2230 else if (NumUses != I->getNumUses())
2231 return false;
2232 }
2233
2234 const Instruction *I0 = Insts.front();
2235 const auto I0MMRA = MMRAMetadata(*I0);
2236 for (auto *I : Insts) {
2237 if (!I->isSameOperationAs(I: I0, flags: Instruction::CompareUsingIntersectedAttrs))
2238 return false;
2239
2240 // Treat MMRAs conservatively. This pass can be quite aggressive and
2241 // could drop a lot of MMRAs otherwise.
2242 if (MMRAMetadata(*I) != I0MMRA)
2243 return false;
2244 }
2245
2246 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2247 // then the other phi operands must match the instructions from Insts. This
2248 // also has to hold true for any phi nodes that would be created as a result
2249 // of sinking. Both of these cases are represented by PhiOperands.
2250 for (const Use &U : I0->uses()) {
2251 auto It = PHIOperands.find(Val: &U);
2252 if (It == PHIOperands.end())
2253 // There may be uses in other blocks when sinking into a loop header.
2254 return false;
2255 if (!equal(LRange&: Insts, RRange&: It->second))
2256 return false;
2257 }
2258
2259 // For calls to be sinkable, they must all be indirect, or have same callee.
2260 // I.e. if we have two direct calls to different callees, we don't want to
2261 // turn that into an indirect call. Likewise, if we have an indirect call,
2262 // and a direct call, we don't actually want to have a single indirect call.
2263 if (isa<CallBase>(Val: I0)) {
2264 auto IsIndirectCall = [](const Instruction *I) {
2265 return cast<CallBase>(Val: I)->isIndirectCall();
2266 };
2267 bool HaveIndirectCalls = any_of(Range&: Insts, P: IsIndirectCall);
2268 bool AllCallsAreIndirect = all_of(Range&: Insts, P: IsIndirectCall);
2269 if (HaveIndirectCalls) {
2270 if (!AllCallsAreIndirect)
2271 return false;
2272 } else {
2273 // All callees must be identical.
2274 Value *Callee = nullptr;
2275 for (const Instruction *I : Insts) {
2276 Value *CurrCallee = cast<CallBase>(Val: I)->getCalledOperand();
2277 if (!Callee)
2278 Callee = CurrCallee;
2279 else if (Callee != CurrCallee)
2280 return false;
2281 }
2282 }
2283 }
2284
2285 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2286 Value *Op = I0->getOperand(i: OI);
2287 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2288 assert(I->getNumOperands() == I0->getNumOperands());
2289 return I->getOperand(i: OI) == I0->getOperand(i: OI);
2290 };
2291 if (!all_of(Range&: Insts, P: SameAsI0)) {
2292 if ((isa<Constant>(Val: Op) && !replacingOperandWithVariableIsCheap(I: I0, OpIdx: OI)) ||
2293 !canReplaceOperandWithVariable(I: I0, OpIdx: OI))
2294 // We can't create a PHI from this GEP.
2295 return false;
2296 auto &Ops = PHIOperands[&I0->getOperandUse(i: OI)];
2297 for (auto *I : Insts)
2298 Ops.push_back(Elt: I->getOperand(i: OI));
2299 }
2300 }
2301 return true;
2302}
2303
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
//
// The instruction taken from Blocks[0] is kept and moved; operands that differ
// between the blocks are replaced by newly created PHI nodes in the successor,
// and the instructions from the other blocks are erased.
static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
  // The common successor, taken from the terminator of the first block.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(Idx: 0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  SmallVector<Instruction*,4> Insts;
  for (auto *BB : Blocks) {
    // The candidate is the instruction right before the terminator.
    Instruction *I = BB->getTerminator();
    I = I->getPrevNode();
    Insts.push_back(Elt: I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable.  However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Range&: Insts, P: [&I0, O](const Instruction *I) {
      return I->getOperand(i: O) != I0->getOperand(i: O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(Elt: I0->getOperand(i: O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(i: O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Ty: Op->getType(), NumReservedValues: Insts.size(), NameStr: Op->getName() + ".sink");
    PN->insertBefore(InsertPos: BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(V: I->getOperand(i: O), BB: I->getParent());
    NewOperands.push_back(Elt: PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(i: O).set(NewOperands[O]);

  I0->moveBefore(BB&: *BBEnd, I: BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions.  We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, since an N-way merge for a CallInst is rare, we use the
      // simplified API instead of the more complex N-way merge API.
      I0->applyMergedLocation(LocA: I0->getDebugLoc(), LocB: I->getDebugLoc());
      combineMetadataForCSE(K: I0, J: I, DoesKMove: true);
      I0->andIRFlags(V: I);
      if (auto *CB = dyn_cast<CallBase>(Val: I0)) {
        bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  // Early-inc iteration: each PHI user is erased inside the loop body.
  for (User *U : make_early_inc_range(Range: I0->users())) {
    // canSinkInstructions checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(Val: U);
    PN->replaceAllUsesWith(V: I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(V: I0);
    I->eraseFromParent();
  }
}
2398
2399/// Check whether BB's predecessors end with unconditional branches. If it is
2400/// true, sink any common code from the predecessors to BB.
2401static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
2402 DomTreeUpdater *DTU) {
2403 // We support two situations:
2404 // (1) all incoming arcs are unconditional
2405 // (2) there are non-unconditional incoming arcs
2406 //
2407 // (2) is very common in switch defaults and
2408 // else-if patterns;
2409 //
2410 // if (a) f(1);
2411 // else if (b) f(2);
2412 //
2413 // produces:
2414 //
2415 // [if]
2416 // / \
2417 // [f(1)] [if]
2418 // | | \
2419 // | | |
2420 // | [f(2)]|
2421 // \ | /
2422 // [ end ]
2423 //
2424 // [end] has two unconditional predecessor arcs and one conditional. The
2425 // conditional refers to the implicit empty 'else' arc. This conditional
2426 // arc can also be caused by an empty default block in a switch.
2427 //
2428 // In this case, we attempt to sink code from all *unconditional* arcs.
2429 // If we can sink instructions from these arcs (determined during the scan
2430 // phase below) we insert a common successor for all unconditional arcs and
2431 // connect that to [end], to enable sinking:
2432 //
2433 // [if]
2434 // / \
2435 // [x(1)] [if]
2436 // | | \
2437 // | | \
2438 // | [x(2)] |
2439 // \ / |
2440 // [sink.split] |
2441 // \ /
2442 // [ end ]
2443 //
2444 SmallVector<BasicBlock*,4> UnconditionalPreds;
2445 bool HaveNonUnconditionalPredecessors = false;
2446 for (auto *PredBB : predecessors(BB)) {
2447 auto *PredBr = dyn_cast<UncondBrInst>(Val: PredBB->getTerminator());
2448 if (PredBr)
2449 UnconditionalPreds.push_back(Elt: PredBB);
2450 else
2451 HaveNonUnconditionalPredecessors = true;
2452 }
2453 if (UnconditionalPreds.size() < 2)
2454 return false;
2455
2456 // We take a two-step approach to tail sinking. First we scan from the end of
2457 // each block upwards in lockstep. If the n'th instruction from the end of each
2458 // block can be sunk, those instructions are added to ValuesToSink and we
2459 // carry on. If we can sink an instruction but need to PHI-merge some operands
2460 // (because they're not identical in each instruction) we add these to
2461 // PHIOperands.
2462 // We prepopulate PHIOperands with the phis that already exist in BB.
2463 DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
2464 for (PHINode &PN : BB->phis()) {
2465 SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
2466 for (const Use &U : PN.incoming_values())
2467 IncomingVals.insert(KV: {PN.getIncomingBlock(U), &U});
2468 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2469 for (BasicBlock *Pred : UnconditionalPreds)
2470 Ops.push_back(Elt: *IncomingVals[Pred]);
2471 }
2472
2473 int ScanIdx = 0;
2474 SmallPtrSet<Value*,4> InstructionsToSink;
2475 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2476 while (LRI.isValid() &&
2477 canSinkInstructions(Insts: *LRI, PHIOperands)) {
2478 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2479 << "\n");
2480 InstructionsToSink.insert_range(R: *LRI);
2481 ++ScanIdx;
2482 --LRI;
2483 }
2484
2485 // If no instructions can be sunk, early-return.
2486 if (ScanIdx == 0)
2487 return false;
2488
2489 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2490
2491 if (!followedByDeoptOrUnreachable) {
2492 // Check whether this is the pointer operand of a load/store.
2493 auto IsMemOperand = [](Use &U) {
2494 auto *I = cast<Instruction>(Val: U.getUser());
2495 if (isa<LoadInst>(Val: I))
2496 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2497 if (isa<StoreInst>(Val: I))
2498 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2499 return false;
2500 };
2501
2502 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2503 // actually sink before encountering instruction that is unprofitable to
2504 // sink?
2505 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2506 unsigned NumPHIInsts = 0;
2507 for (Use &U : (*LRI)[0]->operands()) {
2508 auto It = PHIOperands.find(Val: &U);
2509 if (It != PHIOperands.end() && !all_of(Range&: It->second, P: [&](Value *V) {
2510 return InstructionsToSink.contains(Ptr: V);
2511 })) {
2512 ++NumPHIInsts;
2513 // Do not separate a load/store from the gep producing the address.
2514 // The gep can likely be folded into the load/store as an addressing
2515 // mode. Additionally, a load of a gep is easier to analyze than a
2516 // load of a phi.
2517 if (IsMemOperand(U) &&
2518 any_of(Range&: It->second, P: [](Value *V) { return isa<GEPOperator>(Val: V); }))
2519 return false;
2520 // FIXME: this check is overly optimistic. We may end up not sinking
2521 // said instruction, due to the very same profitability check.
2522 // See @creating_too_many_phis in sink-common-code.ll.
2523 }
2524 }
2525 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2526 return NumPHIInsts <= 1;
2527 };
2528
2529 // We've determined that we are going to sink last ScanIdx instructions,
2530 // and recorded them in InstructionsToSink. Now, some instructions may be
2531 // unprofitable to sink. But that determination depends on the instructions
2532 // that we are going to sink.
2533
2534 // First, forward scan: find the first instruction unprofitable to sink,
2535 // recording all the ones that are profitable to sink.
2536 // FIXME: would it be better, after we detect that not all are profitable.
2537 // to either record the profitable ones, or erase the unprofitable ones?
2538 // Maybe we need to choose (at runtime) the one that will touch least
2539 // instrs?
2540 LRI.reset();
2541 int Idx = 0;
2542 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2543 while (Idx < ScanIdx) {
2544 if (!ProfitableToSinkInstruction(LRI)) {
2545 // Too many PHIs would be created.
2546 LLVM_DEBUG(
2547 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2548 break;
2549 }
2550 InstructionsProfitableToSink.insert_range(R: *LRI);
2551 --LRI;
2552 ++Idx;
2553 }
2554
2555 // If no instructions can be sunk, early-return.
2556 if (Idx == 0)
2557 return false;
2558
2559 // Did we determine that (only) some instructions are unprofitable to sink?
2560 if (Idx < ScanIdx) {
2561 // Okay, some instructions are unprofitable.
2562 ScanIdx = Idx;
2563 InstructionsToSink = InstructionsProfitableToSink;
2564
2565 // But, that may make other instructions unprofitable, too.
2566 // So, do a backward scan, do any earlier instructions become
2567 // unprofitable?
2568 assert(
2569 !ProfitableToSinkInstruction(LRI) &&
2570 "We already know that the last instruction is unprofitable to sink");
2571 ++LRI;
2572 --Idx;
2573 while (Idx >= 0) {
2574 // If we detect that an instruction becomes unprofitable to sink,
2575 // all earlier instructions won't be sunk either,
2576 // so preemptively keep InstructionsProfitableToSink in sync.
2577 // FIXME: is this the most performant approach?
2578 for (auto *I : *LRI)
2579 InstructionsProfitableToSink.erase(Ptr: I);
2580 if (!ProfitableToSinkInstruction(LRI)) {
2581 // Everything starting with this instruction won't be sunk.
2582 ScanIdx = Idx;
2583 InstructionsToSink = InstructionsProfitableToSink;
2584 }
2585 ++LRI;
2586 --Idx;
2587 }
2588 }
2589
2590 // If no instructions can be sunk, early-return.
2591 if (ScanIdx == 0)
2592 return false;
2593 }
2594
2595 bool Changed = false;
2596
2597 if (HaveNonUnconditionalPredecessors) {
2598 if (!followedByDeoptOrUnreachable) {
2599 // It is always legal to sink common instructions from unconditional
2600 // predecessors. However, if not all predecessors are unconditional,
2601 // this transformation might be pessimizing. So as a rule of thumb,
2602 // don't do it unless we'd sink at least one non-speculatable instruction.
2603 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2604 LRI.reset();
2605 int Idx = 0;
2606 bool Profitable = false;
2607 while (Idx < ScanIdx) {
2608 if (!isSafeToSpeculativelyExecute(I: (*LRI)[0])) {
2609 Profitable = true;
2610 break;
2611 }
2612 --LRI;
2613 ++Idx;
2614 }
2615 if (!Profitable)
2616 return false;
2617 }
2618
2619 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2620 // We have a conditional edge and we're going to sink some instructions.
2621 // Insert a new block postdominating all blocks we're going to sink from.
2622 if (!SplitBlockPredecessors(BB, Preds: UnconditionalPreds, Suffix: ".sink.split", DTU))
2623 // Edges couldn't be split.
2624 return false;
2625 Changed = true;
2626 }
2627
2628 // Now that we've analyzed all potential sinking candidates, perform the
2629 // actual sink. We iteratively sink the last non-terminator of the source
2630 // blocks into their common successor unless doing so would require too
2631 // many PHI instructions to be generated (currently only one PHI is allowed
2632 // per sunk instruction).
2633 //
2634 // We can use InstructionsToSink to discount values needing PHI-merging that will
2635 // actually be sunk in a later iteration. This allows us to be more
2636 // aggressive in what we sink. This does allow a false positive where we
2637 // sink presuming a later value will also be sunk, but stop half way through
2638 // and never actually sink it which means we produce more PHIs than intended.
2639 // This is unlikely in practice though.
2640 int SinkIdx = 0;
2641 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2642 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2643 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2644 << "\n");
2645
2646 // Because we've sunk every instruction in turn, the current instruction to
2647 // sink is always at index 0.
2648 LRI.reset();
2649
2650 sinkLastInstruction(Blocks: UnconditionalPreds);
2651 NumSinkCommonInstrs++;
2652 Changed = true;
2653 }
2654 if (SinkIdx != 0)
2655 ++NumSinkCommonCode;
2656 return Changed;
2657}
2658
2659namespace {
2660
2661struct CompatibleSets {
2662 using SetTy = SmallVector<InvokeInst *, 2>;
2663
2664 SmallVector<SetTy, 1> Sets;
2665
2666 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2667
2668 SetTy &getCompatibleSet(InvokeInst *II);
2669
2670 void insert(InvokeInst *II);
2671};
2672
2673CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2674 // Perform a linear scan over all the existing sets, see if the new `invoke`
2675 // is compatible with any particular set. Since we know that all the `invokes`
2676 // within a set are compatible, only check the first `invoke` in each set.
2677 // WARNING: at worst, this has quadratic complexity.
2678 for (CompatibleSets::SetTy &Set : Sets) {
2679 if (CompatibleSets::shouldBelongToSameSet(Invokes: {Set.front(), II}))
2680 return Set;
2681 }
2682
2683 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2684 return Sets.emplace_back();
2685}
2686
2687void CompatibleSets::insert(InvokeInst *II) {
2688 getCompatibleSet(II).emplace_back(Args&: II);
2689}
2690
2691bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2692 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2693
2694 // Can we theoretically merge these `invoke`s?
2695 auto IsIllegalToMerge = [](InvokeInst *II) {
2696 return II->cannotMerge() || II->isInlineAsm();
2697 };
2698 if (any_of(Range&: Invokes, P: IsIllegalToMerge))
2699 return false;
2700
2701 // Either both `invoke`s must be direct,
2702 // or both `invoke`s must be indirect.
2703 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2704 bool HaveIndirectCalls = any_of(Range&: Invokes, P: IsIndirectCall);
2705 bool AllCallsAreIndirect = all_of(Range&: Invokes, P: IsIndirectCall);
2706 if (HaveIndirectCalls) {
2707 if (!AllCallsAreIndirect)
2708 return false;
2709 } else {
2710 // All callees must be identical.
2711 Value *Callee = nullptr;
2712 for (InvokeInst *II : Invokes) {
2713 Value *CurrCallee = II->getCalledOperand();
2714 assert(CurrCallee && "There is always a called operand.");
2715 if (!Callee)
2716 Callee = CurrCallee;
2717 else if (Callee != CurrCallee)
2718 return false;
2719 }
2720 }
2721
2722 // Either both `invoke`s must not have a normal destination,
2723 // or both `invoke`s must have a normal destination,
2724 auto HasNormalDest = [](InvokeInst *II) {
2725 return !isa<UnreachableInst>(Val: II->getNormalDest()->getFirstNonPHIOrDbg());
2726 };
2727 if (any_of(Range&: Invokes, P: HasNormalDest)) {
2728 // Do not merge `invoke` that does not have a normal destination with one
2729 // that does have a normal destination, even though doing so would be legal.
2730 if (!all_of(Range&: Invokes, P: HasNormalDest))
2731 return false;
2732
2733 // All normal destinations must be identical.
2734 BasicBlock *NormalBB = nullptr;
2735 for (InvokeInst *II : Invokes) {
2736 BasicBlock *CurrNormalBB = II->getNormalDest();
2737 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2738 if (!NormalBB)
2739 NormalBB = CurrNormalBB;
2740 else if (NormalBB != CurrNormalBB)
2741 return false;
2742 }
2743
2744 // In the normal destination, the incoming values for these two `invoke`s
2745 // must be compatible.
2746 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2747 if (!incomingValuesAreCompatible(
2748 BB: NormalBB, IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()},
2749 EquivalenceSet: &EquivalenceSet))
2750 return false;
2751 }
2752
2753#ifndef NDEBUG
2754 // All unwind destinations must be identical.
2755 // We know that because we have started from said unwind destination.
2756 BasicBlock *UnwindBB = nullptr;
2757 for (InvokeInst *II : Invokes) {
2758 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2759 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2760 if (!UnwindBB)
2761 UnwindBB = CurrUnwindBB;
2762 else
2763 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2764 }
2765#endif
2766
2767 // In the unwind destination, the incoming values for these two `invoke`s
2768 // must be compatible.
2769 if (!incomingValuesAreCompatible(
2770 BB: Invokes.front()->getUnwindDest(),
2771 IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2772 return false;
2773
2774 // Ignoring arguments, these `invoke`s must be identical,
2775 // including operand bundles.
2776 const InvokeInst *II0 = Invokes.front();
2777 for (auto *II : Invokes.drop_front())
2778 if (!II->isSameOperationAs(I: II0, flags: Instruction::CompareUsingIntersectedAttrs))
2779 return false;
2780
2781 // Can we theoretically form the data operands for the merged `invoke`?
2782 auto IsIllegalToMergeArguments = [](auto Ops) {
2783 Use &U0 = std::get<0>(Ops);
2784 Use &U1 = std::get<1>(Ops);
2785 if (U0 == U1)
2786 return false;
2787 return !canReplaceOperandWithVariable(I: cast<Instruction>(Val: U0.getUser()),
2788 OpIdx: U0.getOperandNo());
2789 };
2790 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2791 if (any_of(Range: zip(t: Invokes[0]->data_ops(), u: Invokes[1]->data_ops()),
2792 P: IsIllegalToMergeArguments))
2793 return false;
2794
2795 return true;
2796}
2797
2798} // namespace
2799
2800// Merge all invokes in the provided set, all of which are compatible
2801// as per the `CompatibleSets::shouldBelongToSameSet()`.
2802static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
2803 DomTreeUpdater *DTU) {
2804 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2805
2806 SmallVector<DominatorTree::UpdateType, 8> Updates;
2807 if (DTU)
2808 Updates.reserve(N: 2 + 3 * Invokes.size());
2809
2810 bool HasNormalDest =
2811 !isa<UnreachableInst>(Val: Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2812
2813 // Clone one of the invokes into a new basic block.
2814 // Since they are all compatible, it doesn't matter which invoke is cloned.
2815 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2816 InvokeInst *II0 = Invokes.front();
2817 BasicBlock *II0BB = II0->getParent();
2818 BasicBlock *InsertBeforeBlock =
2819 II0->getParent()->getIterator()->getNextNode();
2820 Function *Func = II0BB->getParent();
2821 LLVMContext &Ctx = II0->getContext();
2822
2823 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2824 Context&: Ctx, Name: II0BB->getName() + ".invoke", Parent: Func, InsertBefore: InsertBeforeBlock);
2825
2826 auto *MergedInvoke = cast<InvokeInst>(Val: II0->clone());
2827 // NOTE: all invokes have the same attributes, so no handling needed.
2828 MergedInvoke->insertInto(ParentBB: MergedInvokeBB, It: MergedInvokeBB->end());
2829
2830 if (!HasNormalDest) {
2831 // This set does not have a normal destination,
2832 // so just form a new block with unreachable terminator.
2833 BasicBlock *MergedNormalDest = BasicBlock::Create(
2834 Context&: Ctx, Name: II0BB->getName() + ".cont", Parent: Func, InsertBefore: InsertBeforeBlock);
2835 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2836 UI->setDebugLoc(DebugLoc::getTemporary());
2837 MergedInvoke->setNormalDest(MergedNormalDest);
2838 }
2839
2840 // The unwind destination, however, remainds identical for all invokes here.
2841
2842 return MergedInvoke;
2843 }();
2844
2845 if (DTU) {
2846 // Predecessor blocks that contained these invokes will now branch to
2847 // the new block that contains the merged invoke, ...
2848 for (InvokeInst *II : Invokes)
2849 Updates.push_back(
2850 Elt: {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2851
2852 // ... which has the new `unreachable` block as normal destination,
2853 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2854 for (BasicBlock *SuccBBOfMergedInvoke : successors(I: MergedInvoke))
2855 Updates.push_back(Elt: {DominatorTree::Insert, MergedInvoke->getParent(),
2856 SuccBBOfMergedInvoke});
2857
2858 // Since predecessor blocks now unconditionally branch to a new block,
2859 // they no longer branch to their original successors.
2860 for (InvokeInst *II : Invokes)
2861 for (BasicBlock *SuccOfPredBB : successors(BB: II->getParent()))
2862 Updates.push_back(
2863 Elt: {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2864 }
2865
2866 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2867
2868 // Form the merged operands for the merged invoke.
2869 for (Use &U : MergedInvoke->operands()) {
2870 // Only PHI together the indirect callees and data operands.
2871 if (MergedInvoke->isCallee(U: &U)) {
2872 if (!IsIndirectCall)
2873 continue;
2874 } else if (!MergedInvoke->isDataOperand(U: &U))
2875 continue;
2876
2877 // Don't create trivial PHI's with all-identical incoming values.
2878 bool NeedPHI = any_of(Range&: Invokes, P: [&U](InvokeInst *II) {
2879 return II->getOperand(i_nocapture: U.getOperandNo()) != U.get();
2880 });
2881 if (!NeedPHI)
2882 continue;
2883
2884 // Form a PHI out of all the data ops under this index.
2885 PHINode *PN = PHINode::Create(
2886 Ty: U->getType(), /*NumReservedValues=*/Invokes.size(), NameStr: "", InsertBefore: MergedInvoke->getIterator());
2887 for (InvokeInst *II : Invokes)
2888 PN->addIncoming(V: II->getOperand(i_nocapture: U.getOperandNo()), BB: II->getParent());
2889
2890 U.set(PN);
2891 }
2892
2893 // We've ensured that each PHI node has compatible (identical) incoming values
2894 // when coming from each of the `invoke`s in the current merge set,
2895 // so update the PHI nodes accordingly.
2896 for (BasicBlock *Succ : successors(I: MergedInvoke))
2897 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2898 /*ExistPred=*/Invokes.front()->getParent());
2899
2900 // And finally, replace the original `invoke`s with an unconditional branch
2901 // to the block with the merged `invoke`. Also, give that merged `invoke`
2902 // the merged debugloc of all the original `invoke`s.
2903 DILocation *MergedDebugLoc = nullptr;
2904 for (InvokeInst *II : Invokes) {
2905 // Compute the debug location common to all the original `invoke`s.
2906 if (!MergedDebugLoc)
2907 MergedDebugLoc = II->getDebugLoc();
2908 else
2909 MergedDebugLoc =
2910 DebugLoc::getMergedLocation(LocA: MergedDebugLoc, LocB: II->getDebugLoc());
2911
2912 // And replace the old `invoke` with an unconditionally branch
2913 // to the block with the merged `invoke`.
2914 for (BasicBlock *OrigSuccBB : successors(BB: II->getParent()))
2915 OrigSuccBB->removePredecessor(Pred: II->getParent());
2916 auto *BI = UncondBrInst::Create(Target: MergedInvoke->getParent(), InsertBefore: II->getParent());
2917 // The unconditional branch is part of the replacement for the original
2918 // invoke, so should use its DebugLoc.
2919 BI->setDebugLoc(II->getDebugLoc());
2920 bool Success = MergedInvoke->tryIntersectAttributes(Other: II);
2921 assert(Success && "Merged invokes with incompatible attributes");
2922 // For NDEBUG Compile
2923 (void)Success;
2924 II->replaceAllUsesWith(V: MergedInvoke);
2925 II->eraseFromParent();
2926 ++NumInvokesMerged;
2927 }
2928 MergedInvoke->setDebugLoc(MergedDebugLoc);
2929 ++NumInvokeSetsFormed;
2930
2931 if (DTU)
2932 DTU->applyUpdates(Updates);
2933}
2934
2935/// If this block is a `landingpad` exception handling block, categorize all
2936/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2937/// being "mergeable" together, and then merge invokes in each set together.
2938///
2939/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2940/// [...] [...]
2941/// | |
2942/// [invoke0] [invoke1]
2943/// / \ / \
2944/// [cont0] [landingpad] [cont1]
2945/// to:
2946/// [...] [...]
2947/// \ /
2948/// [invoke]
2949/// / \
2950/// [cont] [landingpad]
2951///
2952/// But of course we can only do that if the invokes share the `landingpad`,
2953/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2954/// and the invoked functions are "compatible".
2955static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
2956 if (!EnableMergeCompatibleInvokes)
2957 return false;
2958
2959 bool Changed = false;
2960
2961 // FIXME: generalize to all exception handling blocks?
2962 if (!BB->isLandingPad())
2963 return Changed;
2964
2965 CompatibleSets Grouper;
2966
2967 // Record all the predecessors of this `landingpad`. As per verifier,
2968 // the only allowed predecessor is the unwind edge of an `invoke`.
2969 // We want to group "compatible" `invokes` into the same set to be merged.
2970 for (BasicBlock *PredBB : predecessors(BB))
2971 Grouper.insert(II: cast<InvokeInst>(Val: PredBB->getTerminator()));
2972
2973 // And now, merge `invoke`s that were grouped togeter.
2974 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2975 if (Invokes.size() < 2)
2976 continue;
2977 Changed = true;
2978 mergeCompatibleInvokesImpl(Invokes, DTU);
2979 }
2980
2981 return Changed;
2982}
2983
2984namespace {
2985/// Track ephemeral values, which should be ignored for cost-modelling
2986/// purposes. Requires walking instructions in reverse order.
2987class EphemeralValueTracker {
2988 SmallPtrSet<const Instruction *, 32> EphValues;
2989
2990 bool isEphemeral(const Instruction *I) {
2991 if (isa<AssumeInst>(Val: I))
2992 return true;
2993 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2994 all_of(Range: I->users(), P: [&](const User *U) {
2995 return EphValues.count(Ptr: cast<Instruction>(Val: U));
2996 });
2997 }
2998
2999public:
3000 bool track(const Instruction *I) {
3001 if (isEphemeral(I)) {
3002 EphValues.insert(Ptr: I);
3003 return true;
3004 }
3005 return false;
3006 }
3007
3008 bool contains(const Instruction *I) const { return EphValues.contains(Ptr: I); }
3009};
3010} // namespace
3011
3012/// Determine if we can hoist sink a sole store instruction out of a
3013/// conditional block.
3014///
3015/// We are looking for code like the following:
3016/// BrBB:
3017/// store i32 %add, i32* %arrayidx2
3018/// ... // No other stores or function calls (we could be calling a memory
3019/// ... // function).
3020/// %cmp = icmp ult %x, %y
3021/// br i1 %cmp, label %EndBB, label %ThenBB
3022/// ThenBB:
3023/// store i32 %add5, i32* %arrayidx2
3024/// br label EndBB
3025/// EndBB:
3026/// ...
3027/// We are going to transform this into:
3028/// BrBB:
3029/// store i32 %add, i32* %arrayidx2
3030/// ... //
3031/// %cmp = icmp ult %x, %y
3032/// %add.add5 = select i1 %cmp, i32 %add, %add5
3033/// store i32 %add.add5, i32* %arrayidx2
3034/// ...
3035///
3036/// \return The pointer to the value of the previous store if the store can be
3037/// hoisted into the predecessor block. 0 otherwise.
3038static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
3039 BasicBlock *StoreBB, BasicBlock *EndBB) {
3040 StoreInst *StoreToHoist = dyn_cast<StoreInst>(Val: I);
3041 if (!StoreToHoist)
3042 return nullptr;
3043
3044 // Volatile or atomic.
3045 if (!StoreToHoist->isSimple())
3046 return nullptr;
3047
3048 Value *StorePtr = StoreToHoist->getPointerOperand();
3049 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3050
3051 // Look for a store to the same pointer in BrBB.
3052 unsigned MaxNumInstToLookAt = 9;
3053 // Skip pseudo probe intrinsic calls which are not really killing any memory
3054 // accesses.
3055 for (Instruction &CurI : reverse(C&: *BrBB)) {
3056 if (!MaxNumInstToLookAt)
3057 break;
3058 --MaxNumInstToLookAt;
3059
3060 if (isa<PseudoProbeInst>(Val: CurI))
3061 continue;
3062
3063 // Could be calling an instruction that affects memory like free().
3064 if (CurI.mayWriteToMemory() && !isa<StoreInst>(Val: CurI))
3065 return nullptr;
3066
3067 if (auto *SI = dyn_cast<StoreInst>(Val: &CurI)) {
3068 // Found the previous store to same location and type. Make sure it is
3069 // simple, to avoid introducing a spurious non-atomic write after an
3070 // atomic write.
3071 if (SI->getPointerOperand() == StorePtr &&
3072 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3073 SI->getAlign() >= StoreToHoist->getAlign())
3074 // Found the previous store, return its value operand.
3075 return SI->getValueOperand();
3076 return nullptr; // Unknown store.
3077 }
3078
3079 if (auto *LI = dyn_cast<LoadInst>(Val: &CurI)) {
3080 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3081 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3082 Value *Obj = getUnderlyingObject(V: StorePtr);
3083 bool ExplicitlyDereferenceableOnly;
3084 // The dereferenceability query here is only required to satisfy the
3085 // writable contract, actual dereferenceability is proven by the
3086 // presence of an access. As such, we can ignore frees.
3087 if (isWritableObject(Object: Obj, ExplicitlyDereferenceableOnly) &&
3088 capturesNothing(
3089 CC: PointerMayBeCaptured(V: Obj, Mask: CaptureComponents::Provenance)
3090 .WithoutRet) &&
3091 (!ExplicitlyDereferenceableOnly ||
3092 isDereferenceablePointer(V: StorePtr, Ty: StoreTy, Q: LI->getDataLayout(),
3093 /*IgnoreFree=*/true))) {
3094 // Found a previous load, return it.
3095 return LI;
3096 }
3097 }
3098 // The load didn't work out, but we may still find a store.
3099 }
3100 }
3101
3102 return nullptr;
3103}
3104
3105/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3106/// converted to selects.
3107static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
3108 BasicBlock *EndBB,
3109 unsigned &SpeculatedInstructions,
3110 InstructionCost &Cost,
3111 const TargetTransformInfo &TTI) {
3112 TargetTransformInfo::TargetCostKind CostKind =
3113 BB->getParent()->hasMinSize()
3114 ? TargetTransformInfo::TCK_CodeSize
3115 : TargetTransformInfo::TCK_SizeAndLatency;
3116
3117 bool HaveRewritablePHIs = false;
3118 for (PHINode &PN : EndBB->phis()) {
3119 Value *OrigV = PN.getIncomingValueForBlock(BB);
3120 Value *ThenV = PN.getIncomingValueForBlock(BB: ThenBB);
3121
3122 // FIXME: Try to remove some of the duplication with
3123 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3124 if (ThenV == OrigV)
3125 continue;
3126
3127 Cost += TTI.getCmpSelInstrCost(Opcode: Instruction::Select, ValTy: PN.getType(),
3128 CondTy: CmpInst::makeCmpResultType(opnd_type: PN.getType()),
3129 VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
3130
3131 // Don't convert to selects if we could remove undefined behavior instead.
3132 if (passingValueIsAlwaysUndefined(V: OrigV, I: &PN) ||
3133 passingValueIsAlwaysUndefined(V: ThenV, I: &PN))
3134 return false;
3135
3136 HaveRewritablePHIs = true;
3137 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(Val: OrigV);
3138 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(Val: ThenV);
3139 if (!OrigCE && !ThenCE)
3140 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3141
3142 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(I: OrigCE, TTI) : 0;
3143 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(I: ThenCE, TTI) : 0;
3144 InstructionCost MaxCost =
3145 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3146 if (OrigCost + ThenCost > MaxCost)
3147 return false;
3148
3149 // Account for the cost of an unfolded ConstantExpr which could end up
3150 // getting expanded into Instructions.
3151 // FIXME: This doesn't account for how many operations are combined in the
3152 // constant expression.
3153 ++SpeculatedInstructions;
3154 if (SpeculatedInstructions > 1)
3155 return false;
3156 }
3157
3158 return HaveRewritablePHIs;
3159}
3160
3161static bool isProfitableToSpeculate(const CondBrInst *BI,
3162 std::optional<bool> Invert,
3163 const TargetTransformInfo &TTI) {
3164 // If the branch is non-unpredictable, and is predicted to *not* branch to
3165 // the `then` block, then avoid speculating it.
3166 if (BI->getMetadata(KindID: LLVMContext::MD_unpredictable))
3167 return true;
3168
3169 uint64_t TWeight, FWeight;
3170 if (!extractBranchWeights(I: *BI, TrueVal&: TWeight, FalseVal&: FWeight) || (TWeight + FWeight) == 0)
3171 return true;
3172
3173 if (!Invert.has_value())
3174 return false;
3175
3176 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3177 BranchProbability BIEndProb =
3178 BranchProbability::getBranchProbability(Numerator: EndWeight, Denominator: TWeight + FWeight);
3179 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3180 return BIEndProb < Likely;
3181}
3182
/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
/// instructions like this is most often not desirable. Instead, there is an MI
/// pass which can do it with full awareness of the resource constraints.
/// However, some cases are "obvious" and we should do directly. An example of
/// this is speculating a single, reasonably cheap instruction.
///
/// There is only one distinct advantage to flattening the CFG at the IR level:
/// it makes very common but simplistic optimizations such as are common in
/// instcombine and the DAG combiner more powerful by removing CFG edges and
/// modeling their effects with easier to reason about SSA value graphs.
///
///
/// An illustration of this transform is turning this IR:
/// \code
///   BB:
///     %cmp = icmp ult %x, %y
///     br i1 %cmp, label %EndBB, label %ThenBB
///   ThenBB:
///     %sub = sub %x, %y
///     br label %EndBB
///   EndBB:
///     %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
///     ...
/// \endcode
///
/// Into this IR:
/// \code
///   BB:
///     %cmp = icmp ult %x, %y
///     %sub = sub %x, %y
///     %cond = select i1 %cmp, 0, %sub
///     ...
/// \endcode
///
/// \param BI     The conditional branch terminating the "if" block. One of its
///               successors must be \p ThenBB and the other must be the block
///               \p ThenBB branches to (both are asserted below).
/// \param ThenBB The conditionally executed block whose non-terminator
///               instructions are candidates for hoisting in front of \p BI.
///
/// \returns true if the instructions of \p ThenBB were hoisted into the block
/// of \p BI and the PHIs of the end block were rewritten to use selects. The
/// branch \p BI itself is not erased by this function.
bool SimplifyCFGOpt::speculativelyExecuteBB(CondBrInst *BI,
                                            BasicBlock *ThenBB) {
  // The whole transform is gated on the SpeculateBlocks option.
  if (!Options.SpeculateBlocks)
    return false;

  // Be conservative for now. FP select instruction can often be expensive.
  Value *BrCond = BI->getCondition();
  if (isa<FCmpInst>(Val: BrCond))
    return false;

  BasicBlock *BB = BI->getParent();
  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(Idx: 0);
  // Upper bound on the cost of the selects that will have to be inserted.
  InstructionCost Budget =
      PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;

  // If ThenBB is actually on the false edge of the conditional branch, remember
  // to swap the select operands later.
  bool Invert = false;
  if (ThenBB != BI->getSuccessor(i: 0)) {
    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
    Invert = true;
  }
  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");

  // Consult branch metadata / profile weights before doing any further work.
  if (!isProfitableToSpeculate(BI, Invert, TTI))
    return false;

  // Keep a count of how many times instructions are used within ThenBB when
  // they are candidates for sinking into ThenBB. Specifically:
  // - They are defined in BB, and
  // - They have no side effects, and
  // - All of their uses are in ThenBB.
  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;

  // Pseudo probes found in ThenBB; they are erased at the very end.
  SmallVector<Instruction *, 4> SpeculatedPseudoProbes;

  unsigned SpeculatedInstructions = 0;
  bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
  // Loads/stores that will be handed to hoistConditionalLoadsStores() instead
  // of being counted against the one-instruction limit.
  SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
  // Set (at most once) by isSafeToSpeculateStore() when ThenBB contains a
  // store that can be made unconditional; SpeculatedStore is that store.
  Value *SpeculatedStoreValue = nullptr;
  StoreInst *SpeculatedStore = nullptr;
  EphemeralValueTracker EphTracker;
  // Walk ThenBB bottom-up, excluding its terminator.
  for (Instruction &I : reverse(C: drop_end(RangeOrContainer&: *ThenBB))) {
    // Skip pseudo probes. The consequence is we lose track of the branch
    // probability for ThenBB, which is fine since the optimization here takes
    // place regardless of the branch probability.
    if (isa<PseudoProbeInst>(Val: I)) {
      // The probe should be deleted so that it will not be over-counted when
      // the samples collected on the non-conditional path are counted towards
      // the conditional path. We leave it for the counts inference algorithm to
      // figure out a proper count for an unknown probe.
      SpeculatedPseudoProbes.push_back(Elt: &I);
      continue;
    }

    // Ignore ephemeral values, they will be dropped by the transform.
    if (EphTracker.track(I: &I))
      continue;

    // Only speculatively execute a single instruction (not counting the
    // terminator) for now.
    bool IsSafeCheapLoadStore = HoistLoadsStores &&
                                isSafeCheapLoadStore(I: &I, TTI) &&
                                SpeculatedConditionalLoadsStores.size() <
                                    HoistLoadsStoresWithCondFaultingThreshold;
    // Not count load/store into cost if target supports conditional faulting
    // b/c it's cheap to speculate it.
    if (IsSafeCheapLoadStore)
      SpeculatedConditionalLoadsStores.push_back(Elt: &I);
    else
      ++SpeculatedInstructions;

    if (SpeculatedInstructions > 1)
      return false;

    // Don't hoist the instruction if it's unsafe or expensive.
    // Note the assignment inside the condition: SpeculatedStoreValue is only
    // computed when the instruction is not otherwise safe to speculate, store
    // hoisting is enabled, and no store candidate has been found yet.
    if (!IsSafeCheapLoadStore &&
        !isSafeToSpeculativelyExecute(I: &I, CtxI: BI, AC: Options.AC) &&
        !(HoistCondStores && !SpeculatedStoreValue &&
          (SpeculatedStoreValue =
               isSafeToSpeculateStore(I: &I, BrBB: BB, StoreBB: ThenBB, EndBB))))
      return false;
    if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
        computeSpeculationCost(I: &I, TTI) >
            PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
      return false;

    // Store the store speculation candidate.
    if (!SpeculatedStore && SpeculatedStoreValue)
      SpeculatedStore = cast<StoreInst>(Val: &I);

    // Do not hoist the instruction if any of its operands are defined but not
    // used in BB. The transformation will prevent the operand from
    // being sunk into the use block.
    for (Use &Op : I.operands()) {
      Instruction *OpI = dyn_cast<Instruction>(Val&: Op);
      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
        continue; // Not a candidate for sinking.

      ++SinkCandidateUseCounts[OpI];
    }
  }

  // Consider any sink candidates which are only used in ThenBB as costs for
  // speculation. Note, while we iterate over a DenseMap here, we are summing
  // and so iteration order isn't significant.
  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
    if (Inst->hasNUses(N: Count)) {
      ++SpeculatedInstructions;
      if (SpeculatedInstructions > 1)
        return false;
    }

  // Check that we can insert the selects and that it's not too expensive to do
  // so.
  // A speculated store or conditional load/store already justifies the
  // conversion; otherwise the helper must report something to rewrite.
  bool Convert =
      SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
  InstructionCost Cost = 0;
  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
                                            SpeculatedInstructions, Cost, TTI);
  if (!Convert || Cost > Budget)
    return false;

  // If we get here, we can hoist the instruction and if-convert.
  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

  // Select created for the speculated store, if any; it is forwarded to
  // hoistConditionalLoadsStores() below.
  Instruction *Sel = nullptr;
  // Insert a select of the value of the speculated store.
  if (SpeculatedStoreValue) {
    IRBuilder<NoFolder> Builder(BI);
    Value *OrigV = SpeculatedStore->getValueOperand();
    Value *TrueV = SpeculatedStore->getValueOperand();
    Value *FalseV = SpeculatedStoreValue;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *S = Builder.CreateSelect(
        C: BrCond, True: TrueV, False: FalseV, Name: "spec.store.select", MDFrom: BI);
    Sel = cast<Instruction>(Val: S);
    // Operand 0 of the store is the stored value (it is what
    // getValueOperand() returned above as OrigV).
    SpeculatedStore->setOperand(i_nocapture: 0, Val_nocapture: S);
    SpeculatedStore->applyMergedLocation(LocA: BI->getDebugLoc(),
                                         LocB: SpeculatedStore->getDebugLoc());
    // The value stored is still conditional, but the store itself is now
    // unconditionally executed, so we must be sure that any linked dbg.assign
    // intrinsics are tracking the new stored value (the result of the
    // select). If we don't, and the store were to be removed by another pass
    // (e.g. DSE), then we'd eventually end up emitting a location describing
    // the conditional value, unconditionally.
    //
    // === Before this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1, ...
    //   br %cond if.then
    //
    // if.then:
    //   store %two, %x.dest, !DIAssignID !2
    //   dbg.assign %two, "x", ..., !2, ...
    //
    // === After this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1
    //   ...
    //   %merge = select %cond, %two, %one
    //   store %merge, %x.dest, !DIAssignID !2
    //   dbg.assign %merge, "x", ..., !2
    for (DbgVariableRecord *DbgAssign :
         at::getDVRAssignmentMarkers(Inst: SpeculatedStore))
      if (llvm::is_contained(Range: DbgAssign->location_ops(), Element: OrigV))
        DbgAssign->replaceVariableLocationOp(OldValue: OrigV, NewValue: S);
  }

  // Metadata can be dependent on the condition we are hoisting above.
  // Strip all UB-implying metadata on the instruction. Drop the debug loc
  // to avoid making it appear as if the condition is a constant, which would
  // be misleading while debugging.
  // Similarly strip attributes that maybe dependent on condition we are
  // hoisting above.
  // The early-inc range is required because ephemeral instructions are erased
  // while iterating.
  for (auto &I : make_early_inc_range(Range&: *ThenBB)) {
    // The speculated store keeps the merged location assigned above.
    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
      I.dropLocation();
    }
    I.dropUBImplyingAttrsAndMetadata();

    // Drop ephemeral values.
    if (EphTracker.contains(I: &I)) {
      I.replaceAllUsesWith(V: PoisonValue::get(T: I.getType()));
      I.eraseFromParent();
    }
  }

  // Hoist the instructions.
  // Drop DbgVariableRecords attached to these instructions.
  for (auto &It : *ThenBB)
    for (DbgRecord &DR : make_early_inc_range(Range: It.getDbgRecordRange()))
      // Drop all records except assign-kind DbgVariableRecords (dbg.assign
      // equivalent).
      if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(Val: &DR);
          !DVR || !DVR->isDbgAssign())
        It.dropOneDbgRecord(I: &DR);
  // Move everything except ThenBB's terminator to just before BI.
  BB->splice(ToIt: BI->getIterator(), FromBB: ThenBB, FromBeginIt: ThenBB->begin(),
             FromEndIt: std::prev(x: ThenBB->end()));

  if (!SpeculatedConditionalLoadsStores.empty())
    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
                                Sel);

  // Insert selects and rewrite the PHI operands.
  IRBuilder<NoFolder> Builder(BI);
  for (PHINode &PN : EndBB->phis()) {
    unsigned OrigI = PN.getBasicBlockIndex(BB);
    unsigned ThenI = PN.getBasicBlockIndex(BB: ThenBB);
    Value *OrigV = PN.getIncomingValue(i: OrigI);
    Value *ThenV = PN.getIncomingValue(i: ThenI);

    // Skip PHIs which are trivial.
    if (OrigV == ThenV)
      continue;

    // Create a select whose true value is the speculatively executed value and
    // false value is the pre-existing value. Swap them if the branch
    // destinations were inverted.
    Value *TrueV = ThenV, *FalseV = OrigV;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *V = Builder.CreateSelect(C: BrCond, True: TrueV, False: FalseV, Name: "spec.select", MDFrom: BI);
    // Both incoming edges now carry the same select, making the PHI trivial.
    PN.setIncomingValue(i: OrigI, V);
    PN.setIncomingValue(i: ThenI, V);
  }

  // Remove speculated pseudo probes.
  for (Instruction *I : SpeculatedPseudoProbes)
    I->eraseFromParent();

  ++NumSpeculations;
  return true;
}
3457
/// Small set of basic blocks, used by the jump-threading helpers below to
/// record blocks with non-local uses and the blocks that reach them.
using BlocksSet = SmallPtrSet<BasicBlock *, 8>;
3459
3460// Return false if number of blocks searched is too much.
3461static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3462 BlocksSet &ReachesNonLocalUses) {
3463 if (BB == DefBB)
3464 return true;
3465 if (!ReachesNonLocalUses.insert(Ptr: BB).second)
3466 return true;
3467
3468 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3469 return false;
3470 for (BasicBlock *Pred : predecessors(BB))
3471 if (!findReaching(BB: Pred, DefBB, ReachesNonLocalUses))
3472 return false;
3473 return true;
3474}
3475
3476/// Return true if we can thread a branch across this block.
3477static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
3478 BlocksSet &NonLocalUseBlocks) {
3479 int Size = 0;
3480 EphemeralValueTracker EphTracker;
3481
3482 // Walk the loop in reverse so that we can identify ephemeral values properly
3483 // (values only feeding assumes).
3484 for (Instruction &I : reverse(C&: *BB)) {
3485 // Can't fold blocks that contain noduplicate or convergent calls.
3486 if (CallInst *CI = dyn_cast<CallInst>(Val: &I))
3487 if (CI->cannotDuplicate() || CI->isConvergent())
3488 return false;
3489
3490 // Ignore ephemeral values which are deleted during codegen.
3491 // We will delete Phis while threading, so Phis should not be accounted in
3492 // block's size.
3493 if (!EphTracker.track(I: &I) && !isa<PHINode>(Val: I)) {
3494 if (Size++ > MaxSmallBlockSize)
3495 return false; // Don't clone large BB's.
3496 }
3497
3498 // Record blocks with non-local uses of values defined in the current basic
3499 // block.
3500 for (User *U : I.users()) {
3501 Instruction *UI = cast<Instruction>(Val: U);
3502 BasicBlock *UsedInBB = UI->getParent();
3503 if (UsedInBB == BB) {
3504 if (isa<PHINode>(Val: UI))
3505 return false;
3506 } else
3507 NonLocalUseBlocks.insert(Ptr: UsedInBB);
3508 }
3509
3510 // Looks ok, continue checking.
3511 }
3512
3513 return true;
3514}
3515
3516static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
3517 BasicBlock *To) {
3518 // Don't look past the block defining the value, we might get the value from
3519 // a previous loop iteration.
3520 auto *I = dyn_cast<Instruction>(Val: V);
3521 if (I && I->getParent() == To)
3522 return nullptr;
3523
3524 // We know the value if the From block branches on it.
3525 auto *BI = dyn_cast<CondBrInst>(Val: From->getTerminator());
3526 if (BI && BI->getCondition() == V &&
3527 BI->getSuccessor(i: 0) != BI->getSuccessor(i: 1))
3528 return BI->getSuccessor(i: 0) == To ? ConstantInt::getTrue(Context&: BI->getContext())
3529 : ConstantInt::getFalse(Context&: BI->getContext());
3530
3531 return nullptr;
3532}
3533
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
///
/// \param BI  The conditional branch to examine; its parent is the block that
///            gets threaded through.
/// \param DTU Optional dominator tree updater; when non-null it is informed of
///            the edge changes made here.
/// \param DL  Data layout, used when simplifying cloned instructions.
/// \param AC  Optional assumption cache; cloned assumes are registered with it.
///
/// \returns \c true if a single-entry PHI condition was folded, \c false if
/// nothing was changed by the threading logic, and \c std::nullopt after one
/// group of predecessors has been threaded (the caller is expected to call
/// again to handle remaining constants).
static std::optional<bool>
foldCondBranchOnValueKnownInPredecessorImpl(CondBrInst *BI, DomTreeUpdater *DTU,
                                            const DataLayout &DL,
                                            AssumptionCache *AC) {
  // Maps each known constant condition value to the predecessors along which
  // the condition has that value.
  SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
  BasicBlock *BB = BI->getParent();
  Value *Cond = BI->getCondition();
  PHINode *PN = dyn_cast<PHINode>(Val: Cond);
  if (PN && PN->getParent() == BB) {
    // Degenerate case of a single entry PHI.
    if (PN->getNumIncomingValues() == 1) {
      FoldSingleEntryPHINodes(BB: PN->getParent());
      return true;
    }

    // The condition is a PHI in this block: every constant incoming value is
    // a known value for the corresponding predecessor.
    for (Use &U : PN->incoming_values())
      if (auto *CB = dyn_cast<ConstantInt>(Val&: U))
        KnownValues[CB].insert(X: PN->getIncomingBlock(U));
  } else {
    // Otherwise, the value may be known because a predecessor branches on it.
    for (BasicBlock *Pred : predecessors(BB)) {
      if (ConstantInt *CB = getKnownValueOnEdge(V: Cond, From: Pred, To: BB))
        KnownValues[CB].insert(X: Pred);
    }
  }

  if (KnownValues.empty())
    return false;

  // Now we know that this block has multiple preds and two succs.
  // Check that the block is small enough and record which non-local blocks use
  // values defined in the block.

  BlocksSet NonLocalUseBlocks;
  BlocksSet ReachesNonLocalUseBlocks;
  if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
    return false;

  // Jump-threading can only be done to destinations where no values defined
  // in BB are live.

  // Quickly check if both destinations have uses. If so, jump-threading cannot
  // be done.
  if (NonLocalUseBlocks.contains(Ptr: BI->getSuccessor(i: 0)) &&
      NonLocalUseBlocks.contains(Ptr: BI->getSuccessor(i: 1)))
    return false;

  // Search backward from NonLocalUseBlocks to find which blocks
  // reach non-local uses.
  for (BasicBlock *UseBB : NonLocalUseBlocks)
    // Give up if too many blocks are searched.
    if (!findReaching(BB: UseBB, DefBB: BB, ReachesNonLocalUses&: ReachesNonLocalUseBlocks))
      return false;

  for (const auto &Pair : KnownValues) {
    ConstantInt *CB = Pair.first;
    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    // A true (non-zero) condition selects successor 0, false selects
    // successor 1.
    BasicBlock *RealDest = BI->getSuccessor(i: !CB->getZExtValue());

    // Okay, we now know that all edges from PredBB should be revectored to
    // branch to RealDest.
    if (RealDest == BB)
      continue; // Skip self loops.

    // Skip if the predecessor's terminator is an indirect branch.
    if (any_of(Range&: PredBBs, P: [](BasicBlock *PredBB) {
          return isa<IndirectBrInst>(Val: PredBB->getTerminator());
        }))
      continue;

    // Only revector to RealDest if no values defined in BB are live.
    if (ReachesNonLocalUseBlocks.contains(Ptr: RealDest))
      continue;

    LLVM_DEBUG({
      dbgs() << "Condition " << *Cond << " in " << BB->getName()
             << " has value " << *Pair.first << " in predecessors:\n";
      for (const BasicBlock *PredBB : Pair.second)
        dbgs() << "  " << PredBB->getName() << "\n";
      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
    });

    // Split the predecessors we are threading into a new edge block. We'll
    // clone the instructions into this block, and then redirect it to RealDest.
    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, Preds: PredBBs, Suffix: ".critedge", DTU);
    if (!EdgeBB)
      continue;

    // TODO: These just exist to reduce test diff, we can drop them if we like.
    EdgeBB->setName(RealDest->getName() + ".critedge");
    EdgeBB->moveBefore(MovePos: RealDest);

    // Update PHI nodes.
    addPredecessorToBlock(Succ: RealDest, NewPred: EdgeBB, ExistPred: BB);

    // BB may have instructions that are being threaded over.  Clone these
    // instructions into EdgeBB.  We know that there will be no uses of the
    // cloned instructions outside of EdgeBB.
    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
    ValueToValueMapTy TranslateMap; // Track translated values.
    // Along this edge the branch condition is the known constant.
    TranslateMap[Cond] = CB;

    // RemoveDIs: track instructions that we optimise away while folding, so
    // that we can copy DbgVariableRecords from them later.
    BasicBlock::iterator SrcDbgCursor = BB->begin();
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
      // PHIs are not cloned; they are replaced by the value flowing in from
      // EdgeBB. (This PN shadows the function-level PN.)
      if (PHINode *PN = dyn_cast<PHINode>(Val&: BBI)) {
        TranslateMap[PN] = PN->getIncomingValueForBlock(BB: EdgeBB);
        continue;
      }
      // Clone the instruction.
      Instruction *N = BBI->clone();
      // Insert the new instruction into its new home.
      N->insertInto(ParentBB: EdgeBB, It: InsertPt);

      if (BBI->hasName())
        N->setName(BBI->getName() + ".c");

      // Update operands due to translation.
      // Key Instructions: Remap all the atom groups.
      // NOTE: this local DL (a DebugLoc) shadows the DataLayout parameter DL
      // only within the if statement below.
      if (const DebugLoc &DL = BBI->getDebugLoc())
        mapAtomInstance(DL, VMap&: TranslateMap);
      RemapInstruction(I: N, VM&: TranslateMap,
                       Flags: RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);

      // Check for trivial simplification.
      if (Value *V = simplifyInstruction(I: N, Q: {DL, nullptr, nullptr, AC})) {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = V;
        // A simplified clone is only removed if it has no side effects;
        // otherwise it stays in EdgeBB even though its result is unused.
        if (!N->mayHaveSideEffects()) {
          N->eraseFromParent(); // Instruction folded away, don't need actual
                                // inst
          N = nullptr;
        }
      } else {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = N;
      }
      if (N) {
        // Copy all debug-info attached to instructions from the last we
        // successfully clone, up to this instruction (they might have been
        // folded away).
        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
          N->cloneDebugInfoFrom(From: &*SrcDbgCursor);
        SrcDbgCursor = std::next(x: BBI);
        // Clone debug-info on this instruction too.
        N->cloneDebugInfoFrom(From: &*BBI);

        // Register the new instruction with the assumption cache if necessary.
        if (auto *Assume = dyn_cast<AssumeInst>(Val: N))
          if (AC)
            AC->registerAssumption(CI: Assume);
      }
    }

    // Attach debug records from any trailing folded-away instructions, and
    // from the branch itself, to the instruction at the insertion point.
    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
      InsertPt->cloneDebugInfoFrom(From: &*SrcDbgCursor);
    InsertPt->cloneDebugInfoFrom(From: BI);

    // Redirect EdgeBB from BB to RealDest.
    BB->removePredecessor(Pred: EdgeBB);
    UncondBrInst *EdgeBI = cast<UncondBrInst>(Val: EdgeBB->getTerminator());
    EdgeBI->setSuccessor(idx: 0, NewSucc: RealDest);
    EdgeBI->setDebugLoc(BI->getDebugLoc());

    if (DTU) {
      SmallVector<DominatorTree::UpdateType, 2> Updates;
      Updates.push_back(Elt: {DominatorTree::Delete, EdgeBB, BB});
      Updates.push_back(Elt: {DominatorTree::Insert, EdgeBB, RealDest});
      DTU->applyUpdates(Updates);
    }

    // For simplicity, we created a separate basic block for the edge. Merge
    // it back into the predecessor if possible. This not only avoids
    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
    // bypass the check for trivial cycles above.
    MergeBlockIntoPredecessor(BB: EdgeBB, DTU);

    // Signal repeat, simplifying any other constants.
    return std::nullopt;
  }

  return false;
}
3719
3720bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI) {
3721 // Note: If BB is a loop header then there is a risk that threading introduces
3722 // a non-canonical loop by moving a back edge. So we avoid this optimization
3723 // for loop headers if NeedCanonicalLoop is set.
3724 if (Options.NeedCanonicalLoop && is_contained(Range&: LoopHeaders, Element: BI->getParent()))
3725 return false;
3726
3727 std::optional<bool> Result;
3728 bool EverChanged = false;
3729 do {
3730 // Note that None means "we changed things, but recurse further."
3731 Result =
3732 foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC: Options.AC);
3733 EverChanged |= Result == std::nullopt || *Result;
3734 } while (Result == std::nullopt);
3735 return EverChanged;
3736}
3737
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
///
/// When successful, the instructions of the conditional predecessor block(s)
/// are hoisted into the dominating block, every PHI in the block is replaced
/// by a select on the dominating branch's condition, and the dominating
/// conditional branch is replaced by an unconditional branch to the block.
///
/// \param PN   A PHI node in the block to process.
/// \param TTI  Target hooks used for the predictability threshold, the
///             mispredict penalty and (via dominatesMergePoint) costing.
/// \param DTU  Optional dominator tree updater.
/// \param AC   Optional assumption cache, forwarded to dominatesMergePoint.
/// \param DL   Data layout, used when simplifying PHIs.
/// \param SpeculateUnpredictables If set, an unpredictable dominating branch
///             raises the speculation budget by the branch mispredict penalty.
///
/// \returns true if the IR was changed (either PHIs were simplified away or
/// the full if-conversion was performed).
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
                                DomTreeUpdater *DTU, AssumptionCache *AC,
                                const DataLayout &DL,
                                bool SpeculateUnpredictables) {
  // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
  // statement", which has a very simple dominance structure.  Basically, we
  // are trying to find the condition that is being branched on, which
  // subsequently causes this merge to happen.  We really want control
  // dependence information for this check, but simplifycfg can't keep it up
  // to date, and this catches most of the cases we care about anyway.
  BasicBlock *BB = PN->getParent();

  BasicBlock *IfTrue, *IfFalse;
  CondBrInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
  if (!DomBI)
    return false;
  Value *IfCond = DomBI->getCondition();
  // Don't bother if the branch will be constant folded trivially.
  if (isa<ConstantInt>(Val: IfCond))
    return false;

  BasicBlock *DomBlock = DomBI->getParent();
  // The PHI's incoming blocks that end in an unconditional branch are the
  // conditional blocks whose contents would need to be speculated.
  SmallVector<BasicBlock *, 2> IfBlocks;
  llvm::copy_if(Range: PN->blocks(), Out: std::back_inserter(x&: IfBlocks),
                P: [](BasicBlock *IfBlock) {
                  return isa<UncondBrInst>(Val: IfBlock->getTerminator());
                });
  assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
         "Will have either one or two blocks to speculate.");

  // If the branch is non-unpredictable, see if we either predictably jump to
  // the merge bb (if we have only a single 'then' block), or if we predictably
  // jump to one specific 'then' block (if we have two of them).
  // It isn't beneficial to speculatively execute the code
  // from the block that we know is predictably not entered.
  bool IsUnpredictable = DomBI->getMetadata(KindID: LLVMContext::MD_unpredictable);
  if (!IsUnpredictable) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(I: *DomBI, TrueVal&: TWeight, FalseVal&: FWeight) &&
        (TWeight + FWeight) != 0) {
      BranchProbability BITrueProb =
          BranchProbability::getBranchProbability(Numerator: TWeight, Denominator: TWeight + FWeight);
      BranchProbability Likely = TTI.getPredictableBranchThreshold();
      BranchProbability BIFalseProb = BITrueProb.getCompl();
      if (IfBlocks.size() == 1) {
        // Probability of jumping straight to the merge block.
        BranchProbability BIBBProb =
            DomBI->getSuccessor(i: 0) == BB ? BITrueProb : BIFalseProb;
        if (BIBBProb >= Likely)
          return false;
      } else {
        if (BITrueProb >= Likely || BIFalseProb >= Likely)
          return false;
      }
    }
  }

  // Don't try to fold an unreachable block. For example, the phi node itself
  // can't be the candidate if-condition for a select that we want to form.
  if (auto *IfCondPhiInst = dyn_cast<PHINode>(Val: IfCond))
    if (IfCondPhiInst->getParent() == BB)
      return false;

  // Okay, we found that we can merge this two-entry phi node into a select.
  // Doing so would require us to fold *all* two entry phi nodes in this block.
  // At some point this becomes non-profitable (particularly if the target
  // doesn't support cmov's).  Only do this transformation if there are two or
  // fewer PHI nodes in this block.
  // NOTE(review): as written, the check fires only when a PHI is visited with
  // NumPhis already greater than 2, i.e. on a fourth PHI, so blocks with three
  // PHIs are still accepted. Confirm whether that is intended.
  unsigned NumPhis = 0;
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(Val: I); ++NumPhis, ++I)
    if (NumPhis > 2)
      return false;

  // Loop over the PHI's seeing if we can promote them all to select
  // instructions.  While we are at it, keep track of the instructions
  // that need to be moved to the dominating block.
  SmallPtrSet<Instruction *, 4> AggressiveInsts;
  SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
  InstructionCost Cost = 0;
  InstructionCost Budget =
      TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
  if (SpeculateUnpredictables && IsUnpredictable)
    Budget += TTI.getBranchMispredictPenalty();

  // Tracks whether any PHI was simplified away, so that the early exits below
  // report the modification.
  bool Changed = false;
  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(Val: II);) {
    // Advance the iterator before possibly erasing the PHI.
    PHINode *PN = cast<PHINode>(Val: II++);
    if (Value *V = simplifyInstruction(I: PN, Q: {DL, PN})) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
      continue;
    }

    // Both incoming values must be acceptable to dominatesMergePoint; Cost and
    // AggressiveInsts are accumulated across all PHIs.
    if (!dominatesMergePoint(V: PN->getIncomingValue(i: 0), BB, InsertPt: DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions) ||
        !dominatesMergePoint(V: PN->getIncomingValue(i: 1), BB, InsertPt: DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions))
      return Changed;
  }

  // If we folded the first phi, PN dangles at this point.  Refresh it.  If
  // we ran out of PHIs then we simplified them all.
  PN = dyn_cast<PHINode>(Val: BB->begin());
  if (!PN)
    return true;

  // Don't fold i1 branches on PHIs which contain binary operators or
  // (possibly inverted) select form of or/ands if their parameters are
  // an equality test.
  auto IsBinOpOrAndEq = [](Value *V) {
    CmpPredicate Pred;
    if (match(V, P: m_CombineOr(
                     Ps: m_CombineOr(
                         Ps: m_BinOp(L: m_Cmp(Pred, L: m_Value(), R: m_Value()), R: m_Value()),
                         Ps: m_BinOp(L: m_Value(), R: m_Cmp(Pred, L: m_Value(), R: m_Value()))),
                     Ps: m_c_Select(L: m_ImmConstant(),
                                R: m_Cmp(Pred, L: m_Value(), R: m_Value()))))) {
      return CmpInst::isEquality(pred: Pred);
    }
    return false;
  };
  if (PN->getType()->isIntegerTy(BitWidth: 1) &&
      (IsBinOpOrAndEq(PN->getIncomingValue(i: 0)) ||
       IsBinOpOrAndEq(PN->getIncomingValue(i: 1)) || IsBinOpOrAndEq(IfCond)))
    return Changed;

  // If all PHI nodes are promotable, check to make sure that all instructions
  // in the predecessor blocks can be promoted as well. If not, we won't be able
  // to get rid of the control flow, so it's not worth promoting to select
  // instructions.
  for (BasicBlock *IfBlock : IfBlocks)
    for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
      if (!AggressiveInsts.count(Ptr: &*I) && !I->isDebugOrPseudoInst()) {
        // This is not an aggressive instruction that we can promote.
        // Because of this, we won't be able to get rid of the control flow, so
        // the xform is not worth it.
        return Changed;
      }

  // If either of the blocks has its address taken, we can't do this fold.
  if (any_of(Range&: IfBlocks,
             P: [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
    return Changed;

  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond;
             if (IsUnpredictable) dbgs() << " (unpredictable)";
             dbgs() << "  T: " << IfTrue->getName()
                    << "  F: " << IfFalse->getName() << "\n");

  // If we can still promote the PHI nodes after this gauntlet of tests,
  // do all of the PHI's now.

  // Move all 'aggressive' instructions, which are defined in the
  // conditional parts of the if's up to the dominating block.
  for (BasicBlock *IfBlock : IfBlocks)
    hoistAllInstructionsInto(DomBlock, InsertPt: DomBI, BB: IfBlock);

  IRBuilder<NoFolder> Builder(DomBI);
  // Propagate fast-math-flags from phi nodes to replacement selects.
  while (PHINode *PN = dyn_cast<PHINode>(Val: BB->begin())) {
    // Change the PHI node into a select instruction.
    Value *TrueVal = PN->getIncomingValueForBlock(BB: IfTrue);
    Value *FalseVal = PN->getIncomingValueForBlock(BB: IfFalse);

    Value *Sel = Builder.CreateSelectFMF(C: IfCond, True: TrueVal, False: FalseVal,
                                         FMFSource: isa<FPMathOperator>(Val: PN) ? PN : nullptr,
                                         Name: "", MDFrom: DomBI);
    PN->replaceAllUsesWith(V: Sel);
    Sel->takeName(V: PN);
    PN->eraseFromParent();
  }

  // At this point, all IfBlocks are empty, so our if statement
  // has been flattened.  Change DomBlock to jump directly to our new block to
  // avoid other simplifycfg's kicking in on the diamond.
  Builder.CreateBr(Dest: BB);

  // Collect the dominator tree updates while the old conditional branch (and
  // therefore DomBlock's old successor list) still exists.
  SmallVector<DominatorTree::UpdateType, 3> Updates;
  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, DomBlock, BB});
    for (auto *Successor : successors(BB: DomBlock))
      Updates.push_back(Elt: {DominatorTree::Delete, DomBlock, Successor});
  }

  DomBI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
3932
3933static Value *createLogicalOp(IRBuilderBase &Builder,
3934 Instruction::BinaryOps Opc, Value *LHS,
3935 Value *RHS, const Twine &Name = "") {
3936 // Try to relax logical op to binary op.
3937 if (impliesPoison(ValAssumedPoison: RHS, V: LHS))
3938 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3939 if (Opc == Instruction::And)
3940 return Builder.CreateLogicalAnd(Cond1: LHS, Cond2: RHS, Name);
3941 if (Opc == Instruction::Or)
3942 return Builder.CreateLogicalOr(Cond1: LHS, Cond2: RHS, Name);
3943 llvm_unreachable("Invalid logical opcode");
3944}
3945
3946/// Return true if either PBI or BI has branch weight available, and store
3947/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3948/// not have branch weight, use 1:1 as its weight.
3949static bool extractPredSuccWeights(CondBrInst *PBI, CondBrInst *BI,
3950 uint64_t &PredTrueWeight,
3951 uint64_t &PredFalseWeight,
3952 uint64_t &SuccTrueWeight,
3953 uint64_t &SuccFalseWeight) {
3954 bool PredHasWeights =
3955 extractBranchWeights(I: *PBI, TrueVal&: PredTrueWeight, FalseVal&: PredFalseWeight);
3956 bool SuccHasWeights =
3957 extractBranchWeights(I: *BI, TrueVal&: SuccTrueWeight, FalseVal&: SuccFalseWeight);
3958 if (PredHasWeights || SuccHasWeights) {
3959 if (!PredHasWeights)
3960 PredTrueWeight = PredFalseWeight = 1;
3961 if (!SuccHasWeights)
3962 SuccTrueWeight = SuccFalseWeight = 1;
3963 return true;
3964 } else {
3965 return false;
3966 }
3967}
3968
3969/// Determine if the two branches share a common destination and deduce a glue
3970/// that joins the branches' conditions to arrive at the common destination if
3971/// that would be profitable.
3972static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3973shouldFoldCondBranchesToCommonDestination(CondBrInst *BI, CondBrInst *PBI,
3974 const TargetTransformInfo *TTI) {
3975 assert(BI && PBI && "Both blocks must end with a conditional branches.");
3976 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3977 "PredBB must be a predecessor of BB.");
3978
3979 // We have the potential to fold the conditions together, but if the
3980 // predecessor branch is predictable, we may not want to merge them.
3981 uint64_t PTWeight, PFWeight;
3982 BranchProbability PBITrueProb, Likely;
3983 if (TTI && !PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
3984 extractBranchWeights(I: *PBI, TrueVal&: PTWeight, FalseVal&: PFWeight) &&
3985 (PTWeight + PFWeight) != 0) {
3986 PBITrueProb =
3987 BranchProbability::getBranchProbability(Numerator: PTWeight, Denominator: PTWeight + PFWeight);
3988 Likely = TTI->getPredictableBranchThreshold();
3989 }
3990
3991 if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
3992 // Speculate the 2nd condition unless the 1st is probably true.
3993 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3994 return {{BI->getSuccessor(i: 0), Instruction::Or, false}};
3995 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
3996 // Speculate the 2nd condition unless the 1st is probably false.
3997 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3998 return {{BI->getSuccessor(i: 1), Instruction::And, false}};
3999 } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
4000 // Speculate the 2nd condition unless the 1st is probably true.
4001 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4002 return {{BI->getSuccessor(i: 1), Instruction::And, true}};
4003 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
4004 // Speculate the 2nd condition unless the 1st is probably false.
4005 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4006 return {{BI->getSuccessor(i: 0), Instruction::Or, true}};
4007 }
4008 return std::nullopt;
4009}
4010
4011static bool performBranchToCommonDestFolding(CondBrInst *BI, CondBrInst *PBI,
4012 DomTreeUpdater *DTU,
4013 MemorySSAUpdater *MSSAU,
4014 const TargetTransformInfo *TTI) {
4015 BasicBlock *BB = BI->getParent();
4016 BasicBlock *PredBlock = PBI->getParent();
4017
4018 // Determine if the two branches share a common destination.
4019 BasicBlock *CommonSucc;
4020 Instruction::BinaryOps Opc;
4021 bool InvertPredCond;
4022 std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) =
4023 *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
4024
4025 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4026
4027 IRBuilder<ConstantFolder, IRBuilderCallbackInserter> Builder(
4028 BB->getContext(), ConstantFolder{},
4029 IRBuilderCallbackInserter([&BB](Instruction *I) {
4030 // The builder is used to create instructions to eliminate the branch in
4031 // BB. If BB's terminator has !annotation metadata, add it to the new
4032 // instructions.
4033 I->copyMetadata(SrcInst: *BB->getTerminator(), WL: LLVMContext::MD_annotation);
4034 }));
4035 Builder.SetInsertPoint(PBI);
4036
4037 // If we need to invert the condition in the pred block to match, do so now.
4038 if (InvertPredCond) {
4039 InvertBranch(PBI, Builder);
4040 }
4041
4042 BasicBlock *UniqueSucc =
4043 PBI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 0) : BI->getSuccessor(i: 1);
4044
4045 // Before cloning instructions, notify the successor basic block that it
4046 // is about to have a new predecessor. This will update PHI nodes,
4047 // which will allow us to update live-out uses of bonus instructions.
4048 addPredecessorToBlock(Succ: UniqueSucc, NewPred: PredBlock, ExistPred: BB, MSSAU);
4049
4050 // Try to update branch weights.
4051 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4052 SmallVector<uint64_t, 2> MDWeights;
4053 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4054 SuccTrueWeight, SuccFalseWeight)) {
4055
4056 if (PBI->getSuccessor(i: 0) == BB) {
4057 // PBI: br i1 %x, BB, FalseDest
4058 // BI: br i1 %y, UniqueSucc, FalseDest
4059 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4060 MDWeights.push_back(Elt: PredTrueWeight * SuccTrueWeight);
4061 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4062 // TrueWeight for PBI * FalseWeight for BI.
4063 // We assume that total weights of a CondBrInst can fit into 32 bits.
4064 // Therefore, we will not have overflow using 64-bit arithmetic.
4065 MDWeights.push_back(Elt: PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4066 PredTrueWeight * SuccFalseWeight);
4067 } else {
4068 // PBI: br i1 %x, TrueDest, BB
4069 // BI: br i1 %y, TrueDest, UniqueSucc
4070 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4071 // FalseWeight for PBI * TrueWeight for BI.
4072 MDWeights.push_back(Elt: PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4073 PredFalseWeight * SuccTrueWeight);
4074 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4075 MDWeights.push_back(Elt: PredFalseWeight * SuccFalseWeight);
4076 }
4077
4078 setFittedBranchWeights(I&: *PBI, Weights: MDWeights, /*IsExpected=*/false,
4079 /*ElideAllZero=*/true);
4080
4081 // TODO: If BB is reachable from all paths through PredBlock, then we
4082 // could replace PBI's branch probabilities with BI's.
4083 } else
4084 PBI->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);
4085
4086 // Now, update the CFG.
4087 PBI->setSuccessor(idx: PBI->getSuccessor(i: 0) != BB, NewSucc: UniqueSucc);
4088
4089 if (DTU)
4090 DTU->applyUpdates(Updates: {{DominatorTree::Insert, PredBlock, UniqueSucc},
4091 {DominatorTree::Delete, PredBlock, BB}});
4092
4093 // If BI was a loop latch, it may have had associated loop metadata.
4094 // We need to copy it to the new latch, that is, PBI.
4095 if (MDNode *LoopMD = BI->getMetadata(KindID: LLVMContext::MD_loop))
4096 PBI->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD);
4097
4098 ValueToValueMapTy VMap; // maps original values to cloned values
4099 cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
4100
4101 Module *M = BB->getModule();
4102
4103 PredBlock->getTerminator()->cloneDebugInfoFrom(From: BB->getTerminator());
4104 for (DbgVariableRecord &DVR :
4105 filterDbgVars(R: PredBlock->getTerminator()->getDbgRecordRange())) {
4106 RemapDbgRecord(M, DR: &DVR, VM&: VMap,
4107 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
4108 }
4109
4110 // Now that the Cond was cloned into the predecessor basic block,
4111 // or/and the two conditions together.
4112 Value *BICond = VMap[BI->getCondition()];
4113 PBI->setCondition(
4114 createLogicalOp(Builder, Opc, LHS: PBI->getCondition(), RHS: BICond, Name: "or.cond"));
4115 if (!ProfcheckDisableMetadataFixes)
4116 if (auto *SI = dyn_cast<SelectInst>(Val: PBI->getCondition()))
4117 if (!MDWeights.empty()) {
4118 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4119 setFittedBranchWeights(I&: *SI, Weights: {MDWeights[0], MDWeights[1]},
4120 /*IsExpected=*/false, /*ElideAllZero=*/true);
4121 }
4122
4123 ++NumFoldBranchToCommonDest;
4124 return true;
4125}
4126
4127/// Return if an instruction's type or any of its operands' types are a vector
4128/// type.
4129static bool isVectorOp(Instruction &I) {
4130 return I.getType()->isVectorTy() || any_of(Range: I.operands(), P: [](Use &U) {
4131 return U->getType()->isVectorTy();
4132 });
4133}
4134
4135/// If this basic block is simple enough, and if a predecessor branches to us
4136/// and one of our successors, fold the block into the predecessor and use
4137/// logical operations to pick the right destination.
4138bool llvm::foldBranchToCommonDest(CondBrInst *BI, DomTreeUpdater *DTU,
4139 MemorySSAUpdater *MSSAU,
4140 const TargetTransformInfo *TTI,
4141 AssumptionCache *AC,
4142 unsigned BonusInstThreshold) {
4143 BasicBlock *BB = BI->getParent();
4144 TargetTransformInfo::TargetCostKind CostKind =
4145 BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
4146 : TargetTransformInfo::TCK_SizeAndLatency;
4147
4148 Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());
4149
4150 if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Val: Cond) ||
4151 Cond->getParent() != BB || !Cond->hasOneUse())
4152 return false;
4153
4154 // Finally, don't infinitely unroll conditional loops.
4155 if (is_contained(Range: successors(BB), Element: BB))
4156 return false;
4157
4158 // With which predecessors will we want to deal with?
4159 SmallVector<BasicBlock *, 8> Preds;
4160 for (BasicBlock *PredBlock : predecessors(BB)) {
4161 CondBrInst *PBI = dyn_cast<CondBrInst>(Val: PredBlock->getTerminator());
4162
4163 // Check that we have two conditional branches. If there is a PHI node in
4164 // the common successor, verify that the same value flows in from both
4165 // blocks.
4166 if (!PBI || !safeToMergeTerminators(SI1: BI, SI2: PBI))
4167 continue;
4168
4169 // Determine if the two branches share a common destination.
4170 BasicBlock *CommonSucc;
4171 Instruction::BinaryOps Opc;
4172 bool InvertPredCond;
4173 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4174 std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) = *Recipe;
4175 else
4176 continue;
4177
4178 // Check the cost of inserting the necessary logic before performing the
4179 // transformation.
4180 if (TTI) {
4181 Type *Ty = BI->getCondition()->getType();
4182 InstructionCost Cost = TTI->getArithmeticInstrCost(Opcode: Opc, Ty, CostKind);
4183 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4184 !isa<CmpInst>(Val: PBI->getCondition())))
4185 Cost += TTI->getArithmeticInstrCost(Opcode: Instruction::Xor, Ty, CostKind);
4186
4187 if (Cost > BranchFoldThreshold)
4188 continue;
4189 }
4190
4191 // Ok, we do want to deal with this predecessor. Record it.
4192 Preds.emplace_back(Args&: PredBlock);
4193 }
4194
4195 // If there aren't any predecessors into which we can fold,
4196 // don't bother checking the cost.
4197 if (Preds.empty())
4198 return false;
4199
4200 // Only allow this transformation if computing the condition doesn't involve
4201 // too many instructions and these involved instructions can be executed
4202 // unconditionally. We denote all involved instructions except the condition
4203 // as "bonus instructions", and only allow this transformation when the
4204 // number of the bonus instructions we'll need to create when cloning into
4205 // each predecessor does not exceed a certain threshold.
4206 unsigned NumBonusInsts = 0;
4207 bool SawVectorOp = false;
4208 const unsigned PredCount = Preds.size();
4209 // Speculated instructions will be inserted before the terminator of the
4210 // predecessor. Only handle the simple case of one predecessor.
4211 const Instruction *CxtI =
4212 PredCount == 1 ? Preds[0]->getTerminator() : nullptr;
4213 for (Instruction &I : *BB) {
4214 // Don't check the branch condition comparison itself.
4215 if (&I == Cond)
4216 continue;
4217 // Ignore the terminator.
4218 if (isa<UncondBrInst, CondBrInst>(Val: I))
4219 continue;
4220 // Pseudo probes aren't speculatable but can be dropped on fold.
4221 if (isa<PseudoProbeInst>(Val: I))
4222 continue;
4223 // I must be safe to execute unconditionally.
4224 if (!isSafeToSpeculativelyExecute(I: &I, CtxI: CxtI, AC))
4225 return false;
4226 SawVectorOp |= isVectorOp(I);
4227
4228 // Account for the cost of duplicating this instruction into each
4229 // predecessor. Ignore free instructions.
4230 if (!TTI || TTI->getInstructionCost(U: &I, CostKind) !=
4231 TargetTransformInfo::TCC_Free) {
4232 NumBonusInsts += PredCount;
4233
4234 // Early exits once we reach the limit.
4235 if (NumBonusInsts >
4236 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4237 return false;
4238 }
4239
4240 auto IsBCSSAUse = [BB, &I](Use &U) {
4241 auto *UI = cast<Instruction>(Val: U.getUser());
4242 if (auto *PN = dyn_cast<PHINode>(Val: UI))
4243 return PN->getIncomingBlock(U) == BB;
4244 return UI->getParent() == BB && I.comesBefore(Other: UI);
4245 };
4246
4247 // Does this instruction require rewriting of uses?
4248 if (!all_of(Range: I.uses(), P: IsBCSSAUse))
4249 return false;
4250 }
4251 if (NumBonusInsts >
4252 BonusInstThreshold *
4253 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4254 return false;
4255
4256 // Ok, we have the budget. Perform the transformation.
4257 for (BasicBlock *PredBlock : Preds) {
4258 auto *PBI = cast<CondBrInst>(Val: PredBlock->getTerminator());
4259 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4260 }
4261 return false;
4262}
4263
4264// If there is only one store in BB1 and BB2, return it, otherwise return
4265// nullptr.
4266static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
4267 StoreInst *S = nullptr;
4268 for (auto *BB : {BB1, BB2}) {
4269 if (!BB)
4270 continue;
4271 for (auto &I : *BB)
4272 if (auto *SI = dyn_cast<StoreInst>(Val: &I)) {
4273 if (S)
4274 // Multiple stores seen.
4275 return nullptr;
4276 else
4277 S = SI;
4278 }
4279 }
4280 return S;
4281}
4282
4283static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
4284 Value *AlternativeV = nullptr) {
4285 // PHI is going to be a PHI node that allows the value V that is defined in
4286 // BB to be referenced in BB's only successor.
4287 //
4288 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4289 // doesn't matter to us what the other operand is (it'll never get used). We
4290 // could just create a new PHI with an undef incoming value, but that could
4291 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4292 // other PHI. So here we directly look for some PHI in BB's successor with V
4293 // as an incoming operand. If we find one, we use it, else we create a new
4294 // one.
4295 //
4296 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4297 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4298 // where OtherBB is the single other predecessor of BB's only successor.
4299 PHINode *PHI = nullptr;
4300 BasicBlock *Succ = BB->getSingleSuccessor();
4301
4302 for (auto I = Succ->begin(); isa<PHINode>(Val: I); ++I)
4303 if (cast<PHINode>(Val&: I)->getIncomingValueForBlock(BB) == V) {
4304 PHI = cast<PHINode>(Val&: I);
4305 if (!AlternativeV)
4306 break;
4307
4308 assert(Succ->hasNPredecessors(2));
4309 auto PredI = pred_begin(BB: Succ);
4310 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4311 if (PHI->getIncomingValueForBlock(BB: OtherPredBB) == AlternativeV)
4312 break;
4313 PHI = nullptr;
4314 }
4315 if (PHI)
4316 return PHI;
4317
4318 // If V is not an instruction defined in BB, just return it.
4319 if (!AlternativeV &&
4320 (!isa<Instruction>(Val: V) || cast<Instruction>(Val: V)->getParent() != BB))
4321 return V;
4322
4323 PHI = PHINode::Create(Ty: V->getType(), NumReservedValues: 2, NameStr: "simplifycfg.merge");
4324 PHI->insertBefore(InsertPos: Succ->begin());
4325 PHI->addIncoming(V, BB);
4326 for (BasicBlock *PredBB : predecessors(BB: Succ))
4327 if (PredBB != BB)
4328 PHI->addIncoming(
4329 V: AlternativeV ? AlternativeV : PoisonValue::get(T: V->getType()), BB: PredBB);
4330 return PHI;
4331}
4332
4333static bool mergeConditionalStoreToAddress(
4334 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4335 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4336 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4337 // For every pointer, there must be exactly two stores, one coming from
4338 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4339 // store (to any address) in PTB,PFB or QTB,QFB.
4340 // FIXME: We could relax this restriction with a bit more work and performance
4341 // testing.
4342 StoreInst *PStore = findUniqueStoreInBlocks(BB1: PTB, BB2: PFB);
4343 StoreInst *QStore = findUniqueStoreInBlocks(BB1: QTB, BB2: QFB);
4344 if (!PStore || !QStore)
4345 return false;
4346
4347 // Now check the stores are compatible.
4348 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4349 PStore->getOrdering() != QStore->getOrdering() ||
4350 PStore->getSyncScopeID() != QStore->getSyncScopeID() ||
4351 PStore->getValueOperand()->getType() !=
4352 QStore->getValueOperand()->getType())
4353 return false;
4354
4355 // Check that sinking the store won't cause program behavior changes. Sinking
4356 // the store out of the Q blocks won't change any behavior as we're sinking
4357 // from a block to its unconditional successor. But we're moving a store from
4358 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4359 // So we need to check that there are no aliasing loads or stores in
4360 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4361 // operations between PStore and the end of its parent block.
4362 //
4363 // The ideal way to do this is to query AliasAnalysis, but we don't
4364 // preserve AA currently so that is dangerous. Be super safe and just
4365 // check there are no other memory operations at all.
4366 for (auto &I : *QFB->getSinglePredecessor())
4367 if (I.mayReadOrWriteMemory())
4368 return false;
4369 for (auto &I : *QFB)
4370 if (&I != QStore && I.mayReadOrWriteMemory())
4371 return false;
4372 if (QTB)
4373 for (auto &I : *QTB)
4374 if (&I != QStore && I.mayReadOrWriteMemory())
4375 return false;
4376 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4377 I != E; ++I)
4378 if (&*I != PStore && I->mayReadOrWriteMemory())
4379 return false;
4380
4381 // If we're not in aggressive mode, we only optimize if we have some
4382 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4383 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4384 if (!BB)
4385 return true;
4386 // Heuristic: if the block can be if-converted/phi-folded and the
4387 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4388 // thread this store.
4389 InstructionCost Cost = 0;
4390 InstructionCost Budget =
4391 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
4392 for (auto &I : *BB) {
4393 // Consider terminator instruction to be free.
4394 if (I.isTerminator())
4395 continue;
4396 // If this is one the stores that we want to speculate out of this BB,
4397 // then don't count it's cost, consider it to be free.
4398 if (auto *S = dyn_cast<StoreInst>(Val: &I))
4399 if (llvm::find(Range&: FreeStores, Val: S))
4400 continue;
4401 // Else, we have a white-list of instructions that we are ak speculating.
4402 if (!isa<BinaryOperator>(Val: I) && !isa<GetElementPtrInst>(Val: I))
4403 return false; // Not in white-list - not worthwhile folding.
4404 // And finally, if this is a non-free instruction that we are okay
4405 // speculating, ensure that we consider the speculation budget.
4406 Cost +=
4407 TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
4408 if (Cost > Budget)
4409 return false; // Eagerly refuse to fold as soon as we're out of budget.
4410 }
4411 assert(Cost <= Budget &&
4412 "When we run out of budget we will eagerly return from within the "
4413 "per-instruction loop.");
4414 return true;
4415 };
4416
4417 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4418 if (!MergeCondStoresAggressively &&
4419 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4420 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4421 return false;
4422
4423 // If PostBB has more than two predecessors, we need to split it so we can
4424 // sink the store.
4425 if (std::next(x: pred_begin(BB: PostBB), n: 2) != pred_end(BB: PostBB)) {
4426 // We know that QFB's only successor is PostBB. And QFB has a single
4427 // predecessor. If QTB exists, then its only successor is also PostBB.
4428 // If QTB does not exist, then QFB's only predecessor has a conditional
4429 // branch to QFB and PostBB.
4430 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4431 BasicBlock *NewBB =
4432 SplitBlockPredecessors(BB: PostBB, Preds: {QFB, TruePred}, Suffix: "condstore.split", DTU);
4433 if (!NewBB)
4434 return false;
4435 PostBB = NewBB;
4436 }
4437
4438 // OK, we're going to sink the stores to PostBB. The store has to be
4439 // conditional though, so first create the predicate.
4440 CondBrInst *PBranch =
4441 cast<CondBrInst>(Val: PFB->getSinglePredecessor()->getTerminator());
4442 CondBrInst *QBranch =
4443 cast<CondBrInst>(Val: QFB->getSinglePredecessor()->getTerminator());
4444 Value *PCond = PBranch->getCondition();
4445 Value *QCond = QBranch->getCondition();
4446
4447 Value *PPHI = ensureValueAvailableInSuccessor(V: PStore->getValueOperand(),
4448 BB: PStore->getParent());
4449 Value *QPHI = ensureValueAvailableInSuccessor(V: QStore->getValueOperand(),
4450 BB: QStore->getParent(), AlternativeV: PPHI);
4451
4452 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4453 IRBuilder<> QB(PostBB, PostBBFirst);
4454 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4455
4456 InvertPCond ^= (PStore->getParent() != PTB);
4457 InvertQCond ^= (QStore->getParent() != QTB);
4458 Value *PPred = InvertPCond ? QB.CreateNot(V: PCond) : PCond;
4459 Value *QPred = InvertQCond ? QB.CreateNot(V: QCond) : QCond;
4460
4461 Value *CombinedPred = QB.CreateOr(LHS: PPred, RHS: QPred);
4462
4463 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4464 auto *T = SplitBlockAndInsertIfThen(Cond: CombinedPred, SplitBefore: InsertPt,
4465 /*Unreachable=*/false,
4466 /*BranchWeights=*/nullptr, DTU);
4467 if (hasBranchWeightMD(I: *PBranch) && hasBranchWeightMD(I: *QBranch) &&
4468 !ProfcheckDisableMetadataFixes) {
4469 SmallVector<uint32_t, 2> PWeights, QWeights;
4470 extractBranchWeights(I: *PBranch, Weights&: PWeights);
4471 extractBranchWeights(I: *QBranch, Weights&: QWeights);
4472 if (InvertPCond)
4473 std::swap(a&: PWeights[0], b&: PWeights[1]);
4474 if (InvertQCond)
4475 std::swap(a&: QWeights[0], b&: QWeights[1]);
4476 auto CombinedWeights = getDisjunctionWeights(B1: PWeights, B2: QWeights);
4477 setFittedBranchWeights(I&: *PostBB->getTerminator(),
4478 Weights: {CombinedWeights[0], CombinedWeights[1]},
4479 /*IsExpected=*/false, /*ElideAllZero=*/true);
4480 }
4481
4482 QB.SetInsertPoint(T);
4483 StoreInst *SI = cast<StoreInst>(Val: QB.CreateStore(Val: QPHI, Ptr: Address));
4484 combineMetadataForCSE(K: QStore, J: PStore, DoesKMove: true);
4485 SI->copyMetadata(SrcInst: *QStore);
4486 // Update any dbg.assign intrinsics to track the merged value (QPHI) instead
4487 // of the original constant values, likely making these identical.
4488 for (auto *DbgAssign : at::getDVRAssignmentMarkers(Inst: SI)) {
4489 if (llvm::is_contained(Range: DbgAssign->location_ops(),
4490 Element: PStore->getValueOperand()))
4491 DbgAssign->replaceVariableLocationOp(OldValue: PStore->getValueOperand(), NewValue: QPHI);
4492 if (llvm::is_contained(Range: DbgAssign->location_ops(),
4493 Element: QStore->getValueOperand()))
4494 DbgAssign->replaceVariableLocationOp(OldValue: QStore->getValueOperand(), NewValue: QPHI);
4495 }
4496
4497 // Choose the minimum alignment. If we could prove both stores execute, we
4498 // could use biggest one. In this case, though, we only know that one of the
4499 // stores executes. And we don't know it's safe to take the alignment from a
4500 // store that doesn't execute.
4501 SI->setAlignment(std::min(a: PStore->getAlign(), b: QStore->getAlign()));
4502
4503 if (QStore->isAtomic())
4504 SI->setAtomic(Ordering: QStore->getOrdering(), SSID: QStore->getSyncScopeID());
4505
4506 QStore->eraseFromParent();
4507 PStore->eraseFromParent();
4508
4509 return true;
4510}
4511
4512static bool mergeConditionalStores(CondBrInst *PBI, CondBrInst *QBI,
4513 DomTreeUpdater *DTU, const DataLayout &DL,
4514 const TargetTransformInfo &TTI) {
4515 // The intention here is to find diamonds or triangles (see below) where each
4516 // conditional block contains a store to the same address. Both of these
4517 // stores are conditional, so they can't be unconditionally sunk. But it may
4518 // be profitable to speculatively sink the stores into one merged store at the
4519 // end, and predicate the merged store on the union of the two conditions of
4520 // PBI and QBI.
4521 //
4522 // This can reduce the number of stores executed if both of the conditions are
4523 // true, and can allow the blocks to become small enough to be if-converted.
4524 // This optimization will also chain, so that ladders of test-and-set
4525 // sequences can be if-converted away.
4526 //
4527 // We only deal with simple diamonds or triangles:
4528 //
4529 // PBI or PBI or a combination of the two
4530 // / \ | \
4531 // PTB PFB | PFB
4532 // \ / | /
4533 // QBI QBI
4534 // / \ | \
4535 // QTB QFB | QFB
4536 // \ / | /
4537 // PostBB PostBB
4538 //
4539 // We model triangles as a type of diamond with a nullptr "true" block.
4540 // Triangles are canonicalized so that the fallthrough edge is represented by
4541 // a true condition, as in the diagram above.
4542 BasicBlock *PTB = PBI->getSuccessor(i: 0);
4543 BasicBlock *PFB = PBI->getSuccessor(i: 1);
4544 BasicBlock *QTB = QBI->getSuccessor(i: 0);
4545 BasicBlock *QFB = QBI->getSuccessor(i: 1);
4546 BasicBlock *PostBB = QFB->getSingleSuccessor();
4547
4548 // Make sure we have a good guess for PostBB. If QTB's only successor is
4549 // QFB, then QFB is a better PostBB.
4550 if (QTB->getSingleSuccessor() == QFB)
4551 PostBB = QFB;
4552
4553 // If we couldn't find a good PostBB, stop.
4554 if (!PostBB)
4555 return false;
4556
4557 bool InvertPCond = false, InvertQCond = false;
4558 // Canonicalize fallthroughs to the true branches.
4559 if (PFB == QBI->getParent()) {
4560 std::swap(a&: PFB, b&: PTB);
4561 InvertPCond = true;
4562 }
4563 if (QFB == PostBB) {
4564 std::swap(a&: QFB, b&: QTB);
4565 InvertQCond = true;
4566 }
4567
4568 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4569 // and QFB may not. Model fallthroughs as a nullptr block.
4570 if (PTB == QBI->getParent())
4571 PTB = nullptr;
4572 if (QTB == PostBB)
4573 QTB = nullptr;
4574
4575 // Legality bailouts. We must have at least the non-fallthrough blocks and
4576 // the post-dominating block, and the non-fallthroughs must only have one
4577 // predecessor.
4578 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4579 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4580 };
4581 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4582 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4583 return false;
4584 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4585 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4586 return false;
4587 if (!QBI->getParent()->hasNUses(N: 2))
4588 return false;
4589
4590 // OK, this is a sequence of two diamonds or triangles.
4591 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4592 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4593 for (auto *BB : {PTB, PFB}) {
4594 if (!BB)
4595 continue;
4596 for (auto &I : *BB)
4597 if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
4598 PStoreAddresses.insert(Ptr: SI->getPointerOperand());
4599 }
4600 for (auto *BB : {QTB, QFB}) {
4601 if (!BB)
4602 continue;
4603 for (auto &I : *BB)
4604 if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
4605 QStoreAddresses.insert(Ptr: SI->getPointerOperand());
4606 }
4607
4608 set_intersect(S1&: PStoreAddresses, S2: QStoreAddresses);
4609 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4610 // clear what it contains.
4611 auto &CommonAddresses = PStoreAddresses;
4612
4613 bool Changed = false;
4614 for (auto *Address : CommonAddresses)
4615 Changed |=
4616 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4617 InvertPCond, InvertQCond, DTU, DL, TTI);
4618 return Changed;
4619}
4620
4621/// If the previous block ended with a widenable branch, determine if reusing
4622/// the target block is profitable and legal. This will have the effect of
4623/// "widening" PBI, but doesn't require us to reason about hosting safety.
4624static bool tryWidenCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI,
4625 DomTreeUpdater *DTU) {
4626 // TODO: This can be generalized in two important ways:
4627 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4628 // values from the PBI edge.
4629 // 2) We can sink side effecting instructions into BI's fallthrough
4630 // successor provided they doesn't contribute to computation of
4631 // BI's condition.
4632 BasicBlock *IfTrueBB = PBI->getSuccessor(i: 0);
4633 BasicBlock *IfFalseBB = PBI->getSuccessor(i: 1);
4634 if (!isWidenableBranch(U: PBI) || IfTrueBB != BI->getParent() ||
4635 !BI->getParent()->getSinglePredecessor())
4636 return false;
4637 if (!IfFalseBB->phis().empty())
4638 return false; // TODO
4639 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4640 // may undo the transform done here.
4641 // TODO: There might be a more fine-grained solution to this.
4642 if (!llvm::succ_empty(BB: IfFalseBB))
4643 return false;
4644 // Use lambda to lazily compute expensive condition after cheap ones.
4645 auto NoSideEffects = [](BasicBlock &BB) {
4646 return llvm::none_of(Range&: BB, P: [](const Instruction &I) {
4647 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4648 });
4649 };
4650 if (BI->getSuccessor(i: 1) != IfFalseBB && // no inf looping
4651 BI->getSuccessor(i: 1)->getTerminatingDeoptimizeCall() && // profitability
4652 NoSideEffects(*BI->getParent())) {
4653 auto *OldSuccessor = BI->getSuccessor(i: 1);
4654 OldSuccessor->removePredecessor(Pred: BI->getParent());
4655 BI->setSuccessor(idx: 1, NewSucc: IfFalseBB);
4656 if (DTU)
4657 DTU->applyUpdates(
4658 Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4659 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4660 return true;
4661 }
4662 if (BI->getSuccessor(i: 0) != IfFalseBB && // no inf looping
4663 BI->getSuccessor(i: 0)->getTerminatingDeoptimizeCall() && // profitability
4664 NoSideEffects(*BI->getParent())) {
4665 auto *OldSuccessor = BI->getSuccessor(i: 0);
4666 OldSuccessor->removePredecessor(Pred: BI->getParent());
4667 BI->setSuccessor(idx: 0, NewSucc: IfFalseBB);
4668 if (DTU)
4669 DTU->applyUpdates(
4670 Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4671 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4672 return true;
4673 }
4674 return false;
4675}
4676
4677/// If we have a conditional branch as a predecessor of another block,
4678/// this function tries to simplify it. We know
4679/// that PBI and BI are both conditional branches, and BI is in one of the
4680/// successor blocks of PBI - PBI branches to BI.
4681static bool SimplifyCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI,
4682 DomTreeUpdater *DTU,
4683 const DataLayout &DL,
4684 const TargetTransformInfo &TTI) {
4685 BasicBlock *BB = BI->getParent();
4686
4687 // If this block ends with a branch instruction, and if there is a
4688 // predecessor that ends on a branch of the same condition, make
4689 // this conditional branch redundant.
4690 if (PBI->getCondition() == BI->getCondition() &&
4691 PBI->getSuccessor(i: 0) != PBI->getSuccessor(i: 1)) {
4692 // Okay, the outcome of this conditional branch is statically
4693 // knowable. If this block had a single pred, handle specially, otherwise
4694 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4695 if (BB->getSinglePredecessor()) {
4696 // Turn this into a branch on constant.
4697 bool CondIsTrue = PBI->getSuccessor(i: 0) == BB;
4698 BI->setCondition(
4699 ConstantInt::get(Ty: Type::getInt1Ty(C&: BB->getContext()), V: CondIsTrue));
4700 return true; // Nuke the branch on constant.
4701 }
4702 }
4703
4704 // If the previous block ended with a widenable branch, determine if reusing
4705 // the target block is profitable and legal. This will have the effect of
4706 // "widening" PBI, but doesn't require us to reason about hosting safety.
4707 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4708 return true;
4709
4710 // If both branches are conditional and both contain stores to the same
4711 // address, remove the stores from the conditionals and create a conditional
4712 // merged store at the end.
4713 if (MergeCondStores && mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
4714 return true;
4715
4716 // If this is a conditional branch in an empty block, and if any
4717 // predecessors are a conditional branch to one of our destinations,
4718 // fold the conditions into logical ops and one cond br.
4719
4720 // Ignore dbg intrinsics.
4721 if (&*BB->begin() != BI)
4722 return false;
4723
4724 int PBIOp, BIOp;
4725 if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
4726 PBIOp = 0;
4727 BIOp = 0;
4728 } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
4729 PBIOp = 0;
4730 BIOp = 1;
4731 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
4732 PBIOp = 1;
4733 BIOp = 0;
4734 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
4735 PBIOp = 1;
4736 BIOp = 1;
4737 } else {
4738 return false;
4739 }
4740
4741 // Check to make sure that the other destination of this branch
4742 // isn't BB itself. If so, this is an infinite loop that will
4743 // keep getting unwound.
4744 if (PBI->getSuccessor(i: PBIOp) == BB)
4745 return false;
4746
4747 // If predecessor's branch probability to BB is too low don't merge branches.
4748 SmallVector<uint32_t, 2> PredWeights;
4749 if (!PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
4750 extractBranchWeights(I: *PBI, Weights&: PredWeights) &&
4751 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4752
4753 BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
4754 Numerator: PredWeights[PBIOp],
4755 Denominator: static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4756
4757 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4758 if (CommonDestProb >= Likely)
4759 return false;
4760 }
4761
4762 // Do not perform this transformation if it would require
4763 // insertion of a large number of select instructions. For targets
4764 // without predication/cmovs, this is a big pessimization.
4765
4766 BasicBlock *CommonDest = PBI->getSuccessor(i: PBIOp);
4767 BasicBlock *RemovedDest = PBI->getSuccessor(i: PBIOp ^ 1);
4768 unsigned NumPhis = 0;
4769 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(Val: II);
4770 ++II, ++NumPhis) {
4771 if (NumPhis > 2) // Disable this xform.
4772 return false;
4773 }
4774
4775 // Finally, if everything is ok, fold the branches to logical ops.
4776 BasicBlock *OtherDest = BI->getSuccessor(i: BIOp ^ 1);
4777
4778 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4779 << "AND: " << *BI->getParent());
4780
4781 SmallVector<DominatorTree::UpdateType, 5> Updates;
4782
4783 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4784 // branch in it, where one edge (OtherDest) goes back to itself but the other
4785 // exits. We don't *know* that the program avoids the infinite loop
4786 // (even though that seems likely). If we do this xform naively, we'll end up
4787 // recursively unpeeling the loop. Since we know that (after the xform is
4788 // done) that the block *is* infinite if reached, we just make it an obviously
4789 // infinite loop with no cond branch.
4790 if (OtherDest == BB) {
4791 // Insert it at the end of the function, because it's either code,
4792 // or it won't matter if it's hot. :)
4793 BasicBlock *InfLoopBlock =
4794 BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
4795 UncondBrInst::Create(Target: InfLoopBlock, InsertBefore: InfLoopBlock);
4796 if (DTU)
4797 Updates.push_back(Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4798 OtherDest = InfLoopBlock;
4799 }
4800
4801 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4802
4803 // BI may have other predecessors. Because of this, we leave
4804 // it alone, but modify PBI.
4805
4806 // Make sure we get to CommonDest on True&True directions.
4807 Value *PBICond = PBI->getCondition();
4808 IRBuilder<NoFolder> Builder(PBI);
4809 if (PBIOp)
4810 PBICond = Builder.CreateNot(V: PBICond, Name: PBICond->getName() + ".not");
4811
4812 Value *BICond = BI->getCondition();
4813 if (BIOp)
4814 BICond = Builder.CreateNot(V: BICond, Name: BICond->getName() + ".not");
4815
4816 // Merge the conditions.
4817 Value *Cond =
4818 createLogicalOp(Builder, Opc: Instruction::Or, LHS: PBICond, RHS: BICond, Name: "brmerge");
4819
4820 // Modify PBI to branch on the new condition to the new dests.
4821 PBI->setCondition(Cond);
4822 PBI->setSuccessor(idx: 0, NewSucc: CommonDest);
4823 PBI->setSuccessor(idx: 1, NewSucc: OtherDest);
4824
4825 if (DTU) {
4826 Updates.push_back(Elt: {DominatorTree::Insert, PBI->getParent(), OtherDest});
4827 Updates.push_back(Elt: {DominatorTree::Delete, PBI->getParent(), RemovedDest});
4828
4829 DTU->applyUpdates(Updates);
4830 }
4831
4832 // Update branch weight for PBI.
4833 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4834 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4835 bool HasWeights =
4836 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4837 SuccTrueWeight, SuccFalseWeight);
4838 if (HasWeights) {
4839 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4840 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4841 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4842 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4843 // The weight to CommonDest should be PredCommon * SuccTotal +
4844 // PredOther * SuccCommon.
4845 // The weight to OtherDest should be PredOther * SuccOther.
4846 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4847 PredOther * SuccCommon,
4848 PredOther * SuccOther};
4849
4850 setFittedBranchWeights(I&: *PBI, Weights: NewWeights, /*IsExpected=*/false,
4851 /*ElideAllZero=*/true);
4852 // Cond may be a select instruction with the first operand set to "true", or
4853 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4854 if (!ProfcheckDisableMetadataFixes)
4855 if (auto *SI = dyn_cast<SelectInst>(Val: Cond)) {
4856 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4857 // The select is predicated on PBICond
4858 assert(SI->getCondition() == PBICond);
4859 // The corresponding probabilities are what was referred to above as
4860 // PredCommon and PredOther.
4861 setFittedBranchWeights(I&: *SI, Weights: {PredCommon, PredOther},
4862 /*IsExpected=*/false, /*ElideAllZero=*/true);
4863 }
4864 }
4865
4866 // OtherDest may have phi nodes. If so, add an entry from PBI's
4867 // block that are identical to the entries for BI's block.
4868 addPredecessorToBlock(Succ: OtherDest, NewPred: PBI->getParent(), ExistPred: BB);
4869
4870 // We know that the CommonDest already had an edge from PBI to
4871 // it. If it has PHIs though, the PHIs may have different
4872 // entries for BB and PBI's BB. If so, insert a select to make
4873 // them agree.
4874 for (PHINode &PN : CommonDest->phis()) {
4875 Value *BIV = PN.getIncomingValueForBlock(BB);
4876 unsigned PBBIdx = PN.getBasicBlockIndex(BB: PBI->getParent());
4877 Value *PBIV = PN.getIncomingValue(i: PBBIdx);
4878 if (BIV != PBIV) {
4879 // Insert a select in PBI to pick the right value.
4880 SelectInst *NV = cast<SelectInst>(
4881 Val: Builder.CreateSelect(C: PBICond, True: PBIV, False: BIV, Name: PBIV->getName() + ".mux"));
4882 PN.setIncomingValue(i: PBBIdx, V: NV);
4883 // The select has the same condition as PBI, in the same BB. The
4884 // probabilities don't change.
4885 if (HasWeights) {
4886 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4887 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4888 setFittedBranchWeights(I&: *NV, Weights: {TrueWeight, FalseWeight},
4889 /*IsExpected=*/false, /*ElideAllZero=*/true);
4890 }
4891 }
4892 }
4893
4894 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4895 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4896
4897 // This basic block is probably dead. We know it has at least
4898 // one fewer predecessor.
4899 return true;
4900}
4901
4902// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4903// true or to FalseBB if Cond is false.
4904// Takes care of updating the successors and removing the old terminator.
4905// Also makes sure not to introduce new successors by assuming that edges to
4906// non-successor TrueBBs and FalseBBs aren't reachable.
4907bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4908 Value *Cond, BasicBlock *TrueBB,
4909 BasicBlock *FalseBB,
4910 uint32_t TrueWeight,
4911 uint32_t FalseWeight) {
4912 auto *BB = OldTerm->getParent();
4913 // Remove any superfluous successor edges from the CFG.
4914 // First, figure out which successors to preserve.
4915 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4916 // successor.
4917 BasicBlock *KeepEdge1 = TrueBB;
4918 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4919
4920 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4921
4922 // Then remove the rest.
4923 for (BasicBlock *Succ : successors(I: OldTerm)) {
4924 // Make sure only to keep exactly one copy of each edge.
4925 if (Succ == KeepEdge1)
4926 KeepEdge1 = nullptr;
4927 else if (Succ == KeepEdge2)
4928 KeepEdge2 = nullptr;
4929 else {
4930 Succ->removePredecessor(Pred: BB,
4931 /*KeepOneInputPHIs=*/true);
4932
4933 if (Succ != TrueBB && Succ != FalseBB)
4934 RemovedSuccessors.insert(X: Succ);
4935 }
4936 }
4937
4938 IRBuilder<> Builder(OldTerm);
4939 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4940
4941 // Insert an appropriate new terminator.
4942 if (!KeepEdge1 && !KeepEdge2) {
4943 if (TrueBB == FalseBB) {
4944 // We were only looking for one successor, and it was present.
4945 // Create an unconditional branch to it.
4946 Builder.CreateBr(Dest: TrueBB);
4947 } else {
4948 // We found both of the successors we were looking for.
4949 // Create a conditional branch sharing the condition of the select.
4950 CondBrInst *NewBI = Builder.CreateCondBr(Cond, True: TrueBB, False: FalseBB);
4951 setBranchWeights(I&: *NewBI, Weights: {TrueWeight, FalseWeight},
4952 /*IsExpected=*/false, /*ElideAllZero=*/true);
4953 }
4954 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4955 // Neither of the selected blocks were successors, so this
4956 // terminator must be unreachable.
4957 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4958 } else {
4959 // One of the selected values was a successor, but the other wasn't.
4960 // Insert an unconditional branch to the one that was found;
4961 // the edge to the one that wasn't must be unreachable.
4962 if (!KeepEdge1) {
4963 // Only TrueBB was found.
4964 Builder.CreateBr(Dest: TrueBB);
4965 } else {
4966 // Only FalseBB was found.
4967 Builder.CreateBr(Dest: FalseBB);
4968 }
4969 }
4970
4971 eraseTerminatorAndDCECond(TI: OldTerm);
4972
4973 if (DTU) {
4974 SmallVector<DominatorTree::UpdateType, 2> Updates;
4975 Updates.reserve(N: RemovedSuccessors.size());
4976 for (auto *RemovedSuccessor : RemovedSuccessors)
4977 Updates.push_back(Elt: {DominatorTree::Delete, BB, RemovedSuccessor});
4978 DTU->applyUpdates(Updates);
4979 }
4980
4981 return true;
4982}
4983
4984// Replaces
4985// (switch (select cond, X, Y)) on constant X, Y
4986// with a branch - conditional if X and Y lead to distinct BBs,
4987// unconditional otherwise.
4988bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4989 SelectInst *Select) {
4990 // Check for constant integer values in the select.
4991 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Val: Select->getTrueValue());
4992 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Val: Select->getFalseValue());
4993 if (!TrueVal || !FalseVal)
4994 return false;
4995
4996 // Find the relevant condition and destinations.
4997 Value *Condition = Select->getCondition();
4998 BasicBlock *TrueBB = SI->findCaseValue(C: TrueVal)->getCaseSuccessor();
4999 BasicBlock *FalseBB = SI->findCaseValue(C: FalseVal)->getCaseSuccessor();
5000
5001 // Get weight for TrueBB and FalseBB.
5002 uint32_t TrueWeight = 0, FalseWeight = 0;
5003 SmallVector<uint64_t, 8> Weights;
5004 bool HasWeights = hasBranchWeightMD(I: *SI);
5005 if (HasWeights) {
5006 getBranchWeights(TI: SI, Weights);
5007 if (Weights.size() == 1 + SI->getNumCases()) {
5008 TrueWeight =
5009 (uint32_t)Weights[SI->findCaseValue(C: TrueVal)->getSuccessorIndex()];
5010 FalseWeight =
5011 (uint32_t)Weights[SI->findCaseValue(C: FalseVal)->getSuccessorIndex()];
5012 }
5013 }
5014
5015 // Perform the actual simplification.
5016 return simplifyTerminatorOnSelect(OldTerm: SI, Cond: Condition, TrueBB, FalseBB, TrueWeight,
5017 FalseWeight);
5018}
5019
5020// Replaces
5021// (indirectbr (select cond, blockaddress(@fn, BlockA),
5022// blockaddress(@fn, BlockB)))
5023// with
5024// (br cond, BlockA, BlockB).
5025bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
5026 SelectInst *SI) {
5027 // Check that both operands of the select are block addresses.
5028 BlockAddress *TBA = dyn_cast<BlockAddress>(Val: SI->getTrueValue());
5029 BlockAddress *FBA = dyn_cast<BlockAddress>(Val: SI->getFalseValue());
5030 if (!TBA || !FBA)
5031 return false;
5032
5033 // Extract the actual blocks.
5034 BasicBlock *TrueBB = TBA->getBasicBlock();
5035 BasicBlock *FalseBB = FBA->getBasicBlock();
5036
5037 // The select's profile becomes the profile of the conditional branch that
5038 // replaces the indirect branch.
5039 SmallVector<uint32_t> SelectBranchWeights(2);
5040 if (!ProfcheckDisableMetadataFixes)
5041 extractBranchWeights(I: *SI, Weights&: SelectBranchWeights);
5042 // Perform the actual simplification.
5043 return simplifyTerminatorOnSelect(OldTerm: IBI, Cond: SI->getCondition(), TrueBB, FalseBB,
5044 TrueWeight: SelectBranchWeights[0],
5045 FalseWeight: SelectBranchWeights[1]);
5046}
5047
5048/// This is called when we find an icmp instruction
5049/// (a seteq/setne with a constant) as the only instruction in a
5050/// block that ends with an uncond branch. We are looking for a very specific
5051/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5052/// this case, we merge the first two "or's of icmp" into a switch, but then the
5053/// default value goes to an uncond block with a seteq in it, we get something
5054/// like:
5055///
5056/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5057/// DEFAULT:
5058/// %tmp = icmp eq i8 %A, 92
5059/// br label %end
5060/// end:
5061/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5062///
5063/// We prefer to split the edge to 'end' so that there is a true/false entry to
5064/// the PHI, merging the third icmp into the switch.
5065bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5066 ICmpInst *ICI, IRBuilder<> &Builder) {
5067 // Select == nullptr means we assume that there is a hidden no-op select
5068 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5069 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, Select: nullptr, Builder);
5070}
5071
5072/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5073/// case. This is called when we find an icmp instruction (a seteq/setne with a
5074/// constant) and its following select instruction as the only TWO instructions
5075/// in a block that ends with an uncond branch. We are looking for a very
5076/// specific pattern that occurs when "
5077/// if (A == 1) return C1;
5078/// if (A == 2) return C2;
5079/// if (A < 3) return C3;
5080/// return C4;
5081/// " gets simplified. In this case, we merge the first two "branches of icmp"
5082/// into a switch, but then the default value goes to an uncond block with a lt
5083/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5084/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5085/// get something like:
5086///
5087/// case1:
5088/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5089/// case2:
5090/// br label %end
5091/// DEFAULT:
5092/// %tmp = icmp eq i8 %A, 2
5093/// %val = select i1 %tmp, i8 C3, i8 C4
5094/// br label %end
5095/// end:
5096/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5097///
5098/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5099/// to the PHI, merging the icmp & select into the switch, as follows:
5100///
5101/// case1:
5102/// switch i8 %A, label %DEFAULT [
5103/// i8 0, label %end
5104/// i8 1, label %case2
5105/// i8 2, label %case3
5106/// ]
5107/// case2:
5108/// br label %end
5109/// case3:
5110/// br label %end
5111/// DEFAULT:
5112/// br label %end
5113/// end:
5114/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
5115bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
5116 ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
5117 BasicBlock *BB = ICI->getParent();
5118
5119 // If the block has any PHIs in it or the icmp/select has multiple uses, it is
5120 // too complex.
5121 /// TODO: support multi-phis in succ BB of select's BB.
5122 if (isa<PHINode>(Val: BB->begin()) || !ICI->hasOneUse() ||
5123 (Select && !Select->hasOneUse()))
5124 return false;
5125
5126 // The pattern we're looking for is where our only predecessor is a switch on
5127 // 'V' and this block is the default case for the switch. In this case we can
5128 // fold the compared value into the switch to simplify things.
5129 BasicBlock *Pred = BB->getSinglePredecessor();
5130 if (!Pred || !isa<SwitchInst>(Val: Pred->getTerminator()))
5131 return false;
5132
5133 Value *IcmpCond;
5134 ConstantInt *NewCaseVal;
5135 CmpPredicate Predicate;
5136
5137 // Match icmp X, C
5138 if (!match(V: ICI,
5139 P: m_ICmp(Pred&: Predicate, L: m_Value(V&: IcmpCond), R: m_ConstantInt(CI&: NewCaseVal))))
5140 return false;
5141
5142 Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
5143 Instruction *User;
5144 if (!Select) {
5145 // If Select == nullptr, we can assume that there is a hidden no-op select
5146 // just after icmp
5147 SelectCond = ICI;
5148 SelectTrueVal = Builder.getTrue();
5149 SelectFalseVal = Builder.getFalse();
5150 User = ICI->user_back();
5151 } else {
5152 SelectCond = Select->getCondition();
5153 // Check if the select condition is the same as the icmp condition.
5154 if (SelectCond != ICI)
5155 return false;
5156 SelectTrueVal = Select->getTrueValue();
5157 SelectFalseVal = Select->getFalseValue();
5158 User = Select->user_back();
5159 }
5160
5161 SwitchInst *SI = cast<SwitchInst>(Val: Pred->getTerminator());
5162 if (SI->getCondition() != IcmpCond)
5163 return false;
5164
5165 // If BB is reachable on a non-default case, then we simply know the value of
5166 // V in this block. Substitute it and constant fold the icmp instruction
5167 // away.
5168 if (SI->getDefaultDest() != BB) {
5169 ConstantInt *VVal = SI->findCaseDest(BB);
5170 assert(VVal && "Should have a unique destination value");
5171 ICI->setOperand(i_nocapture: 0, Val_nocapture: VVal);
5172
5173 if (Value *V = simplifyInstruction(I: ICI, Q: {DL, ICI})) {
5174 ICI->replaceAllUsesWith(V);
5175 ICI->eraseFromParent();
5176 }
5177 // BB is now empty, so it is likely to simplify away.
5178 return requestResimplify();
5179 }
5180
5181 // Ok, the block is reachable from the default dest. If the constant we're
5182 // comparing exists in one of the other edges, then we can constant fold ICI
5183 // and zap it.
5184 if (SI->findCaseValue(C: NewCaseVal) != SI->case_default()) {
5185 Value *V;
5186 if (Predicate == ICmpInst::ICMP_EQ)
5187 V = ConstantInt::getFalse(Context&: BB->getContext());
5188 else
5189 V = ConstantInt::getTrue(Context&: BB->getContext());
5190
5191 ICI->replaceAllUsesWith(V);
5192 ICI->eraseFromParent();
5193 // BB is now empty, so it is likely to simplify away.
5194 return requestResimplify();
5195 }
5196
5197 // The use of the select has to be in the 'end' block, by the only PHI node in
5198 // the block.
5199 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(Idx: 0);
5200 PHINode *PHIUse = dyn_cast<PHINode>(Val: User);
5201 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5202 isa<PHINode>(Val: ++BasicBlock::iterator(PHIUse)))
5203 return false;
5204
5205 // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
5206 // edge gets SelectTrueVal in the PHI.
5207 Value *DefaultCst = SelectFalseVal;
5208 Value *NewCst = SelectTrueVal;
5209
5210 if (ICI->getPredicate() == ICmpInst::ICMP_NE)
5211 std::swap(a&: DefaultCst, b&: NewCst);
5212
5213 // Replace Select (which is used by the PHI for the default value) with
5214 // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
5215 if (Select) {
5216 Select->replaceAllUsesWith(V: DefaultCst);
5217 Select->eraseFromParent();
5218 } else {
5219 ICI->replaceAllUsesWith(V: DefaultCst);
5220 }
5221 ICI->eraseFromParent();
5222
5223 SmallVector<DominatorTree::UpdateType, 2> Updates;
5224
5225 // Okay, the switch goes to this block on a default value. Add an edge from
5226 // the switch to the merge point on the compared value.
5227 BasicBlock *NewBB =
5228 BasicBlock::Create(Context&: BB->getContext(), Name: "switch.edge", Parent: BB->getParent(), InsertBefore: BB);
5229 {
5230 SwitchInstProfUpdateWrapper SIW(*SI);
5231 auto W0 = SIW.getSuccessorWeight(idx: 0);
5232 SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
5233 if (W0) {
5234 NewW = ((uint64_t(*W0) + 1) >> 1);
5235 SIW.setSuccessorWeight(idx: 0, W: *NewW);
5236 }
5237 SIW.addCase(OnVal: NewCaseVal, Dest: NewBB, W: NewW);
5238 if (DTU)
5239 Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewBB});
5240 }
5241
5242 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5243 Builder.SetInsertPoint(NewBB);
5244 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5245 Builder.CreateBr(Dest: SuccBlock);
5246 PHIUse->addIncoming(V: NewCst, BB: NewBB);
5247 if (DTU) {
5248 Updates.push_back(Elt: {DominatorTree::Insert, NewBB, SuccBlock});
5249 DTU->applyUpdates(Updates);
5250 }
5251 return true;
5252}
5253
5254/// Check to see if it is branching on an or/and chain of icmp instructions, and
5255/// fold it into a switch instruction if so.
5256bool SimplifyCFGOpt::simplifyBranchOnICmpChain(CondBrInst *BI,
5257 IRBuilder<> &Builder,
5258 const DataLayout &DL) {
5259 Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());
5260 if (!Cond)
5261 return false;
5262
5263 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5264 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5265 // 'setne's and'ed together, collect them.
5266
5267 // Try to gather values from a chain of and/or to be turned into a switch
5268 ConstantComparesGatherer ConstantCompare(Cond, DL);
5269 // Unpack the result
5270 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5271 Value *CompVal = ConstantCompare.CompValue;
5272 unsigned UsedICmps = ConstantCompare.UsedICmps;
5273 Value *ExtraCase = ConstantCompare.Extra;
5274 bool TrueWhenEqual = ConstantCompare.IsEq;
5275
5276 // If we didn't have a multiply compared value, fail.
5277 if (!CompVal)
5278 return false;
5279
5280 // Avoid turning single icmps into a switch.
5281 if (UsedICmps <= 1)
5282 return false;
5283
5284 // There might be duplicate constants in the list, which the switch
5285 // instruction can't handle, remove them now.
5286 array_pod_sort(Start: Values.begin(), End: Values.end(), Compare: constantIntSortPredicate);
5287 Values.erase(CS: llvm::unique(R&: Values), CE: Values.end());
5288
5289 // If Extra was used, we require at least two switch values to do the
5290 // transformation. A switch with one value is just a conditional branch.
5291 if (ExtraCase && Values.size() < 2)
5292 return false;
5293
5294 SmallVector<uint32_t> BranchWeights;
5295 const bool HasProfile = !ProfcheckDisableMetadataFixes &&
5296 extractBranchWeights(I: *BI, Weights&: BranchWeights);
5297
5298 // Figure out which block is which destination.
5299 BasicBlock *DefaultBB = BI->getSuccessor(i: 1);
5300 BasicBlock *EdgeBB = BI->getSuccessor(i: 0);
5301 if (!TrueWhenEqual) {
5302 std::swap(a&: DefaultBB, b&: EdgeBB);
5303 if (HasProfile)
5304 std::swap(a&: BranchWeights[0], b&: BranchWeights[1]);
5305 }
5306
5307 BasicBlock *BB = BI->getParent();
5308
5309 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5310 << " cases into SWITCH. BB is:\n"
5311 << *BB);
5312
5313 SmallVector<DominatorTree::UpdateType, 2> Updates;
5314
5315 // If there are any extra values that couldn't be folded into the switch
5316 // then we evaluate them with an explicit branch first. Split the block
5317 // right before the condbr to handle it.
5318 if (ExtraCase) {
5319 BasicBlock *NewBB = SplitBlock(Old: BB, SplitPt: BI, DTU, /*LI=*/nullptr,
5320 /*MSSAU=*/nullptr, BBName: "switch.early.test");
5321
5322 // Remove the uncond branch added to the old block.
5323 Instruction *OldTI = BB->getTerminator();
5324 Builder.SetInsertPoint(OldTI);
5325
5326 // There can be an unintended UB if extra values are Poison. Before the
5327 // transformation, extra values may not be evaluated according to the
5328 // condition, and it will not raise UB. But after transformation, we are
5329 // evaluating extra values before checking the condition, and it will raise
5330 // UB. It can be solved by adding freeze instruction to extra values.
5331 AssumptionCache *AC = Options.AC;
5332
5333 if (!isGuaranteedNotToBeUndefOrPoison(V: ExtraCase, AC, CtxI: BI, DT: nullptr))
5334 ExtraCase = Builder.CreateFreeze(V: ExtraCase);
5335
5336 // We don't have any info about this condition.
5337 auto *Br = TrueWhenEqual ? Builder.CreateCondBr(Cond: ExtraCase, True: EdgeBB, False: NewBB)
5338 : Builder.CreateCondBr(Cond: ExtraCase, True: NewBB, False: EdgeBB);
5339 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *Br, DEBUG_TYPE);
5340
5341 OldTI->eraseFromParent();
5342
5343 if (DTU)
5344 Updates.push_back(Elt: {DominatorTree::Insert, BB, EdgeBB});
5345
5346 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5347 // for the edge we just added.
5348 addPredecessorToBlock(Succ: EdgeBB, NewPred: BB, ExistPred: NewBB);
5349
5350 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5351 << "\nEXTRABB = " << *BB);
5352 BB = NewBB;
5353 }
5354
5355 Builder.SetInsertPoint(BI);
5356 // Convert pointer to int before we switch.
5357 if (CompVal->getType()->isPointerTy()) {
5358 assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
5359 "Should not end up here with unstable pointers");
5360 CompVal = Builder.CreatePtrToInt(
5361 V: CompVal, DestTy: DL.getIntPtrType(CompVal->getType()), Name: "magicptr");
5362 }
5363
5364 // Check if we can represent the values as a contiguous range. If so, we use a
5365 // range check + conditional branch instead of a switch.
5366 if (Values.front()->getValue() - Values.back()->getValue() ==
5367 Values.size() - 1) {
5368 ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
5369 Lower: Values.back()->getValue(), Upper: Values.front()->getValue() + 1);
5370 APInt Offset, RHS;
5371 ICmpInst::Predicate Pred;
5372 RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
5373 Value *X = CompVal;
5374 if (!Offset.isZero())
5375 X = Builder.CreateAdd(LHS: X, RHS: ConstantInt::get(Ty: CompVal->getType(), V: Offset));
5376 Value *Cond =
5377 Builder.CreateICmp(P: Pred, LHS: X, RHS: ConstantInt::get(Ty: CompVal->getType(), V: RHS));
5378 CondBrInst *NewBI = Builder.CreateCondBr(Cond, True: EdgeBB, False: DefaultBB);
5379 if (HasProfile)
5380 setBranchWeights(I&: *NewBI, Weights: BranchWeights, /*IsExpected=*/false);
5381 // We don't need to update PHI nodes since we don't add any new edges.
5382 } else {
5383 // Create the new switch instruction now.
5384 SwitchInst *New = Builder.CreateSwitch(V: CompVal, Dest: DefaultBB, NumCases: Values.size());
5385 if (HasProfile) {
5386 // We know the weight of the default case. We don't know the weight of the
5387 // other cases, but rather than completely lose profiling info, we split
5388 // the remaining probability equally over them.
5389 SmallVector<uint32_t> NewWeights(Values.size() + 1);
5390 NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
5391 // if TrueWhenEqual.
5392 for (auto &V : drop_begin(RangeOrContainer&: NewWeights))
5393 V = BranchWeights[0] / Values.size();
5394 setBranchWeights(I&: *New, Weights: NewWeights, /*IsExpected=*/false);
5395 }
5396
5397 // Add all of the 'cases' to the switch instruction.
5398 for (ConstantInt *Val : Values)
5399 New->addCase(OnVal: Val, Dest: EdgeBB);
5400
5401 // We added edges from PI to the EdgeBB. As such, if there were any
5402 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5403 // the number of edges added.
5404 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(Val: BBI); ++BBI) {
5405 PHINode *PN = cast<PHINode>(Val&: BBI);
5406 Value *InVal = PN->getIncomingValueForBlock(BB);
5407 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5408 PN->addIncoming(V: InVal, BB);
5409 }
5410 }
5411
5412 // Erase the old branch instruction.
5413 eraseTerminatorAndDCECond(TI: BI);
5414 if (DTU)
5415 DTU->applyUpdates(Updates);
5416
5417 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5418 return true;
5419}
5420
5421bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5422 if (isa<PHINode>(Val: RI->getValue()))
5423 return simplifyCommonResume(RI);
5424 else if (isa<LandingPadInst>(Val: RI->getParent()->getFirstNonPHIIt()) &&
5425 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5426 // The resume must unwind the exception that caused control to branch here.
5427 return simplifySingleResume(RI);
5428
5429 return false;
5430}
5431
5432// Check if cleanup block is empty
5433static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
5434 for (Instruction &I : R) {
5435 auto *II = dyn_cast<IntrinsicInst>(Val: &I);
5436 if (!II)
5437 return false;
5438
5439 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5440 switch (IntrinsicID) {
5441 case Intrinsic::dbg_declare:
5442 case Intrinsic::dbg_value:
5443 case Intrinsic::dbg_label:
5444 case Intrinsic::lifetime_end:
5445 break;
5446 default:
5447 return false;
5448 }
5449 }
5450 return true;
5451}
5452
5453// Simplify resume that is shared by several landing pads (phi of landing pad).
5454bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5455 BasicBlock *BB = RI->getParent();
5456
5457 // Check that there are no other instructions except for debug and lifetime
5458 // intrinsics between the phi's and resume instruction.
5459 if (!isCleanupBlockEmpty(R: make_range(x: RI->getParent()->getFirstNonPHIIt(),
5460 y: BB->getTerminator()->getIterator())))
5461 return false;
5462
5463 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5464 auto *PhiLPInst = cast<PHINode>(Val: RI->getValue());
5465
5466 // Check incoming blocks to see if any of them are trivial.
5467 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5468 Idx++) {
5469 auto *IncomingBB = PhiLPInst->getIncomingBlock(i: Idx);
5470 auto *IncomingValue = PhiLPInst->getIncomingValue(i: Idx);
5471
5472 // If the block has other successors, we can not delete it because
5473 // it has other dependents.
5474 if (IncomingBB->getUniqueSuccessor() != BB)
5475 continue;
5476
5477 auto *LandingPad = dyn_cast<LandingPadInst>(Val: IncomingBB->getFirstNonPHIIt());
5478 // Not the landing pad that caused the control to branch here.
5479 if (IncomingValue != LandingPad)
5480 continue;
5481
5482 if (isCleanupBlockEmpty(
5483 R: make_range(x: LandingPad->getNextNode(), y: IncomingBB->getTerminator())))
5484 TrivialUnwindBlocks.insert(X: IncomingBB);
5485 }
5486
5487 // If no trivial unwind blocks, don't do any simplifications.
5488 if (TrivialUnwindBlocks.empty())
5489 return false;
5490
5491 // Turn all invokes that unwind here into calls.
5492 for (auto *TrivialBB : TrivialUnwindBlocks) {
5493 // Blocks that will be simplified should be removed from the phi node.
5494 // Note there could be multiple edges to the resume block, and we need
5495 // to remove them all.
5496 while (PhiLPInst->getBasicBlockIndex(BB: TrivialBB) != -1)
5497 BB->removePredecessor(Pred: TrivialBB, KeepOneInputPHIs: true);
5498
5499 for (BasicBlock *Pred :
5500 llvm::make_early_inc_range(Range: predecessors(BB: TrivialBB))) {
5501 removeUnwindEdge(BB: Pred, DTU);
5502 ++NumInvokes;
5503 }
5504
5505 // In each SimplifyCFG run, only the current processed block can be erased.
5506 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5507 // of erasing TrivialBB, we only remove the branch to the common resume
5508 // block so that we can later erase the resume block since it has no
5509 // predecessors.
5510 TrivialBB->getTerminator()->eraseFromParent();
5511 new UnreachableInst(RI->getContext(), TrivialBB);
5512 if (DTU)
5513 DTU->applyUpdates(Updates: {{DominatorTree::Delete, TrivialBB, BB}});
5514 }
5515
5516 // Delete the resume block if all its predecessors have been removed.
5517 if (pred_empty(BB))
5518 DeleteDeadBlock(BB, DTU);
5519
5520 return !TrivialUnwindBlocks.empty();
5521}
5522
5523// Simplify resume that is only used by a single (non-phi) landing pad.
5524bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5525 BasicBlock *BB = RI->getParent();
5526 auto *LPInst = cast<LandingPadInst>(Val: BB->getFirstNonPHIIt());
5527 assert(RI->getValue() == LPInst &&
5528 "Resume must unwind the exception that caused control to here");
5529
5530 // Check that there are no other instructions except for debug intrinsics.
5531 if (!isCleanupBlockEmpty(
5532 R: make_range<Instruction *>(x: LPInst->getNextNode(), y: RI)))
5533 return false;
5534
5535 // Turn all invokes that unwind here into calls and delete the basic block.
5536 for (BasicBlock *Pred : llvm::make_early_inc_range(Range: predecessors(BB))) {
5537 removeUnwindEdge(BB: Pred, DTU);
5538 ++NumInvokes;
5539 }
5540
5541 // The landingpad is now unreachable. Zap it.
5542 DeleteDeadBlock(BB, DTU);
5543 return true;
5544}
5545
5546static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
5547 // If this is a trivial cleanup pad that executes no instructions, it can be
5548 // eliminated. If the cleanup pad continues to the caller, any predecessor
5549 // that is an EH pad will be updated to continue to the caller and any
5550 // predecessor that terminates with an invoke instruction will have its invoke
5551 // instruction converted to a call instruction. If the cleanup pad being
5552 // simplified does not continue to the caller, each predecessor will be
5553 // updated to continue to the unwind destination of the cleanup pad being
5554 // simplified.
5555 BasicBlock *BB = RI->getParent();
5556 CleanupPadInst *CPInst = RI->getCleanupPad();
5557 if (CPInst->getParent() != BB)
5558 // This isn't an empty cleanup.
5559 return false;
5560
5561 // We cannot kill the pad if it has multiple uses. This typically arises
5562 // from unreachable basic blocks.
5563 if (!CPInst->hasOneUse())
5564 return false;
5565
5566 // Check that there are no other instructions except for benign intrinsics.
5567 if (!isCleanupBlockEmpty(
5568 R: make_range<Instruction *>(x: CPInst->getNextNode(), y: RI)))
5569 return false;
5570
5571 // If the cleanup return we are simplifying unwinds to the caller, this will
5572 // set UnwindDest to nullptr.
5573 BasicBlock *UnwindDest = RI->getUnwindDest();
5574
5575 // We're about to remove BB from the control flow. Before we do, sink any
5576 // PHINodes into the unwind destination. Doing this before changing the
5577 // control flow avoids some potentially slow checks, since we can currently
5578 // be certain that UnwindDest and BB have no common predecessors (since they
5579 // are both EH pads).
5580 if (UnwindDest) {
5581 // First, go through the PHI nodes in UnwindDest and update any nodes that
5582 // reference the block we are removing
5583 for (PHINode &DestPN : UnwindDest->phis()) {
5584 int Idx = DestPN.getBasicBlockIndex(BB);
5585 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5586 assert(Idx != -1);
5587 // This PHI node has an incoming value that corresponds to a control
5588 // path through the cleanup pad we are removing. If the incoming
5589 // value is in the cleanup pad, it must be a PHINode (because we
5590 // verified above that the block is otherwise empty). Otherwise, the
5591 // value is either a constant or a value that dominates the cleanup
5592 // pad being removed.
5593 //
5594 // Because BB and UnwindDest are both EH pads, all of their
5595 // predecessors must unwind to these blocks, and since no instruction
5596 // can have multiple unwind destinations, there will be no overlap in
5597 // incoming blocks between SrcPN and DestPN.
5598 Value *SrcVal = DestPN.getIncomingValue(i: Idx);
5599 PHINode *SrcPN = dyn_cast<PHINode>(Val: SrcVal);
5600
5601 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5602 for (auto *Pred : predecessors(BB)) {
5603 Value *Incoming =
5604 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(BB: Pred) : SrcVal;
5605 DestPN.addIncoming(V: Incoming, BB: Pred);
5606 }
5607 }
5608
5609 // Sink any remaining PHI nodes directly into UnwindDest.
5610 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5611 for (PHINode &PN : make_early_inc_range(Range: BB->phis())) {
5612 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5613 // If the PHI node has no uses or all of its uses are in this basic
5614 // block (meaning they are debug or lifetime intrinsics), just leave
5615 // it. It will be erased when we erase BB below.
5616 continue;
5617
5618 // Otherwise, sink this PHI node into UnwindDest.
5619 // Any predecessors to UnwindDest which are not already represented
5620 // must be back edges which inherit the value from the path through
5621 // BB. In this case, the PHI value must reference itself.
5622 for (auto *pred : predecessors(BB: UnwindDest))
5623 if (pred != BB)
5624 PN.addIncoming(V: &PN, BB: pred);
5625 PN.moveBefore(InsertPos: InsertPt);
5626 // Also, add a dummy incoming value for the original BB itself,
5627 // so that the PHI is well-formed until we drop said predecessor.
5628 PN.addIncoming(V: PoisonValue::get(T: PN.getType()), BB);
5629 }
5630 }
5631
5632 std::vector<DominatorTree::UpdateType> Updates;
5633
5634 // We use make_early_inc_range here because we will remove all predecessors.
5635 for (BasicBlock *PredBB : llvm::make_early_inc_range(Range: predecessors(BB))) {
5636 if (UnwindDest == nullptr) {
5637 if (DTU) {
5638 DTU->applyUpdates(Updates);
5639 Updates.clear();
5640 }
5641 removeUnwindEdge(BB: PredBB, DTU);
5642 ++NumInvokes;
5643 } else {
5644 BB->removePredecessor(Pred: PredBB);
5645 Instruction *TI = PredBB->getTerminator();
5646 TI->replaceUsesOfWith(From: BB, To: UnwindDest);
5647 if (DTU) {
5648 Updates.push_back(x: {DominatorTree::Insert, PredBB, UnwindDest});
5649 Updates.push_back(x: {DominatorTree::Delete, PredBB, BB});
5650 }
5651 }
5652 }
5653
5654 if (DTU)
5655 DTU->applyUpdates(Updates);
5656
5657 DeleteDeadBlock(BB, DTU);
5658
5659 return true;
5660}
5661
5662// Try to merge two cleanuppads together.
5663static bool mergeCleanupPad(CleanupReturnInst *RI) {
5664 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5665 // with.
5666 BasicBlock *UnwindDest = RI->getUnwindDest();
5667 if (!UnwindDest)
5668 return false;
5669
5670 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5671 // be safe to merge without code duplication.
5672 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5673 return false;
5674
5675 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5676 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(Val: &UnwindDest->front());
5677 if (!SuccessorCleanupPad)
5678 return false;
5679
5680 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5681 // Replace any uses of the successor cleanupad with the predecessor pad
5682 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5683 // funclet bundle operands.
5684 SuccessorCleanupPad->replaceAllUsesWith(V: PredecessorCleanupPad);
5685 // Remove the old cleanuppad.
5686 SuccessorCleanupPad->eraseFromParent();
5687 // Now, we simply replace the cleanupret with a branch to the unwind
5688 // destination.
5689 UncondBrInst::Create(Target: UnwindDest, InsertBefore: RI->getParent());
5690 RI->eraseFromParent();
5691
5692 return true;
5693}
5694
5695bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5696 // It is possible to transiantly have an undef cleanuppad operand because we
5697 // have deleted some, but not all, dead blocks.
5698 // Eventually, this block will be deleted.
5699 if (isa<UndefValue>(Val: RI->getOperand(i_nocapture: 0)))
5700 return false;
5701
5702 if (mergeCleanupPad(RI))
5703 return true;
5704
5705 if (removeEmptyCleanup(RI, DTU))
5706 return true;
5707
5708 return false;
5709}
5710
5711// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
5712bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5713 BasicBlock *BB = UI->getParent();
5714
5715 bool Changed = false;
5716
5717 // Ensure that any debug-info records that used to occur after the Unreachable
5718 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5719 // the block.
5720 BB->flushTerminatorDbgRecords();
5721
5722 // Debug-info records on the unreachable inst itself should be deleted, as
5723 // below we delete everything past the final executable instruction.
5724 UI->dropDbgRecords();
5725
5726 // If there are any instructions immediately before the unreachable that can
5727 // be removed, do so.
5728 while (UI->getIterator() != BB->begin()) {
5729 BasicBlock::iterator BBI = UI->getIterator();
5730 --BBI;
5731
5732 if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBI))
5733 break; // Can not drop any more instructions. We're done here.
5734 // Otherwise, this instruction can be freely erased,
5735 // even if it is not side-effect free.
5736
5737 // Note that deleting EH's here is in fact okay, although it involves a bit
5738 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5739 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5740 // and we can therefore guarantee this block will be erased.
5741
5742 // If we're deleting this, we're deleting any subsequent debug info, so
5743 // delete DbgRecords.
5744 BBI->dropDbgRecords();
5745
5746 // Delete this instruction (any uses are guaranteed to be dead)
5747 BBI->replaceAllUsesWith(V: PoisonValue::get(T: BBI->getType()));
5748 BBI->eraseFromParent();
5749 Changed = true;
5750 }
5751
5752 // If the unreachable instruction is the first in the block, take a gander
5753 // at all of the predecessors of this instruction, and simplify them.
5754 if (&BB->front() != UI)
5755 return Changed;
5756
5757 std::vector<DominatorTree::UpdateType> Updates;
5758
5759 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5760 for (BasicBlock *Predecessor : Preds) {
5761 Instruction *TI = Predecessor->getTerminator();
5762 IRBuilder<> Builder(TI);
5763 if (isa<UncondBrInst>(Val: TI)) {
5764 new UnreachableInst(TI->getContext(), TI->getIterator());
5765 TI->eraseFromParent();
5766 Changed = true;
5767 if (DTU)
5768 Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5769 } else if (auto *BI = dyn_cast<CondBrInst>(Val: TI)) {
5770 // We could either have a proper unconditional branch,
5771 // or a degenerate conditional branch with matching destinations.
5772 if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
5773 new UnreachableInst(TI->getContext(), TI->getIterator());
5774 TI->eraseFromParent();
5775 Changed = true;
5776 } else {
5777 Value* Cond = BI->getCondition();
5778 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5779 "The destinations are guaranteed to be different here.");
5780 CallInst *Assumption;
5781 if (BI->getSuccessor(i: 0) == BB) {
5782 Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
5783 Builder.CreateBr(Dest: BI->getSuccessor(i: 1));
5784 } else {
5785 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5786 Assumption = Builder.CreateAssumption(Cond);
5787 Builder.CreateBr(Dest: BI->getSuccessor(i: 0));
5788 }
5789 if (Options.AC)
5790 Options.AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
5791
5792 eraseTerminatorAndDCECond(TI: BI);
5793 Changed = true;
5794 }
5795 if (DTU)
5796 Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5797 } else if (auto *SI = dyn_cast<SwitchInst>(Val: TI)) {
5798 SwitchInstProfUpdateWrapper SU(*SI);
5799 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5800 if (i->getCaseSuccessor() != BB) {
5801 ++i;
5802 continue;
5803 }
5804 BB->removePredecessor(Pred: SU->getParent());
5805 i = SU.removeCase(I: i);
5806 e = SU->case_end();
5807 Changed = true;
5808 }
5809 // Note that the default destination can't be removed!
5810 if (DTU && SI->getDefaultDest() != BB)
5811 Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5812 } else if (auto *II = dyn_cast<InvokeInst>(Val: TI)) {
5813 if (II->getUnwindDest() == BB) {
5814 if (DTU) {
5815 DTU->applyUpdates(Updates);
5816 Updates.clear();
5817 }
5818 auto *CI = cast<CallInst>(Val: removeUnwindEdge(BB: TI->getParent(), DTU));
5819 if (!CI->doesNotThrow())
5820 CI->setDoesNotThrow();
5821 Changed = true;
5822 }
5823 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: TI)) {
5824 if (CSI->getUnwindDest() == BB) {
5825 if (DTU) {
5826 DTU->applyUpdates(Updates);
5827 Updates.clear();
5828 }
5829 removeUnwindEdge(BB: TI->getParent(), DTU);
5830 Changed = true;
5831 continue;
5832 }
5833
5834 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5835 E = CSI->handler_end();
5836 I != E; ++I) {
5837 if (*I == BB) {
5838 CSI->removeHandler(HI: I);
5839 --I;
5840 --E;
5841 Changed = true;
5842 }
5843 }
5844 if (DTU)
5845 Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5846 if (CSI->getNumHandlers() == 0) {
5847 if (CSI->hasUnwindDest()) {
5848 // Redirect all predecessors of the block containing CatchSwitchInst
5849 // to instead branch to the CatchSwitchInst's unwind destination.
5850 if (DTU) {
5851 for (auto *PredecessorOfPredecessor : predecessors(BB: Predecessor)) {
5852 Updates.push_back(x: {DominatorTree::Insert,
5853 PredecessorOfPredecessor,
5854 CSI->getUnwindDest()});
5855 Updates.push_back(x: {DominatorTree::Delete,
5856 PredecessorOfPredecessor, Predecessor});
5857 }
5858 }
5859 Predecessor->replaceAllUsesWith(V: CSI->getUnwindDest());
5860 } else {
5861 // Rewrite all preds to unwind to caller (or from invoke to call).
5862 if (DTU) {
5863 DTU->applyUpdates(Updates);
5864 Updates.clear();
5865 }
5866 SmallVector<BasicBlock *, 8> EHPreds(predecessors(BB: Predecessor));
5867 for (BasicBlock *EHPred : EHPreds)
5868 removeUnwindEdge(BB: EHPred, DTU);
5869 }
5870 // The catchswitch is no longer reachable.
5871 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5872 CSI->eraseFromParent();
5873 Changed = true;
5874 }
5875 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: TI)) {
5876 (void)CRI;
5877 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5878 "Expected to always have an unwind to BB.");
5879 if (DTU)
5880 Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
5881 new UnreachableInst(TI->getContext(), TI->getIterator());
5882 TI->eraseFromParent();
5883 Changed = true;
5884 }
5885 }
5886
5887 if (DTU)
5888 DTU->applyUpdates(Updates);
5889
5890 // If this block is now dead, remove it.
5891 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5892 DeleteDeadBlock(BB, DTU);
5893 return true;
5894 }
5895
5896 return Changed;
5897}
5898
/// Describes a set of switch cases that can be tested with one range check.
/// Produced by findContiguousCases and consumed by turnSwitchRangeIntoICmp,
/// which branches to Dest when the condition lies in [Min, Max] and to
/// OtherDest otherwise.
5899struct ContiguousCasesResult {
5900 ConstantInt *Min; // Lower bound of the range (inclusive).
5901 ConstantInt *Max; // Upper bound of the range (inclusive).
5902 BasicBlock *Dest; // Successor taken when the condition is in range.
5903 BasicBlock *OtherDest; // Successor taken when it is out of range.
5904 SmallVectorImpl<ConstantInt *> *Cases; // Case values that branch to Dest.
5905 SmallVectorImpl<ConstantInt *> *OtherCases; // Case values that branch to OtherDest.
5906};
5907
5908static std::optional<ContiguousCasesResult>
5909findContiguousCases(Value *Condition, SmallVectorImpl<ConstantInt *> &Cases,
5910 SmallVectorImpl<ConstantInt *> &OtherCases,
5911 BasicBlock *Dest, BasicBlock *OtherDest) {
5912 assert(Cases.size() >= 1);
5913
5914 array_pod_sort(Start: Cases.begin(), End: Cases.end(), Compare: constantIntSortPredicate);
5915 const APInt &Min = Cases.back()->getValue();
5916 const APInt &Max = Cases.front()->getValue();
5917 APInt Offset = Max - Min;
5918 size_t ContiguousOffset = Cases.size() - 1;
5919 if (Offset == ContiguousOffset) {
5920 return ContiguousCasesResult{
5921 /*Min=*/Cases.back(),
5922 /*Max=*/Cases.front(),
5923 /*Dest=*/Dest,
5924 /*OtherDest=*/OtherDest,
5925 /*Cases=*/&Cases,
5926 /*OtherCases=*/&OtherCases,
5927 };
5928 }
5929 ConstantRange CR = computeConstantRange(V: Condition, /*ForSigned=*/false,
5930 SQ: SimplifyQuery(Dest->getDataLayout()));
5931 // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
5932 // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
5933 // contiguous range for the other destination. N.B. If CR is not a full range,
5934 // Max+1 is not equal to Min. It's not continuous in arithmetic.
5935 if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
5936 assert(Cases.size() >= 2);
5937 auto *It =
5938 std::adjacent_find(first: Cases.begin(), last: Cases.end(), binary_pred: [](auto L, auto R) {
5939 return L->getValue() != R->getValue() + 1;
5940 });
5941 if (It == Cases.end())
5942 return std::nullopt;
5943 auto [OtherMax, OtherMin] = std::make_pair(x&: *It, y&: *std::next(x: It));
5944 if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
5945 Cases.size() - 2) {
5946 return ContiguousCasesResult{
5947 /*Min=*/cast<ConstantInt>(
5948 Val: ConstantInt::get(Ty: OtherMin->getType(), V: OtherMin->getValue() + 1)),
5949 /*Max=*/
5950 cast<ConstantInt>(
5951 Val: ConstantInt::get(Ty: OtherMax->getType(), V: OtherMax->getValue() - 1)),
5952 /*Dest=*/OtherDest,
5953 /*OtherDest=*/Dest,
5954 /*Cases=*/&OtherCases,
5955 /*OtherCases=*/&Cases,
5956 };
5957 }
5958 }
5959 return std::nullopt;
5960}
5961
5962static void createUnreachableSwitchDefault(SwitchInst *Switch,
5963 DomTreeUpdater *DTU,
5964 bool RemoveOrigDefaultBlock = true) {
5965 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5966 auto *BB = Switch->getParent();
5967 auto *OrigDefaultBlock = Switch->getDefaultDest();
5968 if (RemoveOrigDefaultBlock)
5969 OrigDefaultBlock->removePredecessor(Pred: BB);
5970 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5971 Context&: BB->getContext(), Name: BB->getName() + ".unreachabledefault", Parent: BB->getParent(),
5972 InsertBefore: OrigDefaultBlock);
5973 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5974 UI->setDebugLoc(DebugLoc::getTemporary());
5975 Switch->setDefaultDest(&*NewDefaultBlock);
5976 if (DTU) {
5977 SmallVector<DominatorTree::UpdateType, 2> Updates;
5978 Updates.push_back(Elt: {DominatorTree::Insert, BB, &*NewDefaultBlock});
5979 if (RemoveOrigDefaultBlock &&
5980 !is_contained(Range: successors(BB), Element: OrigDefaultBlock))
5981 Updates.push_back(Elt: {DominatorTree::Delete, BB, &*OrigDefaultBlock});
5982 DTU->applyUpdates(Updates);
5983 }
5984}
5985
5986/// Turn a switch into an integer range comparison and branch.
5987/// Switches with more than 2 destinations are ignored.
5988/// Switches with 1 destination are also ignored.
5989bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5990 IRBuilder<> &Builder) {
5991 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5992
5993 bool HasDefault = !SI->defaultDestUnreachable();
5994
5995 auto *BB = SI->getParent();
5996 // Partition the cases into two sets with different destinations.
5997 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5998 BasicBlock *DestB = nullptr;
5999 SmallVector<ConstantInt *, 16> CasesA;
6000 SmallVector<ConstantInt *, 16> CasesB;
6001
6002 for (auto Case : SI->cases()) {
6003 BasicBlock *Dest = Case.getCaseSuccessor();
6004 if (!DestA)
6005 DestA = Dest;
6006 if (Dest == DestA) {
6007 CasesA.push_back(Elt: Case.getCaseValue());
6008 continue;
6009 }
6010 if (!DestB)
6011 DestB = Dest;
6012 if (Dest == DestB) {
6013 CasesB.push_back(Elt: Case.getCaseValue());
6014 continue;
6015 }
6016 return false; // More than two destinations.
6017 }
6018 if (!DestB)
6019 return false; // All destinations are the same and the default is unreachable
6020
6021 assert(DestA && DestB &&
6022 "Single-destination switch should have been folded.");
6023 assert(DestA != DestB);
6024 assert(DestB != SI->getDefaultDest());
6025 assert(!CasesB.empty() && "There must be non-default cases.");
6026 assert(!CasesA.empty() || HasDefault);
6027
6028 // Figure out if one of the sets of cases form a contiguous range.
6029 std::optional<ContiguousCasesResult> ContiguousCases;
6030
6031 // Only one icmp is needed when there is only one case.
6032 if (!HasDefault && CasesA.size() == 1)
6033 ContiguousCases = ContiguousCasesResult{
6034 /*Min=*/CasesA[0],
6035 /*Max=*/CasesA[0],
6036 /*Dest=*/DestA,
6037 /*OtherDest=*/DestB,
6038 /*Cases=*/&CasesA,
6039 /*OtherCases=*/&CasesB,
6040 };
6041 else if (CasesB.size() == 1)
6042 ContiguousCases = ContiguousCasesResult{
6043 /*Min=*/CasesB[0],
6044 /*Max=*/CasesB[0],
6045 /*Dest=*/DestB,
6046 /*OtherDest=*/DestA,
6047 /*Cases=*/&CasesB,
6048 /*OtherCases=*/&CasesA,
6049 };
6050 // Correctness: Cases to the default destination cannot be contiguous cases.
6051 else if (!HasDefault)
6052 ContiguousCases =
6053 findContiguousCases(Condition: SI->getCondition(), Cases&: CasesA, OtherCases&: CasesB, Dest: DestA, OtherDest: DestB);
6054
6055 if (!ContiguousCases)
6056 ContiguousCases =
6057 findContiguousCases(Condition: SI->getCondition(), Cases&: CasesB, OtherCases&: CasesA, Dest: DestB, OtherDest: DestA);
6058
6059 if (!ContiguousCases)
6060 return false;
6061
6062 auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
6063
6064 // Start building the compare and branch.
6065
6066 Constant *Offset = ConstantExpr::getNeg(C: Min);
6067 Constant *NumCases = ConstantInt::get(Ty: Offset->getType(),
6068 V: Max->getValue() - Min->getValue() + 1);
6069 Instruction *NewBI;
6070 if (NumCases->isOneValue()) {
6071 assert(Max->getValue() == Min->getValue());
6072 Value *Cmp = Builder.CreateICmpEQ(LHS: SI->getCondition(), RHS: Min);
6073 NewBI = Builder.CreateCondBr(Cond: Cmp, True: Dest, False: OtherDest);
6074 }
6075 // If NumCases overflowed, then all possible values jump to the successor.
6076 else if (NumCases->isNullValue() && !Cases->empty()) {
6077 NewBI = Builder.CreateBr(Dest);
6078 } else {
6079 Value *Sub = SI->getCondition();
6080 if (!Offset->isNullValue())
6081 Sub = Builder.CreateAdd(LHS: Sub, RHS: Offset, Name: Sub->getName() + ".off");
6082 Value *Cmp = Builder.CreateICmpULT(LHS: Sub, RHS: NumCases, Name: "switch");
6083 NewBI = Builder.CreateCondBr(Cond: Cmp, True: Dest, False: OtherDest);
6084 }
6085
6086 // Update weight for the newly-created conditional branch.
6087 if (hasBranchWeightMD(I: *SI) && isa<CondBrInst>(Val: NewBI)) {
6088 SmallVector<uint64_t, 8> Weights;
6089 getBranchWeights(TI: SI, Weights);
6090 if (Weights.size() == 1 + SI->getNumCases()) {
6091 uint64_t TrueWeight = 0;
6092 uint64_t FalseWeight = 0;
6093 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
6094 if (SI->getSuccessor(idx: I) == Dest)
6095 TrueWeight += Weights[I];
6096 else
6097 FalseWeight += Weights[I];
6098 }
6099 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
6100 TrueWeight /= 2;
6101 FalseWeight /= 2;
6102 }
6103 setFittedBranchWeights(I&: *NewBI, Weights: {TrueWeight, FalseWeight},
6104 /*IsExpected=*/false, /*ElideAllZero=*/true);
6105 }
6106 }
6107
6108 // Prune obsolete incoming values off the successors' PHI nodes.
6109 for (auto &PHI : make_early_inc_range(Range: Dest->phis())) {
6110 unsigned PreviousEdges = Cases->size();
6111 if (Dest == SI->getDefaultDest())
6112 ++PreviousEdges;
6113 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
6114 PHI.removeIncomingValue(BB: SI->getParent());
6115 }
6116 for (auto &PHI : make_early_inc_range(Range: OtherDest->phis())) {
6117 unsigned PreviousEdges = OtherCases->size();
6118 if (OtherDest == SI->getDefaultDest())
6119 ++PreviousEdges;
6120 unsigned E = PreviousEdges - 1;
6121 // Remove all incoming values from OtherDest if OtherDest is unreachable.
6122 if (isa<UncondBrInst>(Val: NewBI))
6123 ++E;
6124 for (unsigned I = 0; I != E; ++I)
6125 PHI.removeIncomingValue(BB: SI->getParent());
6126 }
6127
6128 // Clean up the default block - it may have phis or other instructions before
6129 // the unreachable terminator.
6130 if (!HasDefault)
6131 createUnreachableSwitchDefault(Switch: SI, DTU);
6132
6133 auto *UnreachableDefault = SI->getDefaultDest();
6134
6135 // Drop the switch.
6136 SI->eraseFromParent();
6137
6138 if (!HasDefault && DTU)
6139 DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, UnreachableDefault}});
6140
6141 return true;
6142}
6143
6144/// Compute masked bits for the condition of a switch
6145/// and use it to remove dead cases.
6146static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
6147 AssumptionCache *AC,
6148 const DataLayout &DL) {
6149 Value *Cond = SI->getCondition();
6150 KnownBits Known = computeKnownBits(V: Cond, DL, AC, CxtI: SI);
6151 SmallPtrSet<const Constant *, 4> KnownValues;
6152 bool IsKnownValuesValid = collectPossibleValues(V: Cond, Constants&: KnownValues, MaxCount: 4);
6153
6154 // We can also eliminate cases by determining that their values are outside of
6155 // the limited range of the condition based on how many significant (non-sign)
6156 // bits are in the condition value.
6157 unsigned MaxSignificantBitsInCond =
6158 ComputeMaxSignificantBits(Op: Cond, DL, AC, CxtI: SI);
6159
6160 // Gather dead cases.
6161 SmallVector<ConstantInt *, 8> DeadCases;
6162 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6163 SmallVector<BasicBlock *, 8> UniqueSuccessors;
6164 for (const auto &Case : SI->cases()) {
6165 auto *Successor = Case.getCaseSuccessor();
6166 if (DTU) {
6167 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Key: Successor);
6168 if (Inserted)
6169 UniqueSuccessors.push_back(Elt: Successor);
6170 ++It->second;
6171 }
6172 ConstantInt *CaseC = Case.getCaseValue();
6173 const APInt &CaseVal = CaseC->getValue();
6174 if (Known.Zero.intersects(RHS: CaseVal) || !Known.One.isSubsetOf(RHS: CaseVal) ||
6175 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6176 (IsKnownValuesValid && !KnownValues.contains(Ptr: CaseC))) {
6177 DeadCases.push_back(Elt: CaseC);
6178 if (DTU)
6179 --NumPerSuccessorCases[Successor];
6180 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6181 << " is dead.\n");
6182 } else if (IsKnownValuesValid)
6183 KnownValues.erase(Ptr: CaseC);
6184 }
6185
6186 // If we can prove that the cases must cover all possible values, the
6187 // default destination becomes dead and we can remove it. If we know some
6188 // of the bits in the value, we can use that to more precisely compute the
6189 // number of possible unique case values.
6190 bool HasDefault = !SI->defaultDestUnreachable();
6191 const unsigned NumUnknownBits =
6192 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6193 assert(NumUnknownBits <= Known.getBitWidth());
6194 if (HasDefault && DeadCases.empty()) {
6195 if (IsKnownValuesValid && all_of(Range&: KnownValues, P: IsaPred<UndefValue>)) {
6196 createUnreachableSwitchDefault(Switch: SI, DTU);
6197 return true;
6198 }
6199
6200 if (NumUnknownBits < 64 /* avoid overflow */) {
6201 uint64_t AllNumCases = 1ULL << NumUnknownBits;
6202 if (SI->getNumCases() == AllNumCases) {
6203 createUnreachableSwitchDefault(Switch: SI, DTU);
6204 return true;
6205 }
6206 // When only one case value is missing, replace default with that case.
6207 // Eliminating the default branch will provide more opportunities for
6208 // optimization, such as lookup tables.
6209 if (SI->getNumCases() == AllNumCases - 1) {
6210 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6211 IntegerType *CondTy = cast<IntegerType>(Val: Cond->getType());
6212 if (CondTy->getIntegerBitWidth() > 64 ||
6213 !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
6214 return false;
6215
6216 uint64_t MissingCaseVal = 0;
6217 for (const auto &Case : SI->cases())
6218 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6219 auto *MissingCase = cast<ConstantInt>(
6220 Val: ConstantInt::get(Ty: Cond->getType(), V: MissingCaseVal));
6221 SwitchInstProfUpdateWrapper SIW(*SI);
6222 SIW.addCase(OnVal: MissingCase, Dest: SI->getDefaultDest(),
6223 W: SIW.getSuccessorWeight(idx: 0));
6224 createUnreachableSwitchDefault(Switch: SI, DTU,
6225 /*RemoveOrigDefaultBlock*/ false);
6226 SIW.setSuccessorWeight(idx: 0, W: 0);
6227 return true;
6228 }
6229 }
6230 }
6231
6232 if (DeadCases.empty())
6233 return false;
6234
6235 SwitchInstProfUpdateWrapper SIW(*SI);
6236 for (ConstantInt *DeadCase : DeadCases) {
6237 SwitchInst::CaseIt CaseI = SI->findCaseValue(C: DeadCase);
6238 assert(CaseI != SI->case_default() &&
6239 "Case was not found. Probably mistake in DeadCases forming.");
6240 // Prune unused values from PHI nodes.
6241 CaseI->getCaseSuccessor()->removePredecessor(Pred: SI->getParent());
6242 SIW.removeCase(I: CaseI);
6243 }
6244
6245 if (DTU) {
6246 std::vector<DominatorTree::UpdateType> Updates;
6247 for (auto *Successor : UniqueSuccessors)
6248 if (NumPerSuccessorCases[Successor] == 0)
6249 Updates.push_back(x: {DominatorTree::Delete, SI->getParent(), Successor});
6250 DTU->applyUpdates(Updates);
6251 }
6252
6253 return true;
6254}
6255
6256/// If BB would be eligible for simplification by
6257/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6258/// by an unconditional branch), look at the phi node for BB in the successor
6259/// block and see if the incoming value is equal to CaseValue. If so, return
6260/// the phi node, and set PhiIndex to BB's index in the phi node.
6261static PHINode *findPHIForConditionForwarding(ConstantInt *CaseValue,
6262 BasicBlock *BB, int *PhiIndex) {
6263 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6264 return nullptr; // BB must be empty to be a candidate for simplification.
6265 if (!BB->getSinglePredecessor())
6266 return nullptr; // BB must be dominated by the switch.
6267
6268 UncondBrInst *Branch = dyn_cast<UncondBrInst>(Val: BB->getTerminator());
6269 if (!Branch)
6270 return nullptr; // Terminator must be unconditional branch.
6271
6272 BasicBlock *Succ = Branch->getSuccessor();
6273
6274 for (PHINode &PHI : Succ->phis()) {
6275 int Idx = PHI.getBasicBlockIndex(BB);
6276 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6277
6278 Value *InValue = PHI.getIncomingValue(i: Idx);
6279 if (InValue != CaseValue)
6280 continue;
6281
6282 *PhiIndex = Idx;
6283 return &PHI;
6284 }
6285
6286 return nullptr;
6287}
6288
6289/// Try to forward the condition of a switch instruction to a phi node
6290/// dominated by the switch, if that would mean that some of the destination
6291/// blocks of the switch can be folded away. Return true if a change is made.
6292static bool forwardSwitchConditionToPHI(SwitchInst *SI) {
6293 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6294
6295 ForwardingNodesMap ForwardingNodes;
6296 BasicBlock *SwitchBlock = SI->getParent();
6297 bool Changed = false;
6298 for (const auto &Case : SI->cases()) {
6299 ConstantInt *CaseValue = Case.getCaseValue();
6300 BasicBlock *CaseDest = Case.getCaseSuccessor();
6301
6302 // Replace phi operands in successor blocks that are using the constant case
6303 // value rather than the switch condition variable:
6304 // switchbb:
6305 // switch i32 %x, label %default [
6306 // i32 17, label %succ
6307 // ...
6308 // succ:
6309 // %r = phi i32 ... [ 17, %switchbb ] ...
6310 // -->
6311 // %r = phi i32 ... [ %x, %switchbb ] ...
6312
6313 for (PHINode &Phi : CaseDest->phis()) {
6314 // This only works if there is exactly 1 incoming edge from the switch to
6315 // a phi. If there is >1, that means multiple cases of the switch map to 1
6316 // value in the phi, and that phi value is not the switch condition. Thus,
6317 // this transform would not make sense (the phi would be invalid because
6318 // a phi can't have different incoming values from the same block).
6319 int SwitchBBIdx = Phi.getBasicBlockIndex(BB: SwitchBlock);
6320 if (Phi.getIncomingValue(i: SwitchBBIdx) == CaseValue &&
6321 count(Range: Phi.blocks(), Element: SwitchBlock) == 1) {
6322 Phi.setIncomingValue(i: SwitchBBIdx, V: SI->getCondition());
6323 Changed = true;
6324 }
6325 }
6326
6327 // Collect phi nodes that are indirectly using this switch's case constants.
6328 int PhiIdx;
6329 if (auto *Phi = findPHIForConditionForwarding(CaseValue, BB: CaseDest, PhiIndex: &PhiIdx))
6330 ForwardingNodes[Phi].push_back(Elt: PhiIdx);
6331 }
6332
6333 for (auto &ForwardingNode : ForwardingNodes) {
6334 PHINode *Phi = ForwardingNode.first;
6335 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6336 // Check if it helps to fold PHI.
6337 if (Indexes.size() < 2 && !llvm::is_contained(Range: Phi->incoming_values(), Element: SI->getCondition()))
6338 continue;
6339
6340 for (int Index : Indexes)
6341 Phi->setIncomingValue(i: Index, V: SI->getCondition());
6342 Changed = true;
6343 }
6344
6345 return Changed;
6346}
6347
6348/// Return true if the backend will be able to handle
6349/// initializing an array of constants like C.
6350static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
6351 if (C->isThreadDependent())
6352 return false;
6353 if (C->isDLLImportDependent())
6354 return false;
6355
6356 if (!isa<ConstantDataVector, ConstantExpr, ConstantFP, ConstantInt,
6357 ConstantPointerNull, GlobalValue, UndefValue>(Val: C))
6358 return false;
6359
6360 // Globals cannot contain scalable types.
6361 if (C->getType()->isScalableTy())
6362 return false;
6363
6364 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) {
6365 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6366 // materializing the array of constants.
6367 Constant *StrippedC = cast<Constant>(Val: CE->stripInBoundsConstantOffsets());
6368 if (StrippedC == C || !validLookupTableConstant(C: StrippedC, TTI))
6369 return false;
6370 }
6371
6372 if (!TTI.shouldBuildLookupTablesForConstant(C))
6373 return false;
6374
6375 return true;
6376}
6377
6378/// If V is a Constant, return it. Otherwise, try to look up
6379/// its constant value in ConstantPool, returning 0 if it's not there.
6380static Constant *
6381lookupConstant(Value *V,
6382 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
6383 if (Constant *C = dyn_cast<Constant>(Val: V))
6384 return C;
6385 return ConstantPool.lookup(Val: V);
6386}
6387
6388/// Try to fold instruction I into a constant. This works for
6389/// simple instructions such as binary operations where both operands are
6390/// constant or can be replaced by constants from the ConstantPool. Returns the
6391/// resulting constant on success, 0 otherwise.
6392static Constant *
6393constantFold(Instruction *I, const DataLayout &DL,
6394 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
6395 if (SelectInst *Select = dyn_cast<SelectInst>(Val: I)) {
6396 Constant *A = lookupConstant(V: Select->getCondition(), ConstantPool);
6397 if (!A)
6398 return nullptr;
6399 if (A->isAllOnesValue())
6400 return lookupConstant(V: Select->getTrueValue(), ConstantPool);
6401 if (A->isNullValue())
6402 return lookupConstant(V: Select->getFalseValue(), ConstantPool);
6403 return nullptr;
6404 }
6405
6406 SmallVector<Constant *, 4> COps;
6407 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6408 if (Constant *A = lookupConstant(V: I->getOperand(i: N), ConstantPool))
6409 COps.push_back(Elt: A);
6410 else
6411 return nullptr;
6412 }
6413
6414 return ConstantFoldInstOperands(I, Ops: COps, DL);
6415}
6416
6417/// Try to determine the resulting constant values in phi nodes
6418/// at the common destination basic block, *CommonDest, for one of the case
6419/// destinations CaseDest corresponding to value CaseVal (nullptr for the
6420/// default case), of a switch instruction SI.
6421static bool
6422getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
6423 BasicBlock **CommonDest,
6424 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6425 const DataLayout &DL, const TargetTransformInfo &TTI) {
6426 // The block from which we enter the common destination.
6427 BasicBlock *Pred = SI->getParent();
6428
6429 // If CaseDest is empty except for some side-effect free instructions through
6430 // which we can constant-propagate the CaseVal, continue to its successor.
6431 SmallDenseMap<Value *, Constant *> ConstantPool;
6432 ConstantPool.insert(KV: std::make_pair(x: SI->getCondition(), y&: CaseVal));
6433 for (Instruction &I : *CaseDest) {
6434 if (I.isTerminator()) {
6435 // If the terminator is a simple branch, continue to the next block.
6436 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6437 return false;
6438 Pred = CaseDest;
6439 CaseDest = I.getSuccessor(Idx: 0);
6440 } else if (Constant *C = constantFold(I: &I, DL, ConstantPool)) {
6441 // Instruction is side-effect free and constant.
6442
6443 // If the instruction has uses outside this block or a phi node slot for
6444 // the block, it is not safe to bypass the instruction since it would then
6445 // no longer dominate all its uses.
6446 for (auto &Use : I.uses()) {
6447 User *User = Use.getUser();
6448 if (Instruction *I = dyn_cast<Instruction>(Val: User))
6449 if (I->getParent() == CaseDest)
6450 continue;
6451 if (PHINode *Phi = dyn_cast<PHINode>(Val: User))
6452 if (Phi->getIncomingBlock(U: Use) == CaseDest)
6453 continue;
6454 return false;
6455 }
6456
6457 ConstantPool.insert(KV: std::make_pair(x: &I, y&: C));
6458 } else {
6459 break;
6460 }
6461 }
6462
6463 // If we did not have a CommonDest before, use the current one.
6464 if (!*CommonDest)
6465 *CommonDest = CaseDest;
6466 // If the destination isn't the common one, abort.
6467 if (CaseDest != *CommonDest)
6468 return false;
6469
6470 // Get the values for this case from phi nodes in the destination block.
6471 for (PHINode &PHI : (*CommonDest)->phis()) {
6472 int Idx = PHI.getBasicBlockIndex(BB: Pred);
6473 if (Idx == -1)
6474 continue;
6475
6476 Constant *ConstVal =
6477 lookupConstant(V: PHI.getIncomingValue(i: Idx), ConstantPool);
6478 if (!ConstVal)
6479 return false;
6480
6481 // Be conservative about which kinds of constants we support.
6482 if (!validLookupTableConstant(C: ConstVal, TTI))
6483 return false;
6484
6485 Res.push_back(Elt: std::make_pair(x: &PHI, y&: ConstVal));
6486 }
6487
6488 return Res.size() > 0;
6489}
6490
6491// Helper function used to add CaseVal to the list of cases that generate
6492// Result. Returns the updated number of cases that generate this result.
6493static size_t mapCaseToResult(ConstantInt *CaseVal,
6494 SwitchCaseResultVectorTy &UniqueResults,
6495 Constant *Result) {
6496 for (auto &I : UniqueResults) {
6497 if (I.first == Result) {
6498 I.second.push_back(Elt: CaseVal);
6499 return I.second.size();
6500 }
6501 }
6502 UniqueResults.push_back(
6503 Elt: std::make_pair(x&: Result, y: SmallVector<ConstantInt *, 4>(1, CaseVal)));
6504 return 1;
6505}
6506
6507// Helper function that initializes a map containing
6508// results for the PHI node of the common destination block for a switch
6509// instruction. Returns false if multiple PHI nodes have been found or if
6510// there is not a common destination block for the switch.
6511static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
6512 BasicBlock *&CommonDest,
6513 SwitchCaseResultVectorTy &UniqueResults,
6514 Constant *&DefaultResult,
6515 const DataLayout &DL,
6516 const TargetTransformInfo &TTI,
6517 uintptr_t MaxUniqueResults) {
6518 for (const auto &I : SI->cases()) {
6519 ConstantInt *CaseVal = I.getCaseValue();
6520
6521 // Resulting value at phi nodes for this case value.
6522 SwitchCaseResultsTy Results;
6523 if (!getCaseResults(SI, CaseVal, CaseDest: I.getCaseSuccessor(), CommonDest: &CommonDest, Res&: Results,
6524 DL, TTI))
6525 return false;
6526
6527 // Only one value per case is permitted.
6528 if (Results.size() > 1)
6529 return false;
6530
6531 // Add the case->result mapping to UniqueResults.
6532 const size_t NumCasesForResult =
6533 mapCaseToResult(CaseVal, UniqueResults, Result: Results.begin()->second);
6534
6535 // Early out if there are too many cases for this result.
6536 if (NumCasesForResult > MaxSwitchCasesPerResult)
6537 return false;
6538
6539 // Early out if there are too many unique results.
6540 if (UniqueResults.size() > MaxUniqueResults)
6541 return false;
6542
6543 // Check the PHI consistency.
6544 if (!PHI)
6545 PHI = Results[0].first;
6546 else if (PHI != Results[0].first)
6547 return false;
6548 }
6549 // Find the default result value.
6550 SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
6551 getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest, Res&: DefaultResults,
6552 DL, TTI);
6553 // If the default value is not found abort unless the default destination
6554 // is unreachable.
6555 DefaultResult =
6556 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6557
6558 return DefaultResult || SI->defaultDestUnreachable();
6559}
6560
6561// Helper function that checks if it is possible to transform a switch with only
6562// two cases (or two cases + default) that produces a result into a select.
6563// TODO: Handle switches with more than 2 cases that map to the same result.
6564// The branch weights correspond to the provided Condition (i.e. if Condition is
6565// modified from the original SwitchInst, the caller must adjust the weights)
6566static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6567 Constant *DefaultResult, Value *Condition,
6568 IRBuilder<> &Builder, const DataLayout &DL,
6569 ArrayRef<uint32_t> BranchWeights) {
6570 // If we are selecting between only two cases transform into a simple
6571 // select or a two-way select if default is possible.
6572 // Example:
6573 // switch (a) { %0 = icmp eq i32 %a, 10
6574 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6575 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6576 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6577 // }
6578
6579 const bool HasBranchWeights =
6580 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6581
6582 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6583 ResultVector[1].second.size() == 1) {
6584 ConstantInt *FirstCase = ResultVector[0].second[0];
6585 ConstantInt *SecondCase = ResultVector[1].second[0];
6586 Value *SelectValue = ResultVector[1].first;
6587 if (DefaultResult) {
6588 Value *ValueCompare =
6589 Builder.CreateICmpEQ(LHS: Condition, RHS: SecondCase, Name: "switch.selectcmp");
6590 SelectValue = Builder.CreateSelect(C: ValueCompare, True: ResultVector[1].first,
6591 False: DefaultResult, Name: "switch.select");
6592 if (auto *SI = dyn_cast<SelectInst>(Val: SelectValue);
6593 SI && HasBranchWeights) {
6594 // We start with 3 probabilities, where the numerator is the
6595 // corresponding BranchWeights[i], and the denominator is the sum over
6596 // BranchWeights. We want the probability and negative probability of
6597 // Condition == SecondCase.
6598 assert(BranchWeights.size() == 3);
6599 setBranchWeights(
6600 I&: *SI, Weights: {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6601 /*IsExpected=*/false, /*ElideAllZero=*/true);
6602 }
6603 }
6604 Value *ValueCompare =
6605 Builder.CreateICmpEQ(LHS: Condition, RHS: FirstCase, Name: "switch.selectcmp");
6606 Value *Ret = Builder.CreateSelect(C: ValueCompare, True: ResultVector[0].first,
6607 False: SelectValue, Name: "switch.select");
6608 if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6609 // We may have had a DefaultResult. Base the position of the first and
6610 // second's branch weights accordingly. Also the proability that Condition
6611 // != FirstCase needs to take that into account.
6612 assert(BranchWeights.size() >= 2);
6613 size_t FirstCasePos = (Condition != nullptr);
6614 size_t SecondCasePos = FirstCasePos + 1;
6615 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
6616 setBranchWeights(I&: *SI,
6617 Weights: {BranchWeights[FirstCasePos],
6618 DefaultCase + BranchWeights[SecondCasePos]},
6619 /*IsExpected=*/false, /*ElideAllZero=*/true);
6620 }
6621 return Ret;
6622 }
6623
6624 // Handle the degenerate case where two cases have the same result value.
6625 if (ResultVector.size() == 1 && DefaultResult) {
6626 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6627 unsigned CaseCount = CaseValues.size();
6628 // n bits group cases map to the same result:
6629 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6630 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6631 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6632 if (isPowerOf2_32(Value: CaseCount)) {
6633 ConstantInt *MinCaseVal = CaseValues[0];
6634 // If there are bits that are set exclusively by CaseValues, we
6635 // can transform the switch into a select if the conjunction of
6636 // all the values uniquely identify CaseValues.
6637 APInt AndMask = APInt::getAllOnes(numBits: MinCaseVal->getBitWidth());
6638
6639 // Find the minimum value and compute the and of all the case values.
6640 for (auto *Case : CaseValues) {
6641 if (Case->getValue().slt(RHS: MinCaseVal->getValue()))
6642 MinCaseVal = Case;
6643 AndMask &= Case->getValue();
6644 }
6645 KnownBits Known = computeKnownBits(V: Condition, DL);
6646
6647 if (!AndMask.isZero() && Known.getMaxValue().uge(RHS: AndMask)) {
6648 // Compute the number of bits that are free to vary.
6649 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6650
6651 // Check if the number of values covered by the mask is equal
6652 // to the number of cases.
6653 if (FreeBits == Log2_32(Value: CaseCount)) {
6654 Value *And = Builder.CreateAnd(LHS: Condition, RHS: AndMask);
6655 Value *Cmp = Builder.CreateICmpEQ(
6656 LHS: And, RHS: Constant::getIntegerValue(Ty: And->getType(), V: AndMask));
6657 Value *Ret =
6658 Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
6659 if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6660 // We know there's a Default case. We base the resulting branch
6661 // weights off its probability.
6662 assert(BranchWeights.size() >= 2);
6663 setBranchWeights(
6664 I&: *SI,
6665 Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: 0U), BranchWeights[0]},
6666 /*IsExpected=*/false, /*ElideAllZero=*/true);
6667 }
6668 return Ret;
6669 }
6670 }
6671
6672 // Mark the bits case number touched.
6673 APInt BitMask = APInt::getZero(numBits: MinCaseVal->getBitWidth());
6674 for (auto *Case : CaseValues)
6675 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6676
6677 // Check if cases with the same result can cover all number
6678 // in touched bits.
6679 if (BitMask.popcount() == Log2_32(Value: CaseCount)) {
6680 if (!MinCaseVal->isNullValue())
6681 Condition = Builder.CreateSub(LHS: Condition, RHS: MinCaseVal);
6682 Value *And = Builder.CreateAnd(LHS: Condition, RHS: ~BitMask, Name: "switch.and");
6683 Value *Cmp = Builder.CreateICmpEQ(
6684 LHS: And, RHS: Constant::getNullValue(Ty: And->getType()), Name: "switch.selectcmp");
6685 Value *Ret =
6686 Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
6687 if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6688 assert(BranchWeights.size() >= 2);
6689 setBranchWeights(
6690 I&: *SI,
6691 Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: 0U), BranchWeights[0]},
6692 /*IsExpected=*/false, /*ElideAllZero=*/true);
6693 }
6694 return Ret;
6695 }
6696 }
6697
6698 // Handle the degenerate case where two cases have the same value.
6699 if (CaseValues.size() == 2) {
6700 Value *Cmp1 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[0],
6701 Name: "switch.selectcmp.case1");
6702 Value *Cmp2 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[1],
6703 Name: "switch.selectcmp.case2");
6704 Value *Cmp = Builder.CreateOr(LHS: Cmp1, RHS: Cmp2, Name: "switch.selectcmp");
6705 Value *Ret =
6706 Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
6707 if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
6708 assert(BranchWeights.size() >= 2);
6709 setBranchWeights(
6710 I&: *SI, Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: 0U), BranchWeights[0]},
6711 /*IsExpected=*/false, /*ElideAllZero=*/true);
6712 }
6713 return Ret;
6714 }
6715 }
6716
6717 return nullptr;
6718}
6719
6720// Helper function to cleanup a switch instruction that has been converted into
6721// a select, fixing up PHI nodes and basic blocks.
6722static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
6723 Value *SelectValue,
6724 IRBuilder<> &Builder,
6725 DomTreeUpdater *DTU) {
6726 std::vector<DominatorTree::UpdateType> Updates;
6727
6728 BasicBlock *SelectBB = SI->getParent();
6729 BasicBlock *DestBB = PHI->getParent();
6730
6731 if (DTU && !is_contained(Range: predecessors(BB: DestBB), Element: SelectBB))
6732 Updates.push_back(x: {DominatorTree::Insert, SelectBB, DestBB});
6733 Builder.CreateBr(Dest: DestBB);
6734
6735 // Remove the switch.
6736
6737 PHI->removeIncomingValueIf(
6738 Predicate: [&](unsigned Idx) { return PHI->getIncomingBlock(i: Idx) == SelectBB; });
6739 PHI->addIncoming(V: SelectValue, BB: SelectBB);
6740
6741 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6742 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6743 BasicBlock *Succ = SI->getSuccessor(idx: i);
6744
6745 if (Succ == DestBB)
6746 continue;
6747 Succ->removePredecessor(Pred: SelectBB);
6748 if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
6749 Updates.push_back(x: {DominatorTree::Delete, SelectBB, Succ});
6750 }
6751 SI->eraseFromParent();
6752 if (DTU)
6753 DTU->applyUpdates(Updates);
6754}
6755
6756/// If a switch is only used to initialize one or more phi nodes in a common
6757/// successor block with only two different constant values, try to replace the
6758/// switch with a select. Returns true if the fold was made.
6759static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6760 DomTreeUpdater *DTU, const DataLayout &DL,
6761 const TargetTransformInfo &TTI) {
6762 Value *const Cond = SI->getCondition();
6763 PHINode *PHI = nullptr;
6764 BasicBlock *CommonDest = nullptr;
6765 Constant *DefaultResult;
6766 SwitchCaseResultVectorTy UniqueResults;
6767 // Collect all the cases that will deliver the same value from the switch.
6768 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6769 DL, TTI, /*MaxUniqueResults*/ 2))
6770 return false;
6771
6772 assert(PHI != nullptr && "PHI for value select not found");
6773 Builder.SetInsertPoint(SI);
6774 SmallVector<uint32_t, 4> BranchWeights;
6775 if (!ProfcheckDisableMetadataFixes) {
6776 [[maybe_unused]] auto HasWeights =
6777 extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights&: BranchWeights);
6778 assert(!HasWeights == (BranchWeights.empty()));
6779 }
6780 assert(BranchWeights.empty() ||
6781 (BranchWeights.size() >=
6782 UniqueResults.size() + (DefaultResult != nullptr)));
6783
6784 Value *SelectValue = foldSwitchToSelect(ResultVector: UniqueResults, DefaultResult, Condition: Cond,
6785 Builder, DL, BranchWeights);
6786 if (!SelectValue)
6787 return false;
6788
6789 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6790 return true;
6791}
6792
namespace {

/// Chooses a cheaper representation for the mapping from switch case values
/// to result constants, and emits the code that computes a result from a
/// table index. The representation is picked in the constructor and is one of
/// the kinds enumerated below.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL,
      const TargetTransformInfo &TTI, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch. Returns the value that holds the result.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  /// Non-integer element types never fit.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

  /// Return true if the replacement is a bit map.
  bool isBitMap();

private:
  // Depending on the switch, there are different alternatives. The
  // constructor tries them in the order they are listed here and keeps the
  // first one that applies.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap and the integer type of one element
  // packed into it.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value:
  // result = index * LinearMultiplier + LinearOffset. LinearMapValWrapped is
  // set when the mapping is non-monotonic or the multiplication may overflow
  // in the signed sense; nsw is only attached when it is false.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6872
6873SwitchReplacement::SwitchReplacement(
6874 Module &M, uint64_t TableSize, ConstantInt *Offset,
6875 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6876 Constant *DefaultValue, const DataLayout &DL,
6877 const TargetTransformInfo &TTI, const StringRef &FuncName)
6878 : DefaultValue(DefaultValue) {
6879 assert(Values.size() && "Can't build lookup table without values!");
6880 assert(TableSize >= Values.size() && "Can't fit values in table!");
6881
6882 // If all values in the table are equal, this is that value.
6883 SingleValue = Values.begin()->second;
6884
6885 ValueType = Values.begin()->second->getType();
6886
6887 // Build up the table contents.
6888 SmallVector<Constant *, 64> TableContents(TableSize);
6889 for (const auto &[CaseVal, CaseRes] : Values) {
6890 assert(CaseRes->getType() == ValueType);
6891
6892 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6893 TableContents[Idx] = CaseRes;
6894
6895 if (SingleValue && !isa<PoisonValue>(Val: CaseRes) && CaseRes != SingleValue)
6896 SingleValue = isa<PoisonValue>(Val: SingleValue) ? CaseRes : nullptr;
6897 }
6898
6899 // Fill in any holes in the table with the default result.
6900 if (Values.size() < TableSize) {
6901 assert(DefaultValue &&
6902 "Need a default value to fill the lookup table holes.");
6903 assert(DefaultValue->getType() == ValueType);
6904 for (uint64_t I = 0; I < TableSize; ++I) {
6905 if (!TableContents[I])
6906 TableContents[I] = DefaultValue;
6907 }
6908
6909 // If the default value is poison, all the holes are poison.
6910 bool DefaultValueIsPoison = isa<PoisonValue>(Val: DefaultValue);
6911
6912 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6913 SingleValue = nullptr;
6914 }
6915
6916 // If each element in the table contains the same value, we only need to store
6917 // that single value.
6918 if (SingleValue) {
6919 Kind = SingleValueKind;
6920 return;
6921 }
6922
6923 // Check if we can derive the value with a linear transformation from the
6924 // table index.
6925 if (isa<IntegerType>(Val: ValueType)) {
6926 bool LinearMappingPossible = true;
6927 APInt PrevVal;
6928 APInt DistToPrev;
6929 // When linear map is monotonic and signed overflow doesn't happen on
6930 // maximum index, we can attach nsw on Add and Mul.
6931 bool NonMonotonic = false;
6932 assert(TableSize >= 2 && "Should be a SingleValue table.");
6933 // Check if there is the same distance between two consecutive values.
6934 for (uint64_t I = 0; I < TableSize; ++I) {
6935 ConstantInt *ConstVal = dyn_cast<ConstantInt>(Val: TableContents[I]);
6936
6937 if (!ConstVal && isa<PoisonValue>(Val: TableContents[I])) {
6938 // This is an poison, so it's (probably) a lookup table hole.
6939 // To prevent any regressions from before we switched to using poison as
6940 // the default value, holes will fall back to using the first value.
6941 // This can be removed once we add proper handling for poisons in lookup
6942 // tables.
6943 ConstVal = dyn_cast<ConstantInt>(Val: Values[0].second);
6944 }
6945
6946 if (!ConstVal) {
6947 // This is an undef. We could deal with it, but undefs in lookup tables
6948 // are very seldom. It's probably not worth the additional complexity.
6949 LinearMappingPossible = false;
6950 break;
6951 }
6952 const APInt &Val = ConstVal->getValue();
6953 if (I != 0) {
6954 APInt Dist = Val - PrevVal;
6955 if (I == 1) {
6956 DistToPrev = Dist;
6957 } else if (Dist != DistToPrev) {
6958 LinearMappingPossible = false;
6959 break;
6960 }
6961 NonMonotonic |=
6962 Dist.isStrictlyPositive() ? Val.sle(RHS: PrevVal) : Val.sgt(RHS: PrevVal);
6963 }
6964 PrevVal = Val;
6965 }
6966 if (LinearMappingPossible) {
6967 LinearOffset = cast<ConstantInt>(Val: TableContents[0]);
6968 LinearMultiplier = ConstantInt::get(Context&: M.getContext(), V: DistToPrev);
6969 APInt M = LinearMultiplier->getValue();
6970 bool MayWrap = true;
6971 if (isIntN(N: M.getBitWidth(), x: TableSize - 1))
6972 (void)M.smul_ov(RHS: APInt(M.getBitWidth(), TableSize - 1), Overflow&: MayWrap);
6973 LinearMapValWrapped = NonMonotonic || MayWrap;
6974 Kind = LinearMapKind;
6975 return;
6976 }
6977 }
6978
6979 // If the type is integer and the table fits in a register, build a bitmap.
6980 if (wouldFitInRegister(DL, TableSize, ElementType: ValueType)) {
6981 IntegerType *IT = cast<IntegerType>(Val: ValueType);
6982 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6983 for (uint64_t I = TableSize; I > 0; --I) {
6984 TableInt <<= IT->getBitWidth();
6985 // Insert values into the bitmap. Undef values are set to zero.
6986 if (!isa<UndefValue>(Val: TableContents[I - 1])) {
6987 ConstantInt *Val = cast<ConstantInt>(Val: TableContents[I - 1]);
6988 TableInt |= Val->getValue().zext(width: TableInt.getBitWidth());
6989 }
6990 }
6991 BitMap = ConstantInt::get(Context&: M.getContext(), V: TableInt);
6992 BitMapElementTy = IT;
6993 Kind = BitMapKind;
6994 return;
6995 }
6996
6997 if (auto *IT = dyn_cast<IntegerType>(Val: ValueType)) {
6998 ConstantRange Range(IT->getBitWidth(), false);
6999 for (Constant *Value : TableContents)
7000 if (!isa<UndefValue>(Val: Value))
7001 Range = Range.unionWith(CR: cast<ConstantInt>(Val: Value)->getValue());
7002 // TODO: handle sign extension as well?
7003 unsigned NeededBitWidth =
7004 std::max(a: TTI.getMinimumLookupTableEntryBitWidth(),
7005 b: unsigned(PowerOf2Ceil(A: Range.getActiveBits())));
7006 if (NeededBitWidth < IT->getBitWidth()) {
7007 IntegerType *DstTy = IntegerType::get(C&: IT->getContext(), NumBits: NeededBitWidth);
7008 for (Constant *&Value : TableContents)
7009 Value = ConstantFoldCastInstruction(opcode: Instruction::Trunc, V: Value, DestTy: DstTy);
7010 }
7011 }
7012
7013 // Store the table in an array.
7014 auto *TableTy = ArrayType::get(ElementType: TableContents[0]->getType(), NumElements: TableSize);
7015 Initializer = ConstantArray::get(T: TableTy, V: TableContents);
7016
7017 Kind = LookupTableKind;
7018}
7019
7020Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
7021 const DataLayout &DL, Function *Func) {
7022 switch (Kind) {
7023 case SingleValueKind:
7024 return SingleValue;
7025 case LinearMapKind: {
7026 ++NumLinearMaps;
7027 // Derive the result value from the input value.
7028 Value *Result = Builder.CreateIntCast(V: Index, DestTy: LinearMultiplier->getType(),
7029 isSigned: false, Name: "switch.idx.cast");
7030 if (!LinearMultiplier->isOne())
7031 Result = Builder.CreateMul(LHS: Result, RHS: LinearMultiplier, Name: "switch.idx.mult",
7032 /*HasNUW = */ false,
7033 /*HasNSW = */ !LinearMapValWrapped);
7034
7035 if (!LinearOffset->isZero())
7036 Result = Builder.CreateAdd(LHS: Result, RHS: LinearOffset, Name: "switch.offset",
7037 /*HasNUW = */ false,
7038 /*HasNSW = */ !LinearMapValWrapped);
7039 return Result;
7040 }
7041 case BitMapKind: {
7042 ++NumBitMaps;
7043 // Type of the bitmap (e.g. i59).
7044 IntegerType *MapTy = BitMap->getIntegerType();
7045
7046 // Cast Index to the same type as the bitmap.
7047 // Note: The Index is <= the number of elements in the table, so
7048 // truncating it to the width of the bitmask is safe.
7049 Value *ShiftAmt = Builder.CreateZExtOrTrunc(V: Index, DestTy: MapTy, Name: "switch.cast");
7050
7051 // Multiply the shift amount by the element width. NUW/NSW can always be
7052 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
7053 // BitMap's bit width.
7054 ShiftAmt = Builder.CreateMul(
7055 LHS: ShiftAmt, RHS: ConstantInt::get(Ty: MapTy, V: BitMapElementTy->getBitWidth()),
7056 Name: "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
7057
7058 // Shift down.
7059 Value *DownShifted =
7060 Builder.CreateLShr(LHS: BitMap, RHS: ShiftAmt, Name: "switch.downshift");
7061 // Mask off.
7062 return Builder.CreateTrunc(V: DownShifted, DestTy: BitMapElementTy, Name: "switch.masked");
7063 }
7064 case LookupTableKind: {
7065 ++NumLookupTables;
7066 auto *Table =
7067 new GlobalVariable(*Func->getParent(), Initializer->getType(),
7068 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
7069 Initializer, "switch.table." + Func->getName());
7070 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7071 // Set the alignment to that of an array items. We will be only loading one
7072 // value out of it.
7073 Table->setAlignment(DL.getPrefTypeAlign(Ty: ValueType));
7074 Type *IndexTy = DL.getIndexType(PtrTy: Table->getType());
7075 auto *ArrayTy = cast<ArrayType>(Val: Table->getValueType());
7076
7077 if (Index->getType() != IndexTy) {
7078 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7079 Index = Builder.CreateZExtOrTrunc(V: Index, DestTy: IndexTy);
7080 if (auto *Zext = dyn_cast<ZExtInst>(Val: Index))
7081 Zext->setNonNeg(
7082 isUIntN(N: OldBitWidth - 1, x: ArrayTy->getNumElements() - 1));
7083 }
7084
7085 Value *GEPIndices[] = {ConstantInt::get(Ty: IndexTy, V: 0), Index};
7086 Value *GEP =
7087 Builder.CreateInBoundsGEP(Ty: ArrayTy, Ptr: Table, IdxList: GEPIndices, Name: "switch.gep");
7088 Value *Load =
7089 Builder.CreateLoad(Ty: ArrayTy->getElementType(), Ptr: GEP, Name: "switch.load");
7090 if (Load->getType() == ValueType)
7091 return Load;
7092 return Builder.CreateZExt(V: Load, DestTy: ValueType, Name: "switch.ext");
7093 }
7094 }
7095 llvm_unreachable("Unknown helper kind!");
7096}
7097
7098bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7099 uint64_t TableSize,
7100 Type *ElementType) {
7101 auto *IT = dyn_cast<IntegerType>(Val: ElementType);
7102 if (!IT)
7103 return false;
7104 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7105 // are <= 15, we could try to narrow the type.
7106
7107 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7108 if (TableSize >= UINT_MAX / IT->getBitWidth())
7109 return false;
7110 return DL.fitsInLegalInteger(Width: TableSize * IT->getBitWidth());
7111}
7112
7113static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
7114 const DataLayout &DL) {
7115 // Allow any legal type.
7116 if (TTI.isTypeLegal(Ty))
7117 return true;
7118
7119 auto *IT = dyn_cast<IntegerType>(Val: Ty);
7120 if (!IT)
7121 return false;
7122
7123 // Also allow power of 2 integer types that have at least 8 bits and fit in
7124 // a register. These types are common in frontend languages and targets
7125 // usually support loads of these types.
7126 // TODO: We could relax this to any integer that fits in a register and rely
7127 // on ABI alignment and padding in the table to allow the load to be widened.
7128 // Or we could widen the constants and truncate the load.
7129 unsigned BitWidth = IT->getBitWidth();
7130 return BitWidth >= 8 && isPowerOf2_32(Value: BitWidth) &&
7131 DL.fitsInLegalInteger(Width: IT->getBitWidth());
7132}
7133
7134Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7135
7136bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7137
7138bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7139
/// Return true if \p NumCases cases spread over a range of \p CaseRange
/// values reach the minimum density threshold.
///
/// \param NumCases  Number of case values.
/// \param CaseRange Size of the value range the cases are spread over.
/// \param OptSize   Selects the stricter optsize/minsize threshold.
/// \returns false if \p CaseRange is too large to be scaled without
///          overflow, true if the density is at least the threshold.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange, bool OptSize) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode, 10% is the default density for jump tables. See also
  // TargetLoweringBase::isSuitableForJumpTable(), which this function was based
  // on.
  const uint64_t MinDensity = OptSize ? 40 : 10;

  if (CaseRange >= UINT64_MAX / 100)
    return false; // Avoid multiplication overflows below.

  // At least one case per value in the range is a density of 100% or more,
  // which satisfies either threshold. Answering here also guarantees
  // NumCases < CaseRange < UINT64_MAX / 100 below, so NumCases * 100 cannot
  // wrap around and turn a dense switch into a "sparse" one.
  if (NumCases >= CaseRange)
    return true;

  return NumCases * 100 >= CaseRange * MinDensity;
}
7152
7153static bool isSwitchDense(ArrayRef<int64_t> Values, bool OptSize) {
7154 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7155 uint64_t Range = Diff + 1;
7156 if (Range < Diff)
7157 return false; // Overflow.
7158
7159 return isSwitchDense(NumCases: Values.size(), CaseRange: Range, OptSize);
7160}
7161
7162/// Determine whether a lookup table should be built for this switch, based on
7163/// the number of cases, size of the table, and the types of the results.
7164// TODO: We could support larger than legal types by limiting based on the
7165// number of loads required and/or table size. If the constants are small we
7166// could use smaller table entries and extend after the load.
7167static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
7168 const TargetTransformInfo &TTI,
7169 const DataLayout &DL,
7170 const SmallVector<Type *> &ResultTypes) {
7171 if (SI->getNumCases() > TableSize)
7172 return false; // TableSize overflowed.
7173
7174 bool AllTablesFitInRegister = true;
7175 bool HasIllegalType = false;
7176 for (const auto &Ty : ResultTypes) {
7177 // Saturate this flag to true.
7178 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7179
7180 // Saturate this flag to false.
7181 AllTablesFitInRegister =
7182 AllTablesFitInRegister &&
7183 SwitchReplacement::wouldFitInRegister(DL, TableSize, ElementType: Ty);
7184
7185 // If both flags saturate, we're done. NOTE: This *only* works with
7186 // saturating flags, and all flags have to saturate first due to the
7187 // non-deterministic behavior of iterating over a dense map.
7188 if (HasIllegalType && !AllTablesFitInRegister)
7189 break;
7190 }
7191
7192 // If each table would fit in a register, we should build it anyway.
7193 if (AllTablesFitInRegister)
7194 return true;
7195
7196 // Don't build a table that doesn't fit in-register if it has illegal types.
7197 if (HasIllegalType)
7198 return false;
7199
7200 return isSwitchDense(NumCases: SI->getNumCases(), CaseRange: TableSize,
7201 OptSize: SI->getFunction()->hasOptSize());
7202}
7203
7204static bool shouldUseSwitchConditionAsTableIndex(
7205 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7206 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7207 const DataLayout &DL, const TargetTransformInfo &TTI) {
7208 if (MinCaseVal.isNullValue())
7209 return true;
7210 if (MinCaseVal.isNegative() ||
7211 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7212 !HasDefaultResults)
7213 return false;
7214 return all_of(Range: ResultTypes, P: [&](const auto &ResultType) {
7215 return SwitchReplacement::wouldFitInRegister(
7216 DL, TableSize: MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ElementType: ResultType);
7217 });
7218}
7219
7220/// Try to reuse the switch table index compare. Following pattern:
7221/// \code
7222/// if (idx < tablesize)
7223/// r = table[idx]; // table does not contain default_value
7224/// else
7225/// r = default_value;
7226/// if (r != default_value)
7227/// ...
7228/// \endcode
7229/// Is optimized to:
7230/// \code
7231/// cond = idx < tablesize;
7232/// if (cond)
7233/// r = table[idx];
7234/// else
7235/// r = default_value;
7236/// if (cond)
7237/// ...
7238/// \endcode
7239/// Jump threading will then eliminate the second if(cond).
7240static void reuseTableCompare(
7241 User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch,
7242 Constant *DefaultValue,
7243 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
7244 ICmpInst *CmpInst = dyn_cast<ICmpInst>(Val: PhiUser);
7245 if (!CmpInst)
7246 return;
7247
7248 // We require that the compare is in the same block as the phi so that jump
7249 // threading can do its work afterwards.
7250 if (CmpInst->getParent() != PhiBlock)
7251 return;
7252
7253 Constant *CmpOp1 = dyn_cast<Constant>(Val: CmpInst->getOperand(i_nocapture: 1));
7254 if (!CmpOp1)
7255 return;
7256
7257 Value *RangeCmp = RangeCheckBranch->getCondition();
7258 Constant *TrueConst = ConstantInt::getTrue(Ty: RangeCmp->getType());
7259 Constant *FalseConst = ConstantInt::getFalse(Ty: RangeCmp->getType());
7260
7261 // Check if the compare with the default value is constant true or false.
7262 const DataLayout &DL = PhiBlock->getDataLayout();
7263 Constant *DefaultConst = ConstantFoldCompareInstOperands(
7264 Predicate: CmpInst->getPredicate(), LHS: DefaultValue, RHS: CmpOp1, DL);
7265 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7266 return;
7267
7268 // Check if the compare with the case values is distinct from the default
7269 // compare result.
7270 for (auto ValuePair : Values) {
7271 Constant *CaseConst = ConstantFoldCompareInstOperands(
7272 Predicate: CmpInst->getPredicate(), LHS: ValuePair.second, RHS: CmpOp1, DL);
7273 if (!CaseConst || CaseConst == DefaultConst ||
7274 (CaseConst != TrueConst && CaseConst != FalseConst))
7275 return;
7276 }
7277
7278 // Check if the branch instruction dominates the phi node. It's a simple
7279 // dominance check, but sufficient for our needs.
7280 // Although this check is invariant in the calling loops, it's better to do it
7281 // at this late stage. Practically we do it at most once for a switch.
7282 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7283 for (BasicBlock *Pred : predecessors(BB: PhiBlock)) {
7284 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7285 return;
7286 }
7287
7288 if (DefaultConst == FalseConst) {
7289 // The compare yields the same result. We can replace it.
7290 CmpInst->replaceAllUsesWith(V: RangeCmp);
7291 ++NumTableCmpReuses;
7292 } else {
7293 // The compare yields the same result, just inverted. We can replace it.
7294 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7295 V1: RangeCmp, V2: ConstantInt::get(Ty: RangeCmp->getType(), V: 1), Name: "inverted.cmp",
7296 InsertBefore: RangeCheckBranch->getIterator());
7297 CmpInst->replaceAllUsesWith(V: InvertedTableCmp);
7298 ++NumTableCmpReuses;
7299 }
7300}
7301
7302/// If the switch is only used to initialize one or more phi nodes in a common
7303/// successor block with different constant values, replace the switch with
7304/// lookup tables.
7305static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
7306 DomTreeUpdater *DTU, const DataLayout &DL,
7307 const TargetTransformInfo &TTI,
7308 bool ConvertSwitchToLookupTable) {
7309 assert(SI->getNumCases() > 1 && "Degenerate switch?");
7310
7311 BasicBlock *BB = SI->getParent();
7312 Function *Fn = BB->getParent();
7313
7314 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
7315 // split off a dense part and build a lookup table for that.
7316
7317 // FIXME: This creates arrays of GEPs to constant strings, which means each
7318 // GEP needs a runtime relocation in PIC code. We should just build one big
7319 // string and lookup indices into that.
7320
7321 // Ignore switches with less than three cases. Lookup tables will not make
7322 // them faster, so we don't analyze them.
7323 if (SI->getNumCases() < 3)
7324 return false;
7325
7326 // Figure out the corresponding result for each case value and phi node in the
7327 // common destination, as well as the min and max case values.
7328 assert(!SI->cases().empty());
7329 SwitchInst::CaseIt CI = SI->case_begin();
7330 ConstantInt *MinCaseVal = CI->getCaseValue();
7331 ConstantInt *MaxCaseVal = CI->getCaseValue();
7332
7333 BasicBlock *CommonDest = nullptr;
7334
7335 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7336 SmallDenseMap<PHINode *, ResultListTy> ResultLists;
7337
7338 SmallDenseMap<PHINode *, Constant *> DefaultResults;
7339 SmallVector<Type *> ResultTypes;
7340 SmallVector<PHINode *, 4> PHIs;
7341
7342 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7343 ConstantInt *CaseVal = CI->getCaseValue();
7344 if (CaseVal->getValue().slt(RHS: MinCaseVal->getValue()))
7345 MinCaseVal = CaseVal;
7346 if (CaseVal->getValue().sgt(RHS: MaxCaseVal->getValue()))
7347 MaxCaseVal = CaseVal;
7348
7349 // Resulting value at phi nodes for this case value.
7350 using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
7351 ResultsTy Results;
7352 if (!getCaseResults(SI, CaseVal, CaseDest: CI->getCaseSuccessor(), CommonDest: &CommonDest,
7353 Res&: Results, DL, TTI))
7354 return false;
7355
7356 // Append the result and result types from this case to the list for each
7357 // phi.
7358 for (const auto &I : Results) {
7359 PHINode *PHI = I.first;
7360 Constant *Value = I.second;
7361 auto [It, Inserted] = ResultLists.try_emplace(Key: PHI);
7362 if (Inserted)
7363 PHIs.push_back(Elt: PHI);
7364 It->second.push_back(Elt: std::make_pair(x&: CaseVal, y&: Value));
7365 ResultTypes.push_back(Elt: PHI->getType());
7366 }
7367 }
7368
7369 // If the table has holes, we need a constant result for the default case
7370 // or a bitmask that fits in a register.
7371 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7372 bool HasDefaultResults =
7373 getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest,
7374 Res&: DefaultResultsList, DL, TTI);
7375 for (const auto &I : DefaultResultsList) {
7376 PHINode *PHI = I.first;
7377 Constant *Result = I.second;
7378 DefaultResults[PHI] = Result;
7379 }
7380
7381 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7382 MinCaseVal&: *MinCaseVal, MaxCaseVal: *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7383 uint64_t TableSize;
7384 ConstantInt *TableIndexOffset;
7385 if (UseSwitchConditionAsTableIndex) {
7386 TableSize = MaxCaseVal->getLimitedValue() + 1;
7387 TableIndexOffset = ConstantInt::get(Ty: MaxCaseVal->getIntegerType(), V: 0);
7388 } else {
7389 TableSize =
7390 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7391
7392 TableIndexOffset = MinCaseVal;
7393 }
7394
7395 // If the default destination is unreachable, or if the lookup table covers
7396 // all values of the conditional variable, branch directly to the lookup table
7397 // BB. Otherwise, check that the condition is within the case range.
7398 uint64_t NumResults = ResultLists[PHIs[0]].size();
7399 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7400
7401 bool TableHasHoles = (NumResults < TableSize);
7402
7403 // If the table has holes but the default destination doesn't produce any
7404 // constant results, the lookup table entries corresponding to the holes will
7405 // contain poison.
7406 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7407
7408 // If the default destination doesn't produce a constant result but is still
7409 // reachable, and the lookup table has holes, we need to use a mask to
7410 // determine if the current index should load from the lookup table or jump
7411 // to the default case.
7412 // The mask is unnecessary if the table has holes but the default destination
7413 // is unreachable, as in that case the holes must also be unreachable.
7414 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7415 if (NeedMask) {
7416 // As an extra penalty for the validity test we require more cases.
7417 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7418 return false;
7419 if (!DL.fitsInLegalInteger(Width: TableSize))
7420 return false;
7421 }
7422
7423 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7424 return false;
7425
7426 // Compute the table index value.
7427 Value *TableIndex;
7428 if (UseSwitchConditionAsTableIndex) {
7429 TableIndex = SI->getCondition();
7430 if (HasDefaultResults) {
7431 // Grow the table to cover all possible index values to avoid the range
7432 // check. It will use the default result to fill in the table hole later,
7433 // so make sure it exist.
7434 ConstantRange CR = computeConstantRange(V: TableIndex, /*ForSigned=*/false,
7435 SQ: SimplifyQuery(DL));
7436 // Grow the table shouldn't have any size impact by checking
7437 // wouldFitInRegister.
7438 // TODO: Consider growing the table also when it doesn't fit in a register
7439 // if no optsize is specified.
7440 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7441 if (!CR.isUpperWrapped() &&
7442 all_of(Range&: ResultTypes, P: [&](const auto &ResultType) {
7443 return SwitchReplacement::wouldFitInRegister(DL, TableSize: UpperBound,
7444 ElementType: ResultType);
7445 })) {
7446 // There may be some case index larger than the UpperBound (unreachable
7447 // case), so make sure the table size does not get smaller.
7448 TableSize = std::max(a: UpperBound, b: TableSize);
7449 // The default branch is unreachable after we enlarge the lookup table.
7450 // Adjust DefaultIsReachable to reuse code path.
7451 DefaultIsReachable = false;
7452 }
7453 }
7454 }
7455
7456 // Keep track of the switch replacement for each phi
7457 SmallDenseMap<PHINode *, SwitchReplacement> PhiToReplacementMap;
7458 for (PHINode *PHI : PHIs) {
7459 const auto &ResultList = ResultLists[PHI];
7460
7461 Type *ResultType = ResultList.begin()->second->getType();
7462 // Use any value to fill the lookup table holes.
7463 Constant *DefaultVal =
7464 AllHolesArePoison ? PoisonValue::get(T: ResultType) : DefaultResults[PHI];
7465 StringRef FuncName = Fn->getName();
7466 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7467 ResultList, DefaultVal, DL, TTI, FuncName);
7468 PhiToReplacementMap.insert(KV: {PHI, Replacement});
7469 }
7470
7471 bool AnyLookupTables = any_of(
7472 Range&: PhiToReplacementMap, P: [](auto &KV) { return KV.second.isLookupTable(); });
7473 bool AnyBitMaps = any_of(Range&: PhiToReplacementMap,
7474 P: [](auto &KV) { return KV.second.isBitMap(); });
7475
7476 // A few conditions prevent the generation of lookup tables:
7477 // 1. The target does not support lookup tables.
7478 // 2. The "no-jump-tables" function attribute is set.
7479 // However, these objections do not apply to other switch replacements, like
7480 // the bitmap, so we only stop here if any of these conditions are met and we
7481 // want to create a LUT. Otherwise, continue with the switch replacement.
7482 if (AnyLookupTables &&
7483 (!TTI.shouldBuildLookupTables() ||
7484 Fn->getFnAttribute(Kind: "no-jump-tables").getValueAsBool()))
7485 return false;
7486
7487 // In the early optimization pipeline, disable formation of lookup tables,
7488 // bit maps and mask checks, as they may inhibit further optimization.
7489 if (!ConvertSwitchToLookupTable &&
7490 (AnyLookupTables || AnyBitMaps || NeedMask))
7491 return false;
7492
7493 Builder.SetInsertPoint(SI);
7494 // TableIndex is the switch condition - TableIndexOffset if we don't
7495 // use the condition directly
7496 if (!UseSwitchConditionAsTableIndex) {
7497 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7498 // we can try to attach nsw.
7499 bool MayWrap = true;
7500 if (!DefaultIsReachable) {
7501 APInt Res =
7502 MaxCaseVal->getValue().ssub_ov(RHS: MinCaseVal->getValue(), Overflow&: MayWrap);
7503 (void)Res;
7504 }
7505 TableIndex = Builder.CreateSub(LHS: SI->getCondition(), RHS: TableIndexOffset,
7506 Name: "switch.tableidx", /*HasNUW =*/false,
7507 /*HasNSW =*/!MayWrap);
7508 }
7509
7510 std::vector<DominatorTree::UpdateType> Updates;
7511
7512 // Compute the maximum table size representable by the integer type we are
7513 // switching upon.
7514 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7515 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7516 assert(MaxTableSize >= TableSize &&
7517 "It is impossible for a switch to have more entries than the max "
7518 "representable value of its input integer type's size.");
7519
7520 // Create the BB that does the lookups.
7521 Module &Mod = *CommonDest->getParent()->getParent();
7522 BasicBlock *LookupBB = BasicBlock::Create(
7523 Context&: Mod.getContext(), Name: "switch.lookup", Parent: CommonDest->getParent(), InsertBefore: CommonDest);
7524
7525 CondBrInst *RangeCheckBranch = nullptr;
7526 CondBrInst *CondBranch = nullptr;
7527
7528 Builder.SetInsertPoint(SI);
7529 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7530 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7531 Builder.CreateBr(Dest: LookupBB);
7532 if (DTU)
7533 Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
7534 // Note: We call removeProdecessor later since we need to be able to get the
7535 // PHI value for the default case in case we're using a bit mask.
7536 } else {
7537 Value *Cmp = Builder.CreateICmpULT(
7538 LHS: TableIndex, RHS: ConstantInt::get(Ty: MinCaseVal->getType(), V: TableSize));
7539 RangeCheckBranch =
7540 Builder.CreateCondBr(Cond: Cmp, True: LookupBB, False: SI->getDefaultDest());
7541 CondBranch = RangeCheckBranch;
7542 if (DTU)
7543 Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
7544 }
7545
7546 // Populate the BB that does the lookups.
7547 Builder.SetInsertPoint(LookupBB);
7548
7549 if (NeedMask) {
7550 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7551 // re-purposed to do the hole check, and we create a new LookupBB.
7552 BasicBlock *MaskBB = LookupBB;
7553 MaskBB->setName("switch.hole_check");
7554 LookupBB = BasicBlock::Create(Context&: Mod.getContext(), Name: "switch.lookup",
7555 Parent: CommonDest->getParent(), InsertBefore: CommonDest);
7556
7557 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7558 // unnecessary illegal types.
7559 uint64_t TableSizePowOf2 = NextPowerOf2(A: std::max(a: 7ULL, b: TableSize - 1ULL));
7560 APInt MaskInt(TableSizePowOf2, 0);
7561 APInt One(TableSizePowOf2, 1);
7562 // Build bitmask; fill in a 1 bit for every case.
7563 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7564 for (const auto &Result : ResultList) {
7565 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7566 .getLimitedValue();
7567 MaskInt |= One << Idx;
7568 }
7569 ConstantInt *TableMask = ConstantInt::get(Context&: Mod.getContext(), V: MaskInt);
7570
7571 // Get the TableIndex'th bit of the bitmask.
7572 // If this bit is 0 (meaning hole) jump to the default destination,
7573 // else continue with table lookup.
7574 IntegerType *MapTy = TableMask->getIntegerType();
7575 Value *MaskIndex =
7576 Builder.CreateZExtOrTrunc(V: TableIndex, DestTy: MapTy, Name: "switch.maskindex");
7577 Value *Shifted = Builder.CreateLShr(LHS: TableMask, RHS: MaskIndex, Name: "switch.shifted");
7578 Value *LoBit = Builder.CreateTrunc(
7579 V: Shifted, DestTy: Type::getInt1Ty(C&: Mod.getContext()), Name: "switch.lobit");
7580 CondBranch = Builder.CreateCondBr(Cond: LoBit, True: LookupBB, False: SI->getDefaultDest());
7581 if (DTU) {
7582 Updates.push_back(x: {DominatorTree::Insert, MaskBB, LookupBB});
7583 Updates.push_back(x: {DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7584 }
7585 Builder.SetInsertPoint(LookupBB);
7586 addPredecessorToBlock(Succ: SI->getDefaultDest(), NewPred: MaskBB, ExistPred: BB);
7587 }
7588
7589 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7590 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7591 // do not delete PHINodes here.
7592 SI->getDefaultDest()->removePredecessor(Pred: BB,
7593 /*KeepOneInputPHIs=*/true);
7594 if (DTU)
7595 Updates.push_back(x: {DominatorTree::Delete, BB, SI->getDefaultDest()});
7596 }
7597
7598 for (PHINode *PHI : PHIs) {
7599 const ResultListTy &ResultList = ResultLists[PHI];
7600 auto Replacement = PhiToReplacementMap.at(Val: PHI);
7601 auto *Result = Replacement.replaceSwitch(Index: TableIndex, Builder, DL, Func: Fn);
7602 // Do a small peephole optimization: re-use the switch table compare if
7603 // possible.
7604 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7605 BasicBlock *PhiBlock = PHI->getParent();
7606 // Search for compare instructions which use the phi.
7607 for (auto *User : PHI->users()) {
7608 reuseTableCompare(PhiUser: User, PhiBlock, RangeCheckBranch,
7609 DefaultValue: Replacement.getDefaultValue(), Values: ResultList);
7610 }
7611 }
7612
7613 PHI->addIncoming(V: Result, BB: LookupBB);
7614 }
7615
7616 Builder.CreateBr(Dest: CommonDest);
7617 if (DTU)
7618 Updates.push_back(x: {DominatorTree::Insert, LookupBB, CommonDest});
7619
7620 SmallVector<uint32_t> BranchWeights;
7621 const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
7622 extractBranchWeights(I: *SI, Weights&: BranchWeights);
7623 uint64_t ToLookupWeight = 0;
7624 uint64_t ToDefaultWeight = 0;
7625
7626 // Remove the switch.
7627 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7628 for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
7629 BasicBlock *Succ = SI->getSuccessor(idx: I);
7630
7631 if (Succ == SI->getDefaultDest()) {
7632 if (HasBranchWeights)
7633 ToDefaultWeight += BranchWeights[I];
7634 continue;
7635 }
7636 Succ->removePredecessor(Pred: BB);
7637 if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
7638 Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
7639 if (HasBranchWeights)
7640 ToLookupWeight += BranchWeights[I];
7641 }
7642 SI->eraseFromParent();
7643 if (HasBranchWeights)
7644 setFittedBranchWeights(I&: *CondBranch, Weights: {ToLookupWeight, ToDefaultWeight},
7645 /*IsExpected=*/false);
7646 if (DTU)
7647 DTU->applyUpdates(Updates);
7648
7649 if (NeedMask)
7650 ++NumLookupTablesHoles;
7651 return true;
7652}
7653
7654/// Try to transform a switch that has "holes" in it to a contiguous sequence
7655/// of cases.
7656///
7657/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7658/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7659///
7660/// This converts a sparse switch into a dense switch which allows better
7661/// lowering and could also allow transforming into a lookup table.
7662static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7663 const DataLayout &DL,
7664 const TargetTransformInfo &TTI) {
7665 auto *CondTy = cast<IntegerType>(Val: SI->getCondition()->getType());
7666 if (CondTy->getIntegerBitWidth() > 64 ||
7667 !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
7668 return false;
7669 // Only bother with this optimization if there are more than 3 switch cases;
7670 // SDAG will only bother creating jump tables for 4 or more cases.
7671 if (SI->getNumCases() < 4)
7672 return false;
7673
7674 // This transform is agnostic to the signedness of the input or case values. We
7675 // can treat the case values as signed or unsigned. We can optimize more common
7676 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7677 // as signed.
7678 SmallVector<int64_t,4> Values;
7679 for (const auto &C : SI->cases())
7680 Values.push_back(Elt: C.getCaseValue()->getValue().getSExtValue());
7681 llvm::sort(C&: Values);
7682
7683 // If the switch is already dense, there's nothing useful to do here.
7684 if (isSwitchDense(Values, OptSize: SI->getFunction()->hasOptSize()))
7685 return false;
7686
7687 // First, transform the values such that they start at zero and ascend.
7688 int64_t Base = Values[0];
7689 for (auto &V : Values)
7690 V -= (uint64_t)(Base);
7691
7692 // Now we have signed numbers that have been shifted so that, given enough
7693 // precision, there are no negative values. Since the rest of the transform
7694 // is bitwise only, we switch now to an unsigned representation.
7695
7696 // This transform can be done speculatively because it is so cheap - it
7697 // results in a single rotate operation being inserted.
7698
7699 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7700 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7701 // less than 64.
7702 unsigned Shift = 64;
7703 for (auto &V : Values)
7704 Shift = std::min(a: Shift, b: (unsigned)llvm::countr_zero(Val: (uint64_t)V));
7705 assert(Shift < 64);
7706 if (Shift > 0)
7707 for (auto &V : Values)
7708 V = (int64_t)((uint64_t)V >> Shift);
7709
7710 if (!isSwitchDense(Values, OptSize: SI->getFunction()->hasOptSize()))
7711 // Transform didn't create a dense switch.
7712 return false;
7713
7714 // The obvious transform is to shift the switch condition right and emit a
7715 // check that the condition actually cleanly divided by GCD, i.e.
7716 // C & (1 << Shift - 1) == 0
7717 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7718 //
7719 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7720 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7721 // are nonzero then the switch condition will be very large and will hit the
7722 // default case.
7723
7724 auto *Ty = cast<IntegerType>(Val: SI->getCondition()->getType());
7725 Builder.SetInsertPoint(SI);
7726 Value *Sub =
7727 Builder.CreateSub(LHS: SI->getCondition(), RHS: ConstantInt::getSigned(Ty, V: Base));
7728 Value *Rot = Builder.CreateIntrinsic(
7729 RetTy: Ty, ID: Intrinsic::fshl,
7730 Args: {Sub, Sub, ConstantInt::get(Ty, V: Ty->getBitWidth() - Shift)});
7731 SI->replaceUsesOfWith(From: SI->getCondition(), To: Rot);
7732
7733 for (auto Case : SI->cases()) {
7734 auto *Orig = Case.getCaseValue();
7735 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7736 Case.setValue(cast<ConstantInt>(Val: ConstantInt::get(Ty, V: Sub.lshr(shiftAmt: Shift))));
7737 }
7738 return true;
7739}
7740
7741/// Tries to transform the switch when the condition is umin with a constant.
7742/// In that case, the default branch can be replaced by the constant's branch.
7743/// This method also removes dead cases when the simplification cannot replace
7744/// the default branch.
7745///
7746/// For example:
7747/// switch(umin(a, 3)) {
7748/// case 0:
7749/// case 1:
7750/// case 2:
7751/// case 3:
7752/// case 4:
7753/// // ...
7754/// default:
7755/// unreachable
7756/// }
7757///
7758/// Transforms into:
7759///
7760/// switch(a) {
7761/// case 0:
7762/// case 1:
7763/// case 2:
7764/// default:
7765/// // This is case 3
7766/// }
7767static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU) {
7768 Value *A;
7769 ConstantInt *Constant;
7770
7771 if (!match(V: SI->getCondition(), P: m_UMin(Op0: m_Value(V&: A), Op1: m_ConstantInt(CI&: Constant))))
7772 return false;
7773
7774 SmallVector<DominatorTree::UpdateType> Updates;
7775 SwitchInstProfUpdateWrapper SIW(*SI);
7776 BasicBlock *BB = SIW->getParent();
7777
7778 // Dead cases are removed even when the simplification fails.
7779 // A case is dead when its value is higher than the Constant.
7780 for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
7781 if (!I->getCaseValue()->getValue().ugt(RHS: Constant->getValue())) {
7782 ++I;
7783 continue;
7784 }
7785 BasicBlock *DeadCaseBB = I->getCaseSuccessor();
7786 DeadCaseBB->removePredecessor(Pred: BB);
7787 Updates.push_back(Elt: {DominatorTree::Delete, BB, DeadCaseBB});
7788 I = SIW.removeCase(I);
7789 E = SIW->case_end();
7790 }
7791
7792 auto Case = SI->findCaseValue(C: Constant);
7793 // If the case value is not found, `findCaseValue` returns the default case.
7794 // In this scenario, since there is no explicit `case 3:`, the simplification
7795 // fails. The simplification also fails when the switch’s default destination
7796 // is reachable.
7797 if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
7798 if (DTU)
7799 DTU->applyUpdates(Updates);
7800 return !Updates.empty();
7801 }
7802
7803 BasicBlock *Unreachable = SI->getDefaultDest();
7804 SIW.replaceDefaultDest(I: Case);
7805 SIW.removeCase(I: Case);
7806 SIW->setCondition(A);
7807
7808 Updates.push_back(Elt: {DominatorTree::Delete, BB, Unreachable});
7809
7810 if (DTU)
7811 DTU->applyUpdates(Updates);
7812
7813 return true;
7814}
7815
7816/// Tries to transform switch of powers of two to reduce switch range.
7817/// For example, switch like:
7818/// switch (C) { case 1: case 2: case 64: case 128: }
7819/// will be transformed to:
7820/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7821///
7822/// This transformation allows better lowering and may transform the switch
7823/// instruction into a sequence of bit manipulation and a smaller
7824/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7825/// address of the jump target, and indirectly jump to it).
7826static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
7827 DomTreeUpdater *DTU,
7828 const DataLayout &DL,
7829 const TargetTransformInfo &TTI) {
7830 Value *Condition = SI->getCondition();
7831 LLVMContext &Context = SI->getContext();
7832 auto *CondTy = cast<IntegerType>(Val: Condition->getType());
7833
7834 if (CondTy->getIntegerBitWidth() > 64 ||
7835 !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
7836 return false;
7837
7838 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7839 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7840 {Condition, ConstantInt::getTrue(Context)});
7841 if (TTI.getIntrinsicInstrCost(ICA: Attrs, CostKind: TTI::TCK_SizeAndLatency) >
7842 TTI::TCC_Basic * 2)
7843 return false;
7844
7845 // Only bother with this optimization if there are more than 3 switch cases.
7846 // SDAG will start emitting jump tables for 4 or more cases.
7847 if (SI->getNumCases() < 4)
7848 return false;
7849
7850 // Check that switch cases are powers of two.
7851 SmallVector<uint64_t, 4> Values;
7852 for (const auto &Case : SI->cases()) {
7853 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7854 if (llvm::has_single_bit(Value: CaseValue))
7855 Values.push_back(Elt: CaseValue);
7856 else
7857 return false;
7858 }
7859
7860 // isSwichDense requires case values to be sorted.
7861 llvm::sort(C&: Values);
7862 if (!isSwitchDense(NumCases: Values.size(),
7863 CaseRange: llvm::countr_zero(Val: Values.back()) -
7864 llvm::countr_zero(Val: Values.front()) + 1,
7865 OptSize: SI->getFunction()->hasOptSize()))
7866 // Transform is unable to generate dense switch.
7867 return false;
7868
7869 Builder.SetInsertPoint(SI);
7870
7871 if (!SI->defaultDestUnreachable()) {
7872 // Let non-power-of-two inputs jump to the default case, when the latter is
7873 // reachable.
7874 auto *PopC = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, Op: Condition);
7875 auto *IsPow2 = Builder.CreateICmpEQ(LHS: PopC, RHS: ConstantInt::get(Ty: CondTy, V: 1));
7876
7877 auto *OrigBB = SI->getParent();
7878 auto *DefaultCaseBB = SI->getDefaultDest();
7879 BasicBlock *SplitBB = SplitBlock(Old: OrigBB, SplitPt: SI, DTU);
7880 auto It = OrigBB->getTerminator()->getIterator();
7881 SmallVector<uint32_t> Weights;
7882 auto HasWeights =
7883 !ProfcheckDisableMetadataFixes && extractBranchWeights(I: *SI, Weights);
7884 auto *BI = CondBrInst::Create(Cond: IsPow2, IfTrue: SplitBB, IfFalse: DefaultCaseBB, InsertBefore: It);
7885 if (HasWeights && any_of(Range&: Weights, P: not_equal_to(Arg: 0))) {
7886 // IsPow2 covers a subset of the cases in which we'd go to the default
7887 // label. The other is those powers of 2 that don't appear in the case
7888 // statement. We don't know the distribution of the values coming in, so
7889 // the safest is to split 50-50 the original probability to `default`.
7890 uint64_t OrigDenominator =
7891 sum_of(Range: map_range(C&: Weights, F: StaticCastTo<uint64_t>));
7892 SmallVector<uint64_t> NewWeights(2);
7893 NewWeights[1] = Weights[0] / 2;
7894 NewWeights[0] = OrigDenominator - NewWeights[1];
7895 setFittedBranchWeights(I&: *BI, Weights: NewWeights, /*IsExpected=*/false);
7896 // The probability of executing the default block stays constant. It was
7897 // p_d = Weights[0] / OrigDenominator
7898 // we rewrite as W/D
7899 // We want to find the probability of the default branch of the switch
7900 // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
7901 // i.e. the original probability is the probability we go to the default
7902 // branch from the BI branch, or we take the default branch on the SI.
7903 // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7904 // This matches using W/2 for the default branch probability numerator and
7905 // D-W/2 as the denominator.
7906 Weights[0] = NewWeights[1];
7907 uint64_t CasesDenominator = OrigDenominator - Weights[0];
7908 for (auto &W : drop_begin(RangeOrContainer&: Weights))
7909 W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7910
7911 setBranchWeights(I&: *SI, Weights, /*IsExpected=*/false);
7912 }
7913 // BI is handling the default case for SI, and so should share its DebugLoc.
7914 BI->setDebugLoc(SI->getDebugLoc());
7915 It->eraseFromParent();
7916
7917 addPredecessorToBlock(Succ: DefaultCaseBB, NewPred: OrigBB, ExistPred: SplitBB);
7918 if (DTU)
7919 DTU->applyUpdates(Updates: {{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7920 }
7921
7922 // Replace each case with its trailing zeros number.
7923 for (auto &Case : SI->cases()) {
7924 auto *OrigValue = Case.getCaseValue();
7925 Case.setValue(ConstantInt::get(Ty: OrigValue->getIntegerType(),
7926 V: OrigValue->getValue().countr_zero()));
7927 }
7928
7929 // Replace condition with its trailing zeros number.
7930 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7931 ID: Intrinsic::cttz, OverloadTypes: {CondTy}, Args: {Condition, ConstantInt::getTrue(Context)});
7932
7933 SI->setCondition(ConditionTrailingZeros);
7934
7935 return true;
7936}
7937
7938/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7939/// the same destination.
7940static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
7941 DomTreeUpdater *DTU) {
7942 auto *Cmp = dyn_cast<CmpIntrinsic>(Val: SI->getCondition());
7943 if (!Cmp || !Cmp->hasOneUse())
7944 return false;
7945
7946 SmallVector<uint32_t, 4> Weights;
7947 bool HasWeights = extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights);
7948 if (!HasWeights)
7949 Weights.resize(N: 4); // Avoid checking HasWeights everywhere.
7950
7951 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7952 int64_t Res;
7953 BasicBlock *Succ, *OtherSucc;
7954 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7955 BasicBlock *Unreachable = nullptr;
7956
7957 if (SI->getNumCases() == 2) {
7958 // Find which of 1, 0 or -1 is missing (handled by default dest).
7959 SmallSet<int64_t, 3> Missing;
7960 Missing.insert(V: 1);
7961 Missing.insert(V: 0);
7962 Missing.insert(V: -1);
7963
7964 Succ = SI->getDefaultDest();
7965 SuccWeight = Weights[0];
7966 OtherSucc = nullptr;
7967 for (auto &Case : SI->cases()) {
7968 std::optional<int64_t> Val =
7969 Case.getCaseValue()->getValue().trySExtValue();
7970 if (!Val)
7971 return false;
7972 if (!Missing.erase(V: *Val))
7973 return false;
7974 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7975 return false;
7976 OtherSucc = Case.getCaseSuccessor();
7977 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7978 }
7979
7980 assert(Missing.size() == 1 && "Should have one case left");
7981 Res = *Missing.begin();
7982 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7983 // Normalize so that Succ is taken once and OtherSucc twice.
7984 Unreachable = SI->getDefaultDest();
7985 Succ = OtherSucc = nullptr;
7986 for (auto &Case : SI->cases()) {
7987 BasicBlock *NewSucc = Case.getCaseSuccessor();
7988 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7989 if (!OtherSucc || OtherSucc == NewSucc) {
7990 OtherSucc = NewSucc;
7991 OtherSuccWeight += Weight;
7992 } else if (!Succ) {
7993 Succ = NewSucc;
7994 SuccWeight = Weight;
7995 } else if (Succ == NewSucc) {
7996 std::swap(a&: Succ, b&: OtherSucc);
7997 std::swap(a&: SuccWeight, b&: OtherSuccWeight);
7998 } else
7999 return false;
8000 }
8001 for (auto &Case : SI->cases()) {
8002 std::optional<int64_t> Val =
8003 Case.getCaseValue()->getValue().trySExtValue();
8004 if (!Val || (Val != 1 && Val != 0 && Val != -1))
8005 return false;
8006 if (Case.getCaseSuccessor() == Succ) {
8007 Res = *Val;
8008 break;
8009 }
8010 }
8011 } else {
8012 return false;
8013 }
8014
8015 // Determine predicate for the missing case.
8016 ICmpInst::Predicate Pred;
8017 switch (Res) {
8018 case 1:
8019 Pred = ICmpInst::ICMP_UGT;
8020 break;
8021 case 0:
8022 Pred = ICmpInst::ICMP_EQ;
8023 break;
8024 case -1:
8025 Pred = ICmpInst::ICMP_ULT;
8026 break;
8027 }
8028 if (Cmp->isSigned())
8029 Pred = ICmpInst::getSignedPredicate(Pred);
8030
8031 MDNode *NewWeights = nullptr;
8032 if (HasWeights)
8033 NewWeights = MDBuilder(SI->getContext())
8034 .createBranchWeights(TrueWeight: SuccWeight, FalseWeight: OtherSuccWeight);
8035
8036 BasicBlock *BB = SI->getParent();
8037 Builder.SetInsertPoint(SI->getIterator());
8038 Value *ICmp = Builder.CreateICmp(P: Pred, LHS: Cmp->getLHS(), RHS: Cmp->getRHS());
8039 Builder.CreateCondBr(Cond: ICmp, True: Succ, False: OtherSucc, BranchWeights: NewWeights,
8040 Unpredictable: SI->getMetadata(KindID: LLVMContext::MD_unpredictable));
8041 OtherSucc->removePredecessor(Pred: BB);
8042 if (Unreachable)
8043 Unreachable->removePredecessor(Pred: BB);
8044 SI->eraseFromParent();
8045 Cmp->eraseFromParent();
8046 if (DTU && Unreachable)
8047 DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, Unreachable}});
8048 return true;
8049}
8050
8051/// Checking whether two BBs are equal depends on the contents of the
8052/// BasicBlock and the incoming values of their successor PHINodes.
8053/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
8054/// calling this function on each BasicBlock every time isEqual is called,
8055/// especially since the same BasicBlock may be passed as an argument multiple
8056/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
8057/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
8058/// of the incoming values.
8059struct EqualBBWrapper {
8060 BasicBlock *BB;
8061
8062 // One Phi usually has < 8 incoming values.
8063 using BB2ValueMap = SmallDenseMap<BasicBlock *, Value *, 8>;
8064 using Phi2IVsMap = DenseMap<PHINode *, BB2ValueMap>;
8065 Phi2IVsMap *PhiPredIVs;
8066
8067 // We only merge the identical non-entry BBs with
8068 // - terminator unconditional br to Succ (pending relaxation),
8069 // - does not have address taken / weird control.
8070 static bool canBeMerged(const BasicBlock *BB) {
8071 assert(BB && "Expected non-null BB");
8072 // Entry block cannot be eliminated or have predecessors.
8073 if (BB->isEntryBlock())
8074 return false;
8075
8076 // Single successor and must be Succ.
8077 // FIXME: Relax that the terminator is a BranchInst by checking for equality
8078 // on other kinds of terminators. We decide to only support unconditional
8079 // branches for now for compile time reasons.
8080 auto *BI = dyn_cast<UncondBrInst>(Val: BB->getTerminator());
8081 if (!BI)
8082 return false;
8083
8084 // Avoid blocks that are "address-taken" (blockaddress) or have unusual
8085 // uses.
8086 if (BB->hasAddressTaken() || BB->isEHPad())
8087 return false;
8088
8089 // TODO: relax this condition to merge equal blocks with >1 instructions?
8090 // Here, we use a O(1) form of the O(n) comparison of `size() != 1`.
8091 if (&BB->front() != &BB->back())
8092 return false;
8093
8094 // The BB must have at least one predecessor.
8095 if (pred_empty(BB))
8096 return false;
8097
8098 return true;
8099 }
8100};
8101
8102template <> struct llvm::DenseMapInfo<const EqualBBWrapper *> {
8103 static unsigned getHashValue(const EqualBBWrapper *EBW) {
8104 BasicBlock *BB = EBW->BB;
8105 UncondBrInst *BI = cast<UncondBrInst>(Val: BB->getTerminator());
8106 assert(BB->size() == 1 && "Expected just a single branch in the BB");
8107
8108 // Since we assume the BB is just a single UncondBrInst with a single
8109 // successor, we hash as the BB and the incoming Values of its successor
8110 // PHIs. Initially, we tried to just use the successor BB as the hash, but
8111 // including the incoming PHI values leads to better performance.
8112 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
8113 // time and passing it in EqualBBWrapper, but this slowed down the average
8114 // compile time without having any impact on the worst case compile time.
8115 BasicBlock *Succ = BI->getSuccessor();
8116 auto PhiValsForBB = map_range(C: Succ->phis(), F: [&](PHINode &Phi) {
8117 return (*EBW->PhiPredIVs)[&Phi][BB];
8118 });
8119 return hash_combine(args: Succ, args: hash_combine_range(R&: PhiValsForBB));
8120 }
8121 static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
8122 BasicBlock *A = LHS->BB;
8123 BasicBlock *B = RHS->BB;
8124
8125 // FIXME: we checked that the size of A and B are both 1 in
8126 // mergeIdenticalUncondBBs to make the Case list smaller to
8127 // improve performance. If we decide to support BasicBlocks with more
8128 // than just a single instruction, we need to check that A.size() ==
8129 // B.size() here, and we need to check more than just the BranchInsts
8130 // for equality.
8131
8132 UncondBrInst *ABI = cast<UncondBrInst>(Val: A->getTerminator());
8133 UncondBrInst *BBI = cast<UncondBrInst>(Val: B->getTerminator());
8134 if (ABI->getSuccessor() != BBI->getSuccessor())
8135 return false;
8136
8137 // Need to check that PHIs in successor have matching values.
8138 BasicBlock *Succ = ABI->getSuccessor();
8139 auto IfPhiIVMatch = [&](PHINode &Phi) {
8140 // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
8141 // query.
8142 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8143 return PredIVs[A] == PredIVs[B];
8144 };
8145 return all_of(Range: Succ->phis(), P: IfPhiIVMatch);
8146 }
8147};
8148
8149// Merge identical BBs into one of them.
8150static bool mergeIdenticalBBs(ArrayRef<BasicBlock *> Candidates,
8151 DomTreeUpdater *DTU) {
8152 if (Candidates.size() < 2)
8153 return false;
8154
8155 // Build Cases. Skip BBs that are not candidates for simplification. Mark
8156 // PHINodes which need to be processed into PhiPredIVs. We decide to process
8157 // an entire PHI at once after the loop, opposed to calling
8158 // getIncomingValueForBlock inside this loop, since each call to
8159 // getIncomingValueForBlock is O(|Preds|).
8160 EqualBBWrapper::Phi2IVsMap PhiPredIVs;
8161 SmallVector<EqualBBWrapper> BBs2Merge;
8162 BBs2Merge.reserve(N: Candidates.size());
8163 SmallSetVector<PHINode *, 8> Phis;
8164
8165 for (BasicBlock *BB : Candidates) {
8166 BasicBlock *Succ = BB->getSingleSuccessor();
8167 assert(Succ && "Expected unconditional BB");
8168 BBs2Merge.emplace_back(Args: EqualBBWrapper{.BB: BB, .PhiPredIVs: &PhiPredIVs});
8169 Phis.insert_range(R: make_pointer_range(Range: Succ->phis()));
8170 }
8171
8172 // Precompute a data structure to improve performance of isEqual for
8173 // EqualBBWrapper.
8174 PhiPredIVs.reserve(NumEntries: Phis.size());
8175 for (PHINode *Phi : Phis) {
8176 auto &IVs =
8177 PhiPredIVs.try_emplace(Key: Phi, Args: Phi->getNumIncomingValues()).first->second;
8178 // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
8179 // O(|Pred|).
8180 for (auto &IV : Phi->incoming_values())
8181 IVs.insert(KV: {Phi->getIncomingBlock(U: IV), IV.get()});
8182 }
8183
8184 // Group duplicates using DenseSet with custom equality/hashing.
8185 // Build a set such that if the EqualBBWrapper exists in the set and another
8186 // EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
8187 // the set should be replaced with the one in the set. If the EqualBBWrapper
8188 // is not in the set, then it should be added to the set so other
8189 // EqualBBWrapper can check against it in the same manner. We use
8190 // EqualBBWrapper instead of just BasicBlock because we'd like to pass around
8191 // information to isEquality, getHashValue, and when doing the replacement
8192 // with better performance.
8193 DenseSet<const EqualBBWrapper *> Keep;
8194 Keep.reserve(Size: BBs2Merge.size());
8195
8196 SmallVector<DominatorTree::UpdateType> Updates;
8197 Updates.reserve(N: BBs2Merge.size() * 2);
8198
8199 bool MadeChange = false;
8200
8201 // Helper: redirect all edges X -> DeadPred to X -> LivePred.
8202 auto RedirectIncomingEdges = [&](BasicBlock *Dead, BasicBlock *Live) {
8203 SmallSetVector<BasicBlock *, 8> DeadPreds(llvm::from_range,
8204 predecessors(BB: Dead));
8205 if (DTU) {
8206 // All predecessors of DeadPred (except the common predecessor) will be
8207 // moved to LivePred.
8208 Updates.reserve(N: Updates.size() + DeadPreds.size() * 2);
8209 SmallPtrSet<BasicBlock *, 16> LivePreds(llvm::from_range,
8210 predecessors(BB: Live));
8211 for (BasicBlock *PredOfDead : DeadPreds) {
8212 // Do not modify those common predecessors of DeadPred and LivePred.
8213 if (!LivePreds.contains(Ptr: PredOfDead))
8214 Updates.push_back(Elt: {DominatorTree::Insert, PredOfDead, Live});
8215 Updates.push_back(Elt: {DominatorTree::Delete, PredOfDead, Dead});
8216 }
8217 }
8218 LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
8219 Dead->printAsOperand(dbgs()); dbgs() << " with pred ";
8220 Live->printAsOperand(dbgs()); dbgs() << " for ";
8221 Live->getSingleSuccessor()->printAsOperand(dbgs());
8222 dbgs() << "\n");
8223 // Replace successors in all predecessors of DeadPred.
8224 for (BasicBlock *PredOfDead : DeadPreds) {
8225 Instruction *T = PredOfDead->getTerminator();
8226 T->replaceSuccessorWith(OldBB: Dead, NewBB: Live);
8227 }
8228 };
8229
8230 // Try to eliminate duplicate predecessors.
8231 for (const auto &EBW : BBs2Merge) {
8232 // EBW is a candidate for simplification. If we find a duplicate BB,
8233 // replace it.
8234 const auto &[It, Inserted] = Keep.insert(V: &EBW);
8235 if (Inserted)
8236 continue;
8237
8238 // Found duplicate: merge P into canonical predecessor It->Pred.
8239 BasicBlock *KeepBB = (*It)->BB;
8240 BasicBlock *DeadBB = EBW.BB;
8241
8242 // Avoid merging a BB with itself.
8243 if (KeepBB == DeadBB)
8244 continue;
8245
8246 // Redirect all edges into DeadPred to KeepPred.
8247 RedirectIncomingEdges(DeadBB, KeepBB);
8248
8249 // Now DeadBB should become unreachable; leave DCE to later,
8250 // but we can try to simplify it if it only branches to Succ.
8251 // (We won't erase here to keep the routine simple and DT-safe.)
8252 assert(pred_empty(DeadBB) && "DeadBB should be unreachable.");
8253 MadeChange = true;
8254 }
8255
8256 if (DTU && !Updates.empty())
8257 DTU->applyUpdates(Updates);
8258
8259 return MadeChange;
8260}
8261
8262bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8263 DomTreeUpdater *DTU) {
8264 // Collect candidate switch-arms top-down.
8265 SmallSetVector<BasicBlock *, 16> FilteredArms(
8266 llvm::from_range,
8267 make_filter_range(Range: successors(I: SI), Pred: EqualBBWrapper::canBeMerged));
8268 return mergeIdenticalBBs(Candidates: FilteredArms.getArrayRef(), DTU);
8269}
8270
8271bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
8272 DomTreeUpdater *DTU) {
8273 // Need at least 2 predecessors to do anything.
8274 if (!BB || !BB->hasNPredecessorsOrMore(N: 2))
8275 return false;
8276
8277 // Compilation time consideration: retain the canonical loop, otherwise, we
8278 // require more time in the later loop canonicalization.
8279 if (Options.NeedCanonicalLoop && is_contained(Range&: LoopHeaders, Element: BB))
8280 return false;
8281
8282 // Collect candidate predecessors bottom-up.
8283 SmallSetVector<BasicBlock *, 8> FilteredPreds(
8284 llvm::from_range,
8285 make_filter_range(Range: predecessors(BB), Pred: EqualBBWrapper::canBeMerged));
8286 return mergeIdenticalBBs(Candidates: FilteredPreds.getArrayRef(), DTU);
8287}
8288
8289bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8290 BasicBlock *BB = SI->getParent();
8291
8292 if (isValueEqualityComparison(TI: SI)) {
8293 // If we only have one predecessor, and if it is a branch on this value,
8294 // see if that predecessor totally determines the outcome of this switch.
8295 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8296 if (simplifyEqualityComparisonWithOnlyPredecessor(TI: SI, Pred: OnlyPred, Builder))
8297 return requestResimplify();
8298
8299 Value *Cond = SI->getCondition();
8300 if (SelectInst *Select = dyn_cast<SelectInst>(Val: Cond))
8301 if (simplifySwitchOnSelect(SI, Select))
8302 return requestResimplify();
8303
8304 // If the block only contains the switch, see if we can fold the block
8305 // away into any preds.
8306 if (SI == &*BB->begin())
8307 if (foldValueComparisonIntoPredecessors(TI: SI, Builder))
8308 return requestResimplify();
8309 }
8310
8311 // Try to transform the switch into an icmp and a branch.
8312 // The conversion from switch to comparison may lose information on
8313 // impossible switch values, so disable it early in the pipeline.
8314 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8315 return requestResimplify();
8316
8317 // Remove unreachable cases.
8318 if (eliminateDeadSwitchCases(SI, DTU, AC: Options.AC, DL))
8319 return requestResimplify();
8320
8321 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8322 return requestResimplify();
8323
8324 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8325 return requestResimplify();
8326
8327 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8328 return requestResimplify();
8329
8330 // The conversion of switches to arithmetic or lookup table is disabled in
8331 // the early optimization pipeline, as it may lose information or make the
8332 // resulting code harder to analyze.
8333 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8334 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8335 ConvertSwitchToLookupTable: Options.ConvertSwitchToLookupTable))
8336 return requestResimplify();
8337
8338 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8339 return requestResimplify();
8340
8341 if (reduceSwitchRange(SI, Builder, DL, TTI))
8342 return requestResimplify();
8343
8344 if (HoistCommon &&
8345 hoistCommonCodeFromSuccessors(TI: SI, AllInstsEqOnly: !Options.HoistCommonInsts))
8346 return requestResimplify();
8347
8348 // We can merge identical switch arms early to enhance more aggressive
8349 // optimization on switch.
8350 if (simplifyDuplicateSwitchArms(SI, DTU))
8351 return requestResimplify();
8352
8353 if (simplifySwitchWhenUMin(SI, DTU))
8354 return requestResimplify();
8355
8356 return false;
8357}
8358
8359bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
8360 BasicBlock *BB = IBI->getParent();
8361 bool Changed = false;
8362 SmallVector<uint32_t> BranchWeights;
8363 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8364 extractBranchWeights(I: *IBI, Weights&: BranchWeights);
8365
8366 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8367 if (HasBranchWeights)
8368 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8369 TargetWeight[IBI->getDestination(i: I)] += BranchWeights[I];
8370
8371 // Eliminate redundant destinations.
8372 SmallPtrSet<Value *, 8> Succs;
8373 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8374 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8375 BasicBlock *Dest = IBI->getDestination(i: I);
8376 if (!Dest->hasAddressTaken() || !Succs.insert(Ptr: Dest).second) {
8377 if (!Dest->hasAddressTaken())
8378 RemovedSuccs.insert(X: Dest);
8379 Dest->removePredecessor(Pred: BB);
8380 IBI->removeDestination(i: I);
8381 --I;
8382 --E;
8383 Changed = true;
8384 }
8385 }
8386
8387 if (DTU) {
8388 std::vector<DominatorTree::UpdateType> Updates;
8389 Updates.reserve(n: RemovedSuccs.size());
8390 for (auto *RemovedSucc : RemovedSuccs)
8391 Updates.push_back(x: {DominatorTree::Delete, BB, RemovedSucc});
8392 DTU->applyUpdates(Updates);
8393 }
8394
8395 if (IBI->getNumDestinations() == 0) {
8396 // If the indirectbr has no successors, change it to unreachable.
8397 new UnreachableInst(IBI->getContext(), IBI->getIterator());
8398 eraseTerminatorAndDCECond(TI: IBI);
8399 return true;
8400 }
8401
8402 if (IBI->getNumDestinations() == 1) {
8403 // If the indirectbr has one successor, change it to a direct branch.
8404 UncondBrInst::Create(Target: IBI->getDestination(i: 0), InsertBefore: IBI->getIterator());
8405 eraseTerminatorAndDCECond(TI: IBI);
8406 return true;
8407 }
8408 if (HasBranchWeights) {
8409 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8410 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8411 NewBranchWeights[I] += TargetWeight.find(Val: IBI->getDestination(i: I))->second;
8412 setFittedBranchWeights(I&: *IBI, Weights: NewBranchWeights, /*IsExpected=*/false);
8413 }
8414 if (SelectInst *SI = dyn_cast<SelectInst>(Val: IBI->getAddress())) {
8415 if (simplifyIndirectBrOnSelect(IBI, SI))
8416 return requestResimplify();
8417 }
8418 return Changed;
8419}
8420
8421/// Given an block with only a single landing pad and a unconditional branch
8422/// try to find another basic block which this one can be merged with. This
8423/// handles cases where we have multiple invokes with unique landing pads, but
8424/// a shared handler.
8425///
8426/// We specifically choose to not worry about merging non-empty blocks
8427/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8428/// practice, the optimizer produces empty landing pad blocks quite frequently
8429/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8430/// sinking in this file)
8431///
8432/// This is primarily a code size optimization. We need to avoid performing
8433/// any transform which might inhibit optimization (such as our ability to
8434/// specialize a particular handler via tail commoning). We do this by not
8435/// merging any blocks which require us to introduce a phi. Since the same
8436/// values are flowing through both blocks, we don't lose any ability to
8437/// specialize. If anything, we make such specialization more likely.
8438///
8439/// TODO - This transformation could remove entries from a phi in the target
8440/// block when the inputs in the phi are the same for the two blocks being
8441/// merged. In some cases, this could result in removal of the PHI entirely.
8442static bool tryToMergeLandingPad(LandingPadInst *LPad, UncondBrInst *BI,
8443 BasicBlock *BB, DomTreeUpdater *DTU) {
8444 auto Succ = BB->getUniqueSuccessor();
8445 assert(Succ);
8446 // If there's a phi in the successor block, we'd likely have to introduce
8447 // a phi into the merged landing pad block.
8448 if (isa<PHINode>(Val: *Succ->begin()))
8449 return false;
8450
8451 for (BasicBlock *OtherPred : predecessors(BB: Succ)) {
8452 if (BB == OtherPred)
8453 continue;
8454 BasicBlock::iterator I = OtherPred->begin();
8455 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(Val&: I);
8456 if (!LPad2 || !LPad2->isIdenticalTo(I: LPad))
8457 continue;
8458 ++I;
8459 UncondBrInst *BI2 = dyn_cast<UncondBrInst>(Val&: I);
8460 if (!BI2 || !BI2->isIdenticalTo(I: BI))
8461 continue;
8462
8463 std::vector<DominatorTree::UpdateType> Updates;
8464
8465 // We've found an identical block. Update our predecessors to take that
8466 // path instead and make ourselves dead.
8467 SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
8468 for (BasicBlock *Pred : UniquePreds) {
8469 InvokeInst *II = cast<InvokeInst>(Val: Pred->getTerminator());
8470 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8471 "unexpected successor");
8472 II->setUnwindDest(OtherPred);
8473 if (DTU) {
8474 Updates.push_back(x: {DominatorTree::Insert, Pred, OtherPred});
8475 Updates.push_back(x: {DominatorTree::Delete, Pred, BB});
8476 }
8477 }
8478
8479 SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
8480 for (BasicBlock *Succ : UniqueSuccs) {
8481 Succ->removePredecessor(Pred: BB);
8482 if (DTU)
8483 Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
8484 }
8485
8486 IRBuilder<> Builder(BI);
8487 Builder.CreateUnreachable();
8488 BI->eraseFromParent();
8489 if (DTU)
8490 DTU->applyUpdates(Updates);
8491 return true;
8492 }
8493 return false;
8494}
8495
8496bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
8497 IRBuilder<> &Builder) {
8498 BasicBlock *BB = BI->getParent();
8499 BasicBlock *Succ = BI->getSuccessor(i: 0);
8500
8501 // If the Terminator is the only non-phi instruction, simplify the block.
8502 // If LoopHeader is provided, check if the block or its successor is a loop
8503 // header. (This is for early invocations before loop simplify and
8504 // vectorization to keep canonical loop forms for nested loops. These blocks
8505 // can be eliminated when the pass is invoked later in the back-end.)
8506 // Note that if BB has only one predecessor then we do not introduce new
8507 // backedge, so we can eliminate BB.
8508 bool NeedCanonicalLoop =
8509 Options.NeedCanonicalLoop &&
8510 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(N: 2) &&
8511 (is_contained(Range&: LoopHeaders, Element: BB) || is_contained(Range&: LoopHeaders, Element: Succ)));
8512 BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
8513 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8514 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8515 return true;
8516
8517 // If the only instruction in the block is a seteq/setne comparison against a
8518 // constant, try to simplify the block.
8519 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val&: I)) {
8520 if (ICI->isEquality() && isa<ConstantInt>(Val: ICI->getOperand(i_nocapture: 1))) {
8521 ++I;
8522 if (I->isTerminator() &&
8523 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8524 return true;
8525 if (isa<SelectInst>(Val: I) && I->getNextNode()->isTerminator() &&
8526 tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, Select: cast<SelectInst>(Val&: I),
8527 Builder))
8528 return true;
8529 }
8530 }
8531
8532 // See if we can merge an empty landing pad block with another which is
8533 // equivalent.
8534 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(Val&: I)) {
8535 ++I;
8536 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8537 return true;
8538 }
8539
8540 return false;
8541}
8542
8543static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
8544 BasicBlock *PredPred = nullptr;
8545 for (auto *P : predecessors(BB)) {
8546 BasicBlock *PPred = P->getSinglePredecessor();
8547 if (!PPred || (PredPred && PredPred != PPred))
8548 return nullptr;
8549 PredPred = PPred;
8550 }
8551 return PredPred;
8552}
8553
8554/// Fold the following pattern:
8555/// bb0:
8556/// br i1 %cond1, label %bb1, label %bb2
8557/// bb1:
8558/// br i1 %cond2, label %bb3, label %bb4
8559/// bb2:
8560/// br i1 %cond2, label %bb4, label %bb3
8561/// bb3:
8562/// ...
8563/// bb4:
8564/// ...
8565/// into
8566/// bb0:
8567/// %cond = xor i1 %cond1, %cond2
8568/// br i1 %cond, label %bb4, label %bb3
8569/// bb3:
8570/// ...
8571/// bb4:
8572/// ...
8573/// NOTE: %cond2 always dominates the terminator of bb0.
8574static bool mergeNestedCondBranch(CondBrInst *BI, DomTreeUpdater *DTU) {
8575 BasicBlock *BB = BI->getParent();
8576 BasicBlock *BB1 = BI->getSuccessor(i: 0);
8577 BasicBlock *BB2 = BI->getSuccessor(i: 1);
8578 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, CondBrInst *&SuccBI) {
8579 if (Succ == BB)
8580 return false;
8581 if (&Succ->front() != Succ->getTerminator())
8582 return false;
8583 SuccBI = dyn_cast<CondBrInst>(Val: Succ->getTerminator());
8584 if (!SuccBI)
8585 return false;
8586 BasicBlock *Succ1 = SuccBI->getSuccessor(i: 0);
8587 BasicBlock *Succ2 = SuccBI->getSuccessor(i: 1);
8588 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8589 !isa<PHINode>(Val: Succ1->front()) && !isa<PHINode>(Val: Succ2->front());
8590 };
8591 CondBrInst *BB1BI, *BB2BI;
8592 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8593 return false;
8594
8595 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8596 BB1BI->getSuccessor(i: 0) != BB2BI->getSuccessor(i: 1) ||
8597 BB1BI->getSuccessor(i: 1) != BB2BI->getSuccessor(i: 0))
8598 return false;
8599
8600 BasicBlock *BB3 = BB1BI->getSuccessor(i: 0);
8601 BasicBlock *BB4 = BB1BI->getSuccessor(i: 1);
8602 IRBuilder<> Builder(BI);
8603 BI->setCondition(
8604 Builder.CreateXor(LHS: BI->getCondition(), RHS: BB1BI->getCondition()));
8605 BB1->removePredecessor(Pred: BB);
8606 BI->setSuccessor(idx: 0, NewSucc: BB4);
8607 BB2->removePredecessor(Pred: BB);
8608 BI->setSuccessor(idx: 1, NewSucc: BB3);
8609 if (DTU) {
8610 SmallVector<DominatorTree::UpdateType, 4> Updates;
8611 Updates.push_back(Elt: {DominatorTree::Delete, BB, BB1});
8612 Updates.push_back(Elt: {DominatorTree::Insert, BB, BB4});
8613 Updates.push_back(Elt: {DominatorTree::Delete, BB, BB2});
8614 Updates.push_back(Elt: {DominatorTree::Insert, BB, BB3});
8615
8616 DTU->applyUpdates(Updates);
8617 }
8618 bool HasWeight = false;
8619 uint64_t BBTWeight, BBFWeight;
8620 if (extractBranchWeights(I: *BI, TrueVal&: BBTWeight, FalseVal&: BBFWeight))
8621 HasWeight = true;
8622 else
8623 BBTWeight = BBFWeight = 1;
8624 uint64_t BB1TWeight, BB1FWeight;
8625 if (extractBranchWeights(I: *BB1BI, TrueVal&: BB1TWeight, FalseVal&: BB1FWeight))
8626 HasWeight = true;
8627 else
8628 BB1TWeight = BB1FWeight = 1;
8629 uint64_t BB2TWeight, BB2FWeight;
8630 if (extractBranchWeights(I: *BB2BI, TrueVal&: BB2TWeight, FalseVal&: BB2FWeight))
8631 HasWeight = true;
8632 else
8633 BB2TWeight = BB2FWeight = 1;
8634 if (HasWeight) {
8635 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8636 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8637 setFittedBranchWeights(I&: *BI, Weights, /*IsExpected=*/false,
8638 /*ElideAllZero=*/true);
8639 }
8640 return true;
8641}
8642
8643bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
8644 assert(
8645 !isa<ConstantInt>(BI->getCondition()) &&
8646 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8647 "Tautological conditional branch should have been eliminated already.");
8648
8649 BasicBlock *BB = BI->getParent();
8650 if (!Options.SimplifyCondBranch ||
8651 BI->getFunction()->hasFnAttribute(Kind: Attribute::OptForFuzzing))
8652 return false;
8653
8654 // Conditional branch
8655 if (isValueEqualityComparison(TI: BI)) {
8656 // If we only have one predecessor, and if it is a branch on this value,
8657 // see if that predecessor totally determines the outcome of this
8658 // switch.
8659 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8660 if (simplifyEqualityComparisonWithOnlyPredecessor(TI: BI, Pred: OnlyPred, Builder))
8661 return requestResimplify();
8662
8663 // This block must be empty, except for the setcond inst, if it exists.
8664 // Ignore pseudo intrinsics.
8665 for (auto &I : *BB) {
8666 if (isa<PseudoProbeInst>(Val: I) ||
8667 &I == cast<Instruction>(Val: BI->getCondition()))
8668 continue;
8669 if (&I == BI)
8670 if (foldValueComparisonIntoPredecessors(TI: BI, Builder))
8671 return requestResimplify();
8672 break;
8673 }
8674 }
8675
8676 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8677 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8678 return true;
8679
8680 // If this basic block has dominating predecessor blocks and the dominating
8681 // blocks' conditions imply BI's condition, we know the direction of BI.
8682 std::optional<bool> Imp = isImpliedByDomCondition(Cond: BI->getCondition(), ContextI: BI, DL);
8683 if (Imp) {
8684 // Turn this into a branch on constant.
8685 auto *OldCond = BI->getCondition();
8686 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(Context&: BB->getContext())
8687 : ConstantInt::getFalse(Context&: BB->getContext());
8688 BI->setCondition(TorF);
8689 RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
8690 return requestResimplify();
8691 }
8692
8693 // If this basic block is ONLY a compare and a branch, and if a predecessor
8694 // branches to us and one of our successors, fold the comparison into the
8695 // predecessor and use logical operations to pick the right destination.
8696 if (Options.SpeculateBlocks &&
8697 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, TTI: &TTI, AC: Options.AC,
8698 BonusInstThreshold: Options.BonusInstThreshold))
8699 return requestResimplify();
8700
8701 // We have a conditional branch to two blocks that are only reachable
8702 // from BI. We know that the condbr dominates the two blocks, so see if
8703 // there is any identical code in the "then" and "else" blocks. If so, we
8704 // can hoist it up to the branching block.
8705 if (BI->getSuccessor(i: 0)->getSinglePredecessor()) {
8706 if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
8707 if (HoistCommon &&
8708 hoistCommonCodeFromSuccessors(TI: BI, AllInstsEqOnly: !Options.HoistCommonInsts))
8709 return requestResimplify();
8710
8711 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8712 isProfitableToSpeculate(BI, Invert: std::nullopt, TTI)) {
8713 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8714 auto CanSpeculateConditionalLoadsStores = [&]() {
8715 for (auto *Succ : successors(BB)) {
8716 for (Instruction &I : *Succ) {
8717 if (I.isTerminator()) {
8718 if (I.getNumSuccessors() > 1)
8719 return false;
8720 continue;
8721 } else if (!isSafeCheapLoadStore(I: &I, TTI) ||
8722 SpeculatedConditionalLoadsStores.size() ==
8723 HoistLoadsStoresWithCondFaultingThreshold) {
8724 return false;
8725 }
8726 SpeculatedConditionalLoadsStores.push_back(Elt: &I);
8727 }
8728 }
8729 return !SpeculatedConditionalLoadsStores.empty();
8730 };
8731
8732 if (CanSpeculateConditionalLoadsStores()) {
8733 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8734 Invert: std::nullopt, Sel: nullptr);
8735 return requestResimplify();
8736 }
8737 }
8738 } else {
8739 // If Successor #1 has multiple preds, we may be able to conditionally
8740 // execute Successor #0 if it branches to Successor #1.
8741 Instruction *Succ0TI = BI->getSuccessor(i: 0)->getTerminator();
8742 if (Succ0TI->getNumSuccessors() == 1 &&
8743 Succ0TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 1))
8744 if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 0)))
8745 return requestResimplify();
8746 }
8747 } else if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
8748 // If Successor #0 has multiple preds, we may be able to conditionally
8749 // execute Successor #1 if it branches to Successor #0.
8750 Instruction *Succ1TI = BI->getSuccessor(i: 1)->getTerminator();
8751 if (Succ1TI->getNumSuccessors() == 1 &&
8752 Succ1TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 0))
8753 if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 1)))
8754 return requestResimplify();
8755 }
8756
8757 // If this is a branch on something for which we know the constant value in
8758 // predecessors (e.g. a phi node in the current block), thread control
8759 // through this block.
8760 if (foldCondBranchOnValueKnownInPredecessor(BI))
8761 return requestResimplify();
8762
8763 // Scan predecessor blocks for conditional branches.
8764 for (BasicBlock *Pred : predecessors(BB))
8765 if (CondBrInst *PBI = dyn_cast<CondBrInst>(Val: Pred->getTerminator()))
8766 if (PBI != BI)
8767 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8768 return requestResimplify();
8769
8770 // Look for diamond patterns.
8771 if (MergeCondStores)
8772 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8773 if (CondBrInst *PBI = dyn_cast<CondBrInst>(Val: PrevBB->getTerminator()))
8774 if (PBI != BI)
8775 if (mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
8776 return requestResimplify();
8777
8778 // Look for nested conditional branches.
8779 if (mergeNestedCondBranch(BI, DTU))
8780 return requestResimplify();
8781
8782 return false;
8783}
8784
8785/// Check if passing a value to an instruction will cause undefined behavior.
8786static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8787 assert(V->getType() == I->getType() && "Mismatched types");
8788 Constant *C = dyn_cast<Constant>(Val: V);
8789 if (!C)
8790 return false;
8791
8792 if (I->use_empty())
8793 return false;
8794
8795 if (C->isNullValue() || isa<UndefValue>(Val: C)) {
8796 // Find the first same-block use with a UB-triggering opcode, skipping
8797 // cross-block or before-I uses.
8798 auto FindUse = llvm::find_if(Range: I->uses(), P: [I](auto &U) {
8799 auto *Use = cast<Instruction>(U.getUser());
8800 // Only same-block uses after I can witness UB at I's program point.
8801 // Self-uses and before-I uses can occur when I is a PHI node.
8802 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
8803 return false;
8804 // Change this list when we want to add new instructions.
8805 switch (Use->getOpcode()) {
8806 default:
8807 return false;
8808 case Instruction::GetElementPtr:
8809 case Instruction::Ret:
8810 case Instruction::BitCast:
8811 case Instruction::Load:
8812 case Instruction::Store:
8813 case Instruction::Call:
8814 case Instruction::CallBr:
8815 case Instruction::Invoke:
8816 case Instruction::UDiv:
8817 case Instruction::URem:
8818 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8819 // implemented to avoid code complexity as it is unclear how useful such
8820 // logic is.
8821 case Instruction::SDiv:
8822 case Instruction::SRem:
8823 return true;
8824 }
8825 });
8826 if (FindUse == I->use_end())
8827 return false;
8828 auto &Use = *FindUse;
8829 auto *User = cast<Instruction>(Val: Use.getUser());
8830
8831 // Now make sure that there are no instructions in between that can alter
8832 // control flow (eg. calls)
8833 auto InstrRange =
8834 make_range(x: std::next(x: I->getIterator()), y: User->getIterator());
8835 if (any_of(Range&: InstrRange, P: [](Instruction &I) {
8836 return !isGuaranteedToTransferExecutionToSuccessor(I: &I);
8837 }))
8838 return false;
8839
8840 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8841 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: User))
8842 if (GEP->getPointerOperand() == I) {
8843 // The type of GEP may differ from the type of base pointer.
8844 // Bail out on vector GEPs, as they are not handled by other checks.
8845 if (GEP->getType()->isVectorTy())
8846 return false;
8847 // The current base address is null, there are four cases to consider:
8848 // getelementptr (TY, null, 0) -> null
8849 // getelementptr (TY, null, not zero) -> may be modified
8850 // getelementptr inbounds (TY, null, 0) -> null
8851 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8852 // undefined?
8853 if (!GEP->hasAllZeroIndices() &&
8854 (!GEP->isInBounds() ||
8855 NullPointerIsDefined(F: GEP->getFunction(),
8856 AS: GEP->getPointerAddressSpace())))
8857 PtrValueMayBeModified = true;
8858 return passingValueIsAlwaysUndefined(V, I: GEP, PtrValueMayBeModified);
8859 }
8860
8861 // Look through return.
8862 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: User)) {
8863 bool HasNoUndefAttr =
8864 Ret->getFunction()->hasRetAttribute(Kind: Attribute::NoUndef);
8865 // Return undefined to a noundef return value is undefined.
8866 if (isa<UndefValue>(Val: C) && HasNoUndefAttr)
8867 return true;
8868 // Return null to a nonnull+noundef return value is undefined.
8869 if (C->isNullValue() && HasNoUndefAttr &&
8870 Ret->getFunction()->hasRetAttribute(Kind: Attribute::NonNull)) {
8871 return !PtrValueMayBeModified;
8872 }
8873 }
8874
8875 // Load from null is undefined.
8876 if (LoadInst *LI = dyn_cast<LoadInst>(Val: User))
8877 if (!LI->isVolatile())
8878 return !NullPointerIsDefined(F: LI->getFunction(),
8879 AS: LI->getPointerAddressSpace());
8880
8881 // Store to null is undefined.
8882 if (StoreInst *SI = dyn_cast<StoreInst>(Val: User))
8883 if (!SI->isVolatile())
8884 return (!NullPointerIsDefined(F: SI->getFunction(),
8885 AS: SI->getPointerAddressSpace())) &&
8886 SI->getPointerOperand() == I;
8887
8888 // llvm.assume(false/undef) always triggers immediate UB.
8889 if (auto *Assume = dyn_cast<AssumeInst>(Val: User)) {
8890 // Ignore assume operand bundles.
8891 if (I == Assume->getArgOperand(i: 0))
8892 return true;
8893 }
8894
8895 if (auto *CB = dyn_cast<CallBase>(Val: User)) {
8896 if (C->isNullValue() && NullPointerIsDefined(F: CB->getFunction()))
8897 return false;
8898 // A call to null is undefined.
8899 if (CB->getCalledOperand() == I)
8900 return true;
8901
8902 if (CB->isArgOperand(U: &Use)) {
8903 unsigned ArgIdx = CB->getArgOperandNo(U: &Use);
8904 // Passing null to a nonnnull+noundef argument is undefined.
8905 if (isa<ConstantPointerNull>(Val: C) &&
8906 CB->paramHasNonNullAttr(ArgNo: ArgIdx, /*AllowUndefOrPoison=*/false))
8907 return !PtrValueMayBeModified;
8908 // Passing undef to a noundef argument is undefined.
8909 if (isa<UndefValue>(Val: C) && CB->isPassingUndefUB(ArgNo: ArgIdx))
8910 return true;
8911 }
8912 }
8913 // Div/Rem by zero is immediate UB
8914 if (match(V: User, P: m_BinOp(L: m_Value(), R: m_Specific(V: I))) && User->isIntDivRem())
8915 return true;
8916 }
8917 return false;
8918}
8919
8920/// If BB has an incoming value that will always trigger undefined behavior
8921/// (eg. null pointer dereference), remove the branch leading here.
8922static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
8923 DomTreeUpdater *DTU,
8924 AssumptionCache *AC) {
8925 for (PHINode &PHI : BB->phis())
8926 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8927 if (passingValueIsAlwaysUndefined(V: PHI.getIncomingValue(i), I: &PHI)) {
8928 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8929 Instruction *T = Predecessor->getTerminator();
8930 IRBuilder<> Builder(T);
8931 if (isa<UncondBrInst>(Val: T)) {
8932 BB->removePredecessor(Pred: Predecessor);
8933 // Turn unconditional branches into unreachables.
8934 Builder.CreateUnreachable();
8935 T->eraseFromParent();
8936 if (DTU)
8937 DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
8938 return true;
8939 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: T)) {
8940 BB->removePredecessor(Pred: Predecessor);
8941 // Preserve guarding condition in assume, because it might not be
8942 // inferrable from any dominating condition.
8943 Value *Cond = BI->getCondition();
8944 CallInst *Assumption;
8945 if (BI->getSuccessor(i: 0) == BB)
8946 Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
8947 else
8948 Assumption = Builder.CreateAssumption(Cond);
8949 if (AC)
8950 AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
8951 Builder.CreateBr(Dest: BI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 1)
8952 : BI->getSuccessor(i: 0));
8953 BI->eraseFromParent();
8954 if (DTU)
8955 DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
8956 return true;
8957 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: T)) {
8958 // Redirect all branches leading to UB into
8959 // a newly created unreachable block.
8960 BasicBlock *Unreachable = BasicBlock::Create(
8961 Context&: Predecessor->getContext(), Name: "unreachable", Parent: BB->getParent(), InsertBefore: BB);
8962 Builder.SetInsertPoint(Unreachable);
8963 // The new block contains only one instruction: Unreachable
8964 Builder.CreateUnreachable();
8965 for (const auto &Case : SI->cases())
8966 if (Case.getCaseSuccessor() == BB) {
8967 BB->removePredecessor(Pred: Predecessor);
8968 Case.setSuccessor(Unreachable);
8969 }
8970 if (SI->getDefaultDest() == BB) {
8971 BB->removePredecessor(Pred: Predecessor);
8972 SI->setDefaultDest(Unreachable);
8973 }
8974
8975 if (DTU)
8976 DTU->applyUpdates(
8977 Updates: { { DominatorTree::Insert, Predecessor, Unreachable },
8978 { DominatorTree::Delete, Predecessor, BB } });
8979 return true;
8980 }
8981 }
8982
8983 return false;
8984}
8985
8986bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8987 bool Changed = false;
8988
8989 assert(BB && BB->getParent() && "Block not embedded in function!");
8990 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8991
8992 // Remove basic blocks that have no predecessors (except the entry block)...
8993 // or that just have themself as a predecessor. These are unreachable.
8994 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8995 BB->getSinglePredecessor() == BB) {
8996 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8997 DeleteDeadBlock(BB, DTU);
8998 return true;
8999 }
9000
9001 // Check to see if we can constant propagate this terminator instruction
9002 // away...
9003 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
9004 /*TLI=*/nullptr, DTU);
9005
9006 // Check for and eliminate duplicate PHI nodes in this block.
9007 Changed |= EliminateDuplicatePHINodes(BB);
9008
9009 // Check for and remove branches that will always cause undefined behavior.
9010 if (removeUndefIntroducingPredecessor(BB, DTU, AC: Options.AC))
9011 return requestResimplify();
9012
9013 // Merge basic blocks into their predecessor if there is only one distinct
9014 // pred, and if there is only one distinct successor of the predecessor, and
9015 // if there are no PHI nodes.
9016 if (MergeBlockIntoPredecessor(BB, DTU))
9017 return true;
9018
9019 if (SinkCommon && Options.SinkCommonInsts) {
9020 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
9021 mergeCompatibleInvokes(BB, DTU)) {
9022 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
9023 // so we may now how duplicate PHI's.
9024 // Let's rerun EliminateDuplicatePHINodes() first,
9025 // before foldTwoEntryPHINode() potentially converts them into select's,
9026 // after which we'd need a whole EarlyCSE pass run to cleanup them.
9027 return true;
9028 }
9029 // Merge identical predecessors of this block.
9030 if (simplifyDuplicatePredecessors(BB, DTU))
9031 return true;
9032 }
9033
9034 if (Options.SpeculateBlocks &&
9035 !BB->getParent()->hasFnAttribute(Kind: Attribute::OptForFuzzing)) {
9036 // If there is a trivial two-entry PHI node in this basic block, and we can
9037 // eliminate it, do so now.
9038 if (auto *PN = dyn_cast<PHINode>(Val: BB->begin()))
9039 if (PN->getNumIncomingValues() == 2)
9040 if (foldTwoEntryPHINode(PN, TTI, DTU, AC: Options.AC, DL,
9041 SpeculateUnpredictables: Options.SpeculateUnpredictables))
9042 return true;
9043 }
9044
9045 IRBuilder<> Builder(BB);
9046 Instruction *Terminator = BB->getTerminator();
9047 Builder.SetInsertPoint(Terminator);
9048 switch (Terminator->getOpcode()) {
9049 case Instruction::UncondBr:
9050 Changed |= simplifyUncondBranch(BI: cast<UncondBrInst>(Val: Terminator), Builder);
9051 break;
9052 case Instruction::CondBr:
9053 Changed |= simplifyCondBranch(BI: cast<CondBrInst>(Val: Terminator), Builder);
9054 break;
9055 case Instruction::Resume:
9056 Changed |= simplifyResume(RI: cast<ResumeInst>(Val: Terminator), Builder);
9057 break;
9058 case Instruction::CleanupRet:
9059 Changed |= simplifyCleanupReturn(RI: cast<CleanupReturnInst>(Val: Terminator));
9060 break;
9061 case Instruction::Switch:
9062 Changed |= simplifySwitch(SI: cast<SwitchInst>(Val: Terminator), Builder);
9063 break;
9064 case Instruction::Unreachable:
9065 Changed |= simplifyUnreachable(UI: cast<UnreachableInst>(Val: Terminator));
9066 break;
9067 case Instruction::IndirectBr:
9068 Changed |= simplifyIndirectBr(IBI: cast<IndirectBrInst>(Val: Terminator));
9069 break;
9070 }
9071
9072 return Changed;
9073}
9074
9075bool SimplifyCFGOpt::run(BasicBlock *BB) {
9076 bool Changed = false;
9077
9078 // Repeated simplify BB as long as resimplification is requested.
9079 do {
9080 Resimplify = false;
9081
9082 // Perform one round of simplifcation. Resimplify flag will be set if
9083 // another iteration is requested.
9084 Changed |= simplifyOnce(BB);
9085 } while (Resimplify);
9086
9087 return Changed;
9088}
9089
9090bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
9091 DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
9092 ArrayRef<WeakVH> LoopHeaders) {
9093 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
9094 Options)
9095 .run(BB);
9096}
9097