1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
19#include "llvm/ADT/SetOperations.h"
20#include "llvm/ADT/SetVector.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/Analysis/AssumptionCache.h"
26#include "llvm/Analysis/CaptureTracking.h"
27#include "llvm/Analysis/ConstantFolding.h"
28#include "llvm/Analysis/DomTreeUpdater.h"
29#include "llvm/Analysis/GuardUtils.h"
30#include "llvm/Analysis/InstructionSimplify.h"
31#include "llvm/Analysis/Loads.h"
32#include "llvm/Analysis/MemorySSA.h"
33#include "llvm/Analysis/MemorySSAUpdater.h"
34#include "llvm/Analysis/TargetTransformInfo.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
40#include "llvm/IR/ConstantRange.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
51#include "llvm/IR/Instructions.h"
52#include "llvm/IR/IntrinsicInst.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/PatternMatch.h"
61#include "llvm/IR/ProfDataUtils.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
67#include "llvm/Support/BranchProbability.h"
68#include "llvm/Support/Casting.h"
69#include "llvm/Support/CommandLine.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/ErrorHandling.h"
72#include "llvm/Support/KnownBits.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Transforms/Utils/BasicBlockUtils.h"
76#include "llvm/Transforms/Utils/Cloning.h"
77#include "llvm/Transforms/Utils/Local.h"
78#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
79#include "llvm/Transforms/Utils/ValueMapper.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
101cl::opt<bool> RequireAndPreserveDomTree(
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
112static cl::opt<unsigned> PHINodeFoldingThreshold(
113 "phi-node-folding-threshold", cl::Hidden, cl::init(Val: 2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
117static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(Val: 4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(Val: true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
127static cl::opt<bool> HoistLoadsWithCondFaulting(
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(Val: true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
131static cl::opt<bool> HoistStoresWithCondFaulting(
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(Val: true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
135static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(Val: 6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
141static cl::opt<unsigned>
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(Val: 20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(Val: true),
149 cl::desc("Sink common instructions down to the end block"));
150
151static cl::opt<bool> HoistCondStores(
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(Val: true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
155static cl::opt<bool> MergeCondStores(
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(Val: true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
161static cl::opt<bool> MergeCondStoresAggressively(
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(Val: false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
166static cl::opt<bool> SpeculateOneExpensiveInst(
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(Val: true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
171static cl::opt<unsigned> MaxSpeculationDepth(
172 "max-speculation-depth", cl::Hidden, cl::init(Val: 10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(Val: 10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
183static cl::opt<unsigned>
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(Val: 2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
189static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(Val: 2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
196static cl::opt<bool> EnableMergeCompatibleInvokes(
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(Val: true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
200static cl::opt<unsigned> MaxSwitchCasesPerResult(
201 "max-switch-cases-per-result", cl::Hidden, cl::init(Val: 16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
204static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(Val: 24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
209extern cl::opt<bool> ProfcheckDisableMetadataFixes;
210
211} // end namespace llvm
212
// Pass statistics, reported with -stats. Each counter records how many times
// the corresponding CFG simplification fired during compilation.
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
          "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables,
          "Number of switch instructions turned into lookup tables");
STATISTIC(
    NumLookupTablesHoles,
    "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
STATISTIC(NumFoldValueComparisonIntoPredecessors,
          "Number of value comparisons folded into predecessor basic blocks");
STATISTIC(NumFoldBranchToCommonDest,
          "Number of branches folded into predecessor basic block");
STATISTIC(
    NumHoistCommonCode,
    "Number of common instruction 'blocks' hoisted up to the begin block");
STATISTIC(NumHoistCommonInstrs,
          "Number of common instructions hoisted up to the begin block");
STATISTIC(NumSinkCommonCode,
          "Number of common instruction 'blocks' sunk down to the end block");
STATISTIC(NumSinkCommonInstrs,
          "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
STATISTIC(NumInvokes,
          "Number of invokes with empty resume blocks simplified into calls");
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
// Pairs each value that the switch produces with the vector of cases that
// produce it: the first field is the produced value, the second field is the
// group of case constants selecting that value.
using SwitchCaseResultVectorTy =
    SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;

// Pairs each PHI node fed by the switch (first field) with the value that PHI
// receives for one particular case of the switch (second field).
using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
256 ConstantInt *Value;
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
/// Driver class holding the per-run state (analyses + options) shared by the
/// individual CFG simplification routines. One instance is used per basic
/// block via run().
class SimplifyCFGOpt {
  const TargetTransformInfo &TTI;
  DomTreeUpdater *DTU;
  const DataLayout &DL;
  // Blocks the caller considers loop headers; see the constructor's caller
  // for how they are collected.
  ArrayRef<WeakVH> LoopHeaders;
  const SimplifyCFGOptions &Options;
  // Set (via requestResimplify) when a transform wants the block to be
  // simplified again after the current pass over it completes.
  bool Resimplify;

  // Value-equality (switch-like) comparison folding helpers.
  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *getValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool foldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  // Per-terminator-kind simplifications.
  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
  bool foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);
  bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
                                                   SelectInst *Select,
                                                   IRBuilder<> &Builder);
  // Hoisting / sinking / speculation of code across the branch.
  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs,
      ArrayRef<BasicBlock *> UniqueSuccessors);
  bool speculativelyExecuteBB(CondBrInst *BI, BasicBlock *ThenBB);
  bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool simplifyBranchOnICmpChain(CondBrInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicatePredecessors(BasicBlock *Succ, DomTreeUpdater *DTU);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  /// Run one iteration of simplification over \p BB.
  bool simplifyOnce(BasicBlock *BB);
  /// Iterate simplifyOnce until a fixed point (or iteration limit) is hit.
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};
342
343// we synthesize a || b as select a, true, b
344// we synthesize a && b as select a, b, false
345// this function determines if SI is playing one of those roles.
346[[maybe_unused]] bool
347isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
348 return ((isa<ConstantInt>(Val: SI->getTrueValue()) &&
349 (dyn_cast<ConstantInt>(Val: SI->getTrueValue())->isOne())) ||
350 (isa<ConstantInt>(Val: SI->getFalseValue()) &&
351 (dyn_cast<ConstantInt>(Val: SI->getFalseValue())->isNullValue())));
352}
353
354} // end anonymous namespace
355
356/// Return true if all the PHI nodes in the basic block \p BB
357/// receive compatible (identical) incoming values when coming from
358/// all of the predecessor blocks that are specified in \p IncomingBlocks.
359///
360/// Note that if the values aren't exactly identical, but \p EquivalenceSet
361/// is provided, and *both* of the values are present in the set,
362/// then they are considered equal.
363static bool incomingValuesAreCompatible(
364 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
365 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
366 assert(IncomingBlocks.size() == 2 &&
367 "Only for a pair of incoming blocks at the time!");
368
369 // FIXME: it is okay if one of the incoming values is an `undef` value,
370 // iff the other incoming value is guaranteed to be a non-poison value.
371 // FIXME: it is okay if one of the incoming values is a `poison` value.
372 return all_of(Range: BB->phis(), P: [IncomingBlocks, EquivalenceSet](PHINode &PN) {
373 Value *IV0 = PN.getIncomingValueForBlock(BB: IncomingBlocks[0]);
374 Value *IV1 = PN.getIncomingValueForBlock(BB: IncomingBlocks[1]);
375 if (IV0 == IV1)
376 return true;
377 if (EquivalenceSet && EquivalenceSet->contains(Ptr: IV0) &&
378 EquivalenceSet->contains(Ptr: IV1))
379 return true;
380 return false;
381 });
382}
383
384/// Return true if it is safe to merge these two
385/// terminator instructions together.
386static bool
387safeToMergeTerminators(Instruction *SI1, Instruction *SI2,
388 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
389 if (SI1 == SI2)
390 return false; // Can't merge with self!
391
392 // It is not safe to merge these two switch instructions if they have a common
393 // successor, and if that successor has a PHI node, and if *that* PHI node has
394 // conflicting incoming values from the two switch blocks.
395 BasicBlock *SI1BB = SI1->getParent();
396 BasicBlock *SI2BB = SI2->getParent();
397
398 SmallPtrSet<BasicBlock *, 16> SI1Succs(llvm::from_range, successors(BB: SI1BB));
399 bool Fail = false;
400 for (BasicBlock *Succ : successors(BB: SI2BB)) {
401 if (!SI1Succs.count(Ptr: Succ))
402 continue;
403 if (incomingValuesAreCompatible(BB: Succ, IncomingBlocks: {SI1BB, SI2BB}))
404 continue;
405 Fail = true;
406 if (FailBlocks)
407 FailBlocks->insert(X: Succ);
408 else
409 break;
410 }
411
412 return !Fail;
413}
414
415/// Update PHI nodes in Succ to indicate that there will now be entries in it
416/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
417/// will be the same as those coming in from ExistPred, an existing predecessor
418/// of Succ.
419static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
420 BasicBlock *ExistPred,
421 MemorySSAUpdater *MSSAU = nullptr) {
422 for (PHINode &PN : Succ->phis())
423 PN.addIncoming(V: PN.getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
424 if (MSSAU)
425 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(BB: Succ))
426 MPhi->addIncoming(V: MPhi->getIncomingValueForBlock(BB: ExistPred), BB: NewPred);
427}
428
/// Compute an abstract "cost" of speculating the given instruction,
/// which is assumed to be safe to speculate. TCC_Free means cheap,
/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
/// expensive.
static InstructionCost computeSpeculationCost(const User *I,
                                              const TargetTransformInfo &TTI) {
  // Size-and-latency is the cost kind used throughout SimplifyCFG's
  // speculation heuristics.
  return TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
}
437
/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We don't handle
/// the true generality of domination here, just a special case which works
/// well enough for us.
///
/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
/// see if V (which must be an instruction) and its recursive operands
/// that do not dominate BB have a combined cost lower than Budget and
/// are non-trapping. If both are true, the instruction is inserted into the
/// set and true is returned.
///
/// The cost for most non-trapping instructions is defined as 1 except for
/// Select whose cost is 2.
///
/// After this function returns, Cost is increased by the cost of
/// V plus its non-dominating operands. If that cost is greater than
/// Budget, false is returned and Cost is undefined.
///
/// \p InsertPt and \p AC are the speculation context/assumption cache passed
/// through to isSafeToSpeculativelyExecute. \p ZeroCostInstructions collects
/// instructions whose cost has already been accounted for (see the
/// with.overflow special case below) so they are not charged twice.
/// \p Depth tracks the recursion depth, capped at MaxSpeculationDepth.
static bool dominatesMergePoint(
    Value *V, BasicBlock *BB, Instruction *InsertPt,
    SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
    InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
    SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
  // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
  // so limit the recursion depth.
  // TODO: While this recursion limit does prevent pathological behavior, it
  // would be better to track visited instructions to avoid cycles.
  if (Depth == MaxSpeculationDepth)
    return false;

  Instruction *I = dyn_cast<Instruction>(Val: V);
  if (!I) {
    // Non-instructions dominate all instructions and can be executed
    // unconditionally.
    return true;
  }
  BasicBlock *PBB = I->getParent();

  // We don't want to allow weird loops that might have the "if condition" in
  // the bottom of this block.
  if (PBB == BB)
    return false;

  // If this instruction is defined in a block that contains an unconditional
  // branch to BB, then it must be in the 'conditional' part of the "if
  // statement". If not, it definitely dominates the region.
  UncondBrInst *BI = dyn_cast<UncondBrInst>(Val: PBB->getTerminator());
  if (!BI || BI->getSuccessor() != BB)
    return true;

  // If we have seen this instruction before, don't count it again.
  if (AggressiveInsts.count(Ptr: I))
    return true;

  // Okay, it looks like the instruction IS in the "condition". Check to
  // see if it's a cheap instruction to unconditionally compute, and if it
  // only uses stuff defined outside of the condition. If so, hoist it out.
  if (!isSafeToSpeculativelyExecute(I, CtxI: InsertPt, AC))
    return false;

  // Overflow arithmetic instruction plus extract value are usually generated
  // when a division is being replaced. But, in this case, the zero check may
  // still be kept in the code. In that case it would be worth to hoist these
  // two instruction out of the basic block. Let's treat this pattern as one
  // single cheap instruction here!
  WithOverflowInst *OverflowInst;
  if (match(V: I, P: m_ExtractValue<1>(V: m_OneUse(SubPattern: m_WithOverflowInst(I&: OverflowInst))))) {
    // Charge the pair once here; the with.overflow itself is then free when
    // visited via ZeroCostInstructions.
    ZeroCostInstructions.insert(Ptr: OverflowInst);
    Cost += 1;
  } else if (!ZeroCostInstructions.contains(Ptr: I))
    Cost += computeSpeculationCost(I, TTI);

  // Allow exactly one instruction to be speculated regardless of its cost
  // (as long as it is safe to do so).
  // This is intended to flatten the CFG even if the instruction is a division
  // or other expensive operation. The speculation of an expensive instruction
  // is expected to be undone in CodeGenPrepare if the speculation has not
  // enabled further IR optimizations.
  if (Cost > Budget &&
      (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
       !Cost.isValid()))
    return false;

  // Okay, we can only really hoist these out if their operands do
  // not take us over the cost threshold.
  for (Use &Op : I->operands())
    if (!dominatesMergePoint(V: Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
                             TTI, AC, ZeroCostInstructions, Depth: Depth + 1))
      return false;
  // Okay, it's safe to do this! Remember this instruction.
  AggressiveInsts.insert(Ptr: I);
  return true;
}
530
531/// Extract ConstantInt from value, looking through IntToPtr
532/// and PointerNullValue. Return NULL if value is not a constant int.
533static ConstantInt *getConstantInt(Value *V, const DataLayout &DL) {
534 // Normal constant int.
535 ConstantInt *CI = dyn_cast<ConstantInt>(Val: V);
536 if (CI || !isa<Constant>(Val: V) || !V->getType()->isPointerTy())
537 return CI;
538
539 // It is not safe to look through inttoptr or ptrtoint when using unstable
540 // pointer types.
541 if (DL.hasUnstableRepresentation(Ty: V->getType()))
542 return nullptr;
543
544 // This is some kind of pointer constant. Turn it into a pointer-sized
545 // ConstantInt if possible.
546 IntegerType *IntPtrTy = cast<IntegerType>(Val: DL.getIntPtrType(V->getType()));
547
548 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
549 if (isa<ConstantPointerNull>(Val: V))
550 return ConstantInt::get(Ty: IntPtrTy, V: 0);
551
552 // IntToPtr const int, we can look through this if the semantics of
553 // inttoptr for this address space are a simple (truncating) bitcast.
554 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V))
555 if (CE->getOpcode() == Instruction::IntToPtr)
556 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CE->getOperand(i_nocapture: 0))) {
557 // The constant is very likely to have the right type already.
558 if (CI->getType() == IntPtrTy)
559 return CI;
560 else
561 return cast<ConstantInt>(
562 Val: ConstantFoldIntegerCast(C: CI, DestTy: IntPtrTy, /*isSigned=*/IsSigned: false, DL));
563 }
564 return nullptr;
565}
566
567namespace {
568
569/// Given a chain of or (||) or and (&&) comparison of a value against a
570/// constant, this will try to recover the information required for a switch
571/// structure.
572/// It will depth-first traverse the chain of comparison, seeking for patterns
573/// like %a == 12 or %a < 4 and combine them to produce a set of integer
574/// representing the different cases for the switch.
575/// Note that if the chain is composed of '||' it will build the set of elements
576/// that matches the comparisons (i.e. any of this value validate the chain)
577/// while for a chain of '&&' it will build the set elements that make the test
578/// fail.
579struct ConstantComparesGatherer {
580 const DataLayout &DL;
581
582 /// Value found for the switch comparison
583 Value *CompValue = nullptr;
584
585 /// Extra clause to be checked before the switch
586 Value *Extra = nullptr;
587
588 /// Set of integers to match in switch
589 SmallVector<ConstantInt *, 8> Vals;
590
591 /// Number of comparisons matched in the and/or chain
592 unsigned UsedICmps = 0;
593
594 /// If the elements in Vals matches the comparisons
595 bool IsEq = false;
596
597 // Used to check if the first matched CompValue shall be the Extra check.
598 bool IgnoreFirstMatch = false;
599 bool MultipleMatches = false;
600
601 /// Construct and compute the result for the comparison instruction Cond
602 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
603 gather(V: Cond);
604 if (CompValue || !MultipleMatches)
605 return;
606 Extra = nullptr;
607 Vals.clear();
608 UsedICmps = 0;
609 IgnoreFirstMatch = true;
610 gather(V: Cond);
611 }
612
613 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
614 ConstantComparesGatherer &
615 operator=(const ConstantComparesGatherer &) = delete;
616
617private:
618 /// Try to set the current value used for the comparison, it succeeds only if
619 /// it wasn't set before or if the new value is the same as the old one
620 bool setValueOnce(Value *NewVal) {
621 if (IgnoreFirstMatch) {
622 IgnoreFirstMatch = false;
623 return false;
624 }
625 if (CompValue && CompValue != NewVal) {
626 MultipleMatches = true;
627 return false;
628 }
629 CompValue = NewVal;
630 return true;
631 }
632
633 /// Try to match Instruction "I" as a comparison against a constant and
634 /// populates the array Vals with the set of values that match (or do not
635 /// match depending on isEQ).
636 /// Return false on failure. On success, the Value the comparison matched
637 /// against is placed in CompValue.
638 /// If CompValue is already set, the function is expected to fail if a match
639 /// is found but the value compared to is different.
640 bool matchInstruction(Instruction *I, bool isEQ) {
641 if (match(V: I, P: m_Not(V: m_Instruction(I))))
642 isEQ = !isEQ;
643
644 Value *Val;
645 if (match(V: I, P: m_NUWTrunc(Op: m_Value(V&: Val)))) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(Val))
648 return false;
649 UsedICmps++;
650 Vals.push_back(Elt: ConstantInt::get(Ty: cast<IntegerType>(Val: Val->getType()), V: isEQ));
651 return true;
652 }
653 // If this is an icmp against a constant, handle this as one of the cases.
654 ICmpInst *ICI;
655 ConstantInt *C;
656 if (!((ICI = dyn_cast<ICmpInst>(Val: I)) &&
657 (C = getConstantInt(V: I->getOperand(i: 1), DL)))) {
658 return false;
659 }
660
661 Value *RHSVal;
662 const APInt *RHSC;
663
664 // Pattern match a special case
665 // (x & ~2^z) == y --> x == y || x == y|2^z
666 // This undoes a transformation done by instcombine to fuse 2 compares.
667 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
668 // It's a little bit hard to see why the following transformations are
669 // correct. Here is a CVC3 program to verify them for 64-bit values:
670
671 /*
672 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
673 x : BITVECTOR(64);
674 y : BITVECTOR(64);
675 z : BITVECTOR(64);
676 mask : BITVECTOR(64) = BVSHL(ONE, z);
677 QUERY( (y & ~mask = y) =>
678 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
679 );
680 QUERY( (y | mask = y) =>
681 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
682 );
683 */
684
685 // Please note that each pattern must be a dual implication (<--> or
686 // iff). One directional implication can create spurious matches. If the
687 // implication is only one-way, an unsatisfiable condition on the left
688 // side can imply a satisfiable condition on the right side. Dual
689 // implication ensures that satisfiable conditions are transformed to
690 // other satisfiable conditions and unsatisfiable conditions are
691 // transformed to other unsatisfiable conditions.
692
693 // Here is a concrete example of a unsatisfiable condition on the left
694 // implying a satisfiable condition on the right:
695 //
696 // mask = (1 << z)
697 // (x & ~mask) == y --> (x == y || x == (y | mask))
698 //
699 // Substituting y = 3, z = 0 yields:
700 // (x & -2) == 3 --> (x == 3 || x == 2)
701
702 // Pattern match a special case:
703 /*
704 QUERY( (y & ~mask = y) =>
705 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
706 );
707 */
708 if (match(V: ICI->getOperand(i_nocapture: 0),
709 P: m_And(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
710 APInt Mask = ~*RHSC;
711 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
712 // If we already have a value for the switch, it has to match!
713 if (!setValueOnce(RHSVal))
714 return false;
715
716 Vals.push_back(Elt: C);
717 Vals.push_back(
718 Elt: ConstantInt::get(Context&: C->getContext(),
719 V: C->getValue() | Mask));
720 UsedICmps++;
721 return true;
722 }
723 }
724
725 // Pattern match a special case:
726 /*
727 QUERY( (y | mask = y) =>
728 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
729 );
730 */
731 if (match(V: ICI->getOperand(i_nocapture: 0),
732 P: m_Or(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
733 APInt Mask = *RHSC;
734 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
735 // If we already have a value for the switch, it has to match!
736 if (!setValueOnce(RHSVal))
737 return false;
738
739 Vals.push_back(Elt: C);
740 Vals.push_back(Elt: ConstantInt::get(Context&: C->getContext(),
741 V: C->getValue() & ~Mask));
742 UsedICmps++;
743 return true;
744 }
745 }
746
747 // If we already have a value for the switch, it has to match!
748 if (!setValueOnce(ICI->getOperand(i_nocapture: 0)))
749 return false;
750
751 UsedICmps++;
752 Vals.push_back(Elt: C);
753 return true;
754 }
755
756 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
757 ConstantRange Span =
758 ConstantRange::makeExactICmpRegion(Pred: ICI->getPredicate(), Other: C->getValue());
759
760 // Shift the range if the compare is fed by an add. This is the range
761 // compare idiom as emitted by instcombine.
762 Value *CandidateVal = I->getOperand(i: 0);
763 if (match(V: I->getOperand(i: 0), P: m_Add(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
764 Span = Span.subtract(CI: *RHSC);
765 CandidateVal = RHSVal;
766 }
767
768 // If this is an and/!= check, then we are looking to build the set of
769 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
770 // x != 0 && x != 1.
771 if (!isEQ)
772 Span = Span.inverse();
773
774 // If there are a ton of values, we don't want to make a ginormous switch.
775 if (Span.isSizeLargerThan(MaxSize: 8) || Span.isEmptySet()) {
776 return false;
777 }
778
779 // If we already have a value for the switch, it has to match!
780 if (!setValueOnce(CandidateVal))
781 return false;
782
783 // Add all values from the range to the set
784 APInt Tmp = Span.getLower();
785 do
786 Vals.push_back(Elt: ConstantInt::get(Context&: I->getContext(), V: Tmp));
787 while (++Tmp != Span.getUpper());
788
789 UsedICmps++;
790 return true;
791 }
792
793 /// Given a potentially 'or'd or 'and'd together collection of icmp
794 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
795 /// the value being compared, and stick the list constants into the Vals
796 /// vector.
797 /// One "Extra" case is allowed to differ from the other.
  /// Walk a tree of '||' (or '&&') conditions rooted at \p V depth-first,
  /// feeding each leaf to matchInstruction. On success the compared value and
  /// its constants are recorded in the member state (CompValue, Vals, Extra).
  void gather(Value *V) {
    // Determine the chain's polarity from the root: '||' collects values for
    // which the condition is true (eq-style), '&&' collects values for which
    // it is false (ne-style).
    Value *Op0, *Op1;
    if (match(V, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1))))
      IsEq = true;
    else if (match(V, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1))))
      IsEq = false;
    else
      return;
    // Keep a stack (SmallVector for efficiency) for depth-first traversal
    SmallVector<Value *, 8> DFT{Op0, Op1};
    SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};

    while (!DFT.empty()) {
      V = DFT.pop_back_val();

      if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
        // If it is a || (or && depending on isEQ), process the operands.
        if (IsEq ? match(V: I, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))
                 : match(V: I, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) {
          // Push Op0 last so it is popped (visited) first: left-to-right.
          if (Visited.insert(Ptr: Op1).second)
            DFT.push_back(Elt: Op1);
          if (Visited.insert(Ptr: Op0).second)
            DFT.push_back(Elt: Op0);

          continue;
        }

        // Try to match the current instruction
        if (matchInstruction(I, isEQ: IsEq))
          // Match succeed, continue the loop
          continue;
      }

      // One element of the sequence of || (or &&) could not be matched as a
      // comparison against the same value as the others.
      // We allow only one "Extra" case to be checked before the switch
      if (!Extra) {
        Extra = V;
        continue;
      }
      // Failed to parse a proper sequence, abort now
      CompValue = nullptr;
      break;
    }
  }
843};
844
845} // end anonymous namespace
846
847static void eraseTerminatorAndDCECond(Instruction *TI,
848 MemorySSAUpdater *MSSAU = nullptr) {
849 Instruction *Cond = nullptr;
850 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
851 Cond = dyn_cast<Instruction>(Val: SI->getCondition());
852 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
853 Cond = dyn_cast<Instruction>(Val: BI->getCondition());
854 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(Val: TI)) {
855 Cond = dyn_cast<Instruction>(Val: IBI->getAddress());
856 }
857
858 TI->eraseFromParent();
859 if (Cond)
860 RecursivelyDeleteTriviallyDeadInstructions(V: Cond, TLI: nullptr, MSSAU);
861}
862
/// If the specified terminator is a value-equality comparison -- a switch, a
/// conditional branch on a one-use "icmp eq/ne X, C" against a constant int,
/// or a conditional branch on a one-use `trunc nuw` -- return the value being
/// compared; otherwise return null. A lossless ptrtoint feeding the
/// comparison is looked through (unless the pointer representation is
/// unstable).
Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
  Value *CV = nullptr;
  if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
    // Do not permit merging of large switch instructions into their
    // predecessors unless there is only one predecessor.
    if (!SI->getParent()->hasNPredecessorsOrMore(N: 128 / SI->getNumSuccessors()))
      CV = SI->getCondition();
  } else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI))
    if (BI->getCondition()->hasOneUse()) {
      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition())) {
        if (ICI->isEquality() && getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL))
          CV = ICI->getOperand(i_nocapture: 0);
      } else if (auto *Trunc = dyn_cast<TruncInst>(Val: BI->getCondition())) {
        // A `trunc nuw` to i1 is equivalent to comparing the source != 0.
        if (Trunc->hasNoUnsignedWrap())
          CV = Trunc->getOperand(i_nocapture: 0);
      }
    }

  // Unwrap any lossless ptrtoint cast (except for unstable pointers).
  if (CV) {
    if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(Val: CV)) {
      Value *Ptr = PTII->getPointerOperand();
      if (DL.hasUnstableRepresentation(Ty: Ptr->getType()))
        return CV;
      // Only look through the cast when it is lossless (same bit width).
      if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
        CV = Ptr;
    }
  }
  return CV;
}
895
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
    Cases.reserve(n: SI->getNumCases());
    for (auto Case : SI->cases())
      Cases.push_back(x: ValueEqualityComparisonCase(Case.getCaseValue(),
                                                  Case.getCaseSuccessor()));
    return SI->getDefaultDest();
  }

  CondBrInst *BI = cast<CondBrInst>(Val: TI);
  Value *Cond = BI->getCondition();
  ICmpInst::Predicate Pred;
  ConstantInt *C;
  if (auto *ICI = dyn_cast<ICmpInst>(Val: Cond)) {
    Pred = ICI->getPredicate();
    C = getConstantInt(V: ICI->getOperand(i_nocapture: 1), DL);
  } else {
    // A branch on `trunc nuw X` is modeled as "X != 0".
    Pred = ICmpInst::ICMP_NE;
    auto *Trunc = cast<TruncInst>(Val: Cond);
    C = ConstantInt::get(Ty: cast<IntegerType>(Val: Trunc->getOperand(i_nocapture: 0)->getType()), V: 0);
  }
  // For EQ, the case value's destination is the true successor (index 0) and
  // the default is the false successor; for NE it is the other way around.
  BasicBlock *Succ = BI->getSuccessor(i: Pred == ICmpInst::ICMP_NE);
  Cases.push_back(x: ValueEqualityComparisonCase(C, Succ));
  return BI->getSuccessor(i: Pred == ICmpInst::ICMP_EQ);
}
924
925/// Given a vector of bb/value pairs, remove any entries
926/// in the list that match the specified block.
927static void
928eliminateBlockCases(BasicBlock *BB,
929 std::vector<ValueEqualityComparisonCase> &Cases) {
930 llvm::erase(C&: Cases, V: BB);
931}
932
933/// Return true if there are any keys in C1 that exist in C2 as well.
934static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
935 std::vector<ValueEqualityComparisonCase> &C2) {
936 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
937
938 // Make V1 be smaller than V2.
939 if (V1->size() > V2->size())
940 std::swap(a&: V1, b&: V2);
941
942 if (V1->empty())
943 return false;
944 if (V1->size() == 1) {
945 // Just scan V2.
946 ConstantInt *TheVal = (*V1)[0].Value;
947 for (const ValueEqualityComparisonCase &VECC : *V2)
948 if (TheVal == VECC.Value)
949 return true;
950 }
951
952 // Otherwise, just sort both lists and compare element by element.
953 array_pod_sort(Start: V1->begin(), End: V1->end());
954 array_pod_sort(Start: V2->begin(), End: V2->end());
955 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
956 while (i1 != e1 && i2 != e2) {
957 if ((*V1)[i1].Value == (*V2)[i2].Value)
958 return true;
959 if ((*V1)[i1].Value < (*V2)[i2].Value)
960 ++i1;
961 else
962 ++i2;
963 }
964 return false;
965}
966
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(TI: Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(TI: Pred->getTerminator(), Cases&: PredCases);
  eliminateBlockCases(BB: PredDef, Cases&: PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, Cases&: ThisCases);
  eliminateBlockCases(BB: ThisDef, Cases&: ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(C1&: PredCases, C2&: ThisCases))
      return false;

    if (isa<CondBrInst>(Val: TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(Dest: ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(Pred: PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      eraseTerminatorAndDCECond(TI);

      if (DTU)
        DTU->applyUpdates(
            Updates: {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(Val: TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (const ValueEqualityComparisonCase &Case : PredCases)
      DeadCases.insert(Ptr: Case.Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    // Walk the cases in reverse so that removing the current case does not
    // disturb the iterators for the cases still to be visited.
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(Ptr: i->getCaseValue())) {
        Successor->removePredecessor(Pred: PredDef);
        SI.removeCase(I: i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    if (DTU) {
      // Only delete a CFG edge if no remaining case targets that successor.
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back(x: {DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(BB: TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Ptr: Succ);
      Succ->removePredecessor(Pred: TIBB);
    } else
      // Keep exactly one edge to TheRealDest; any duplicate edges are dead.
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(Dest: TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  eraseTerminatorAndDCECond(TI);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(N: RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back(Elt: {DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1120
1121namespace {
1122
1123/// This class implements a stable ordering of constant
1124/// integers that does not depend on their address. This is important for
1125/// applications that sort ConstantInt's to ensure uniqueness.
1126struct ConstantIntOrdering {
1127 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1128 return LHS->getValue().ult(RHS: RHS->getValue());
1129 }
1130};
1131
1132} // end anonymous namespace
1133
1134static int constantIntSortPredicate(ConstantInt *const *P1,
1135 ConstantInt *const *P2) {
1136 const ConstantInt *LHS = *P1;
1137 const ConstantInt *RHS = *P2;
1138 if (LHS == RHS)
1139 return 0;
1140 return LHS->getValue().ult(RHS: RHS->getValue()) ? 1 : -1;
1141}
1142
1143/// Get Weights of a given terminator, the default weight is at the front
1144/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1145/// metadata.
1146static void getBranchWeights(Instruction *TI,
1147 SmallVectorImpl<uint64_t> &Weights) {
1148 MDNode *MD = TI->getMetadata(KindID: LLVMContext::MD_prof);
1149 assert(MD && "Invalid branch-weight metadata");
1150 extractFromBranchWeightMD64(ProfileData: MD, Weights);
1151
1152 // If TI is a conditional eq, the default case is the false case,
1153 // and the corresponding branch-weight data is at index 2. We swap the
1154 // default weight to be the first entry.
1155 if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
1156 assert(Weights.size() == 2);
1157 auto *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition());
1158 if (!ICI)
1159 return;
1160
1161 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1162 std::swap(a&: Weights.front(), b&: Weights.back());
1163 }
1164}
1165
/// Clone every non-terminator ("bonus") instruction of \p BB into \p PredBlock
/// immediately before PredBlock's terminator, record the old->new mapping in
/// \p VMap, and rewrite liveout uses so the IR stays in block-closed SSA form.
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!NewBonusInst->getDebugLoc().isSameSourceLocation(Other: PTI->getDebugLoc())) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc::getDropped());
    } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
      mapAtomInstance(DL, VMap);
    }

    // Rewrite operands of the clone through VMap (earlier clones in this loop
    // have already been recorded there).
    RemapInstruction(I: NewBonusInst, VM&: VMap,
                     Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(ParentBB: PredBlock, It: PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(From: &BonusInst);
    RemapDbgRecordRange(M: NewBonusInst->getModule(), Range, VM&: VMap,
                        Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

    NewBonusInst->takeName(V: &BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(Range: BonusInst.uses())) {
      auto *UI = cast<Instruction>(Val: U.getUser());
      auto *PN = dyn_cast<PHINode>(Val: UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }

  // Key Instructions: We may have propagated atom info into the pred. If the
  // pred's terminator already has atom info do nothing as merging would drop
  // one atom group anyway. If it doesn't, propagate the remapped atom group
  // from BB's terminator.
  if (auto &PredDL = PTI->getDebugLoc()) {
    auto &DL = BB->getTerminator()->getDebugLoc();
    if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
        PredDL.isSameSourceLocation(Other: DL)) {
      PTI->setDebugLoc(DL);
      RemapSourceAtom(I: PTI, VM&: VMap);
    }
  }
}
1244
/// Merge the value-equality comparison \p TI into the comparison \p PTI that
/// its predecessor performs on the same value \p CV: the predecessor's
/// terminator is replaced by a single switch covering both case sets, with
/// branch weights combined when either terminator carries them.
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  SmallVector<DominatorTree::UpdateType, 32> Updates;

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, Cases&: BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = getValueEqualityComparisonCases(TI: PTI, Cases&: PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(I: *PTI);
  bool SuccHasWeights = hasBranchWeightMD(I: *TI);

  if (PredHasWeights) {
    getBranchWeights(TI: PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(NumElts: 1 + PredCases.size(), Elt: 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    getBranchWeights(TI, Weights&: SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(NumElts: 1 + BBCases.size(), Elt: 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(x: PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        std::swap(a&: PredCases[i], b&: PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(a&: Weights[i + 1], b&: Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        // Re-examine slot i, which now holds the swapped-in last element.
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back(Elt: {DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(x: BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(x: BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Elt: Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(x: PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(a&: Weights[i + 1], b&: Weights.back());
          Weights.pop_back();
        }

        std::swap(a&: PredCases[i], b&: PredCases.back());
        PredCases.pop_back();
        // Re-examine slot i, which now holds the swapped-in last element.
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (const ValueEqualityComparisonCase &Case : BBCases)
      if (PTIHandled.count(x: Case.Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(Elt: WeightsForHandled[Case.Value]);
        PredCases.push_back(x: Case);
        ++NewSuccessors[Case.Dest];
        PTIHandled.erase(x: Case.Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(Elt: WeightsForHandled[I]);
      PredCases.push_back(x: ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {llvm::from_range, successors(BB: Pred)};
    Updates.reserve(N: Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    for (auto I : seq(Size: NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(Succ: NewSuccessor.first, NewPred: Pred, ExistPred: BB);
    }
    if (DTU && !SuccsOfPred.contains(Ptr: NewSuccessor.first))
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CV->getType()) &&
           "Should not end up here with unstable pointers");
    CV =
        Builder.CreatePtrToInt(V: CV, DestTy: DL.getIntPtrType(CV->getType()), Name: "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(V: CV, Dest: PredDefault, NumCases: PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(OnVal: V.Value, Dest: V.Dest);

  if (PredHasWeights || SuccHasWeights)
    setFittedBranchWeights(I&: *NewSI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);

  eraseTerminatorAndDCECond(TI: PTI);

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(idx: i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
        UncondBrInst::Create(IfTrue: InfLoopBlock, InsertBefore: InfLoopBlock);
        if (DTU)
          Updates.push_back(
              Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(idx: i, NewSucc: InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back(Elt: {DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1458
/// The specified terminator is a value equality comparison instruction
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
/// on the same value. If so, and if safe to do so, fold them together.
bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
                                                         IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  Value *CV = isValueEqualityComparison(TI); // CondVal
  assert(CV && "Not a comparison?");

  bool Changed = false;

  // Snapshot the predecessors up front: folding below rewrites the CFG.
  SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  while (!Preds.empty()) {
    BasicBlock *Pred = Preds.pop_back_val();
    Instruction *PTI = Pred->getTerminator();

    // Don't try to fold into itself.
    if (Pred == BB)
      continue;

    // See if the predecessor is a comparison with the same value.
    Value *PCV = isValueEqualityComparison(TI: PTI); // PredCondVal
    if (PCV != CV)
      continue;

    SmallSetVector<BasicBlock *, 4> FailBlocks;
    if (!safeToMergeTerminators(SI1: TI, SI2: PTI, FailBlocks: &FailBlocks)) {
      // Split the successors that make the merge unsafe; bail out if any
      // split fails.
      for (auto *Succ : FailBlocks) {
        if (!SplitBlockPredecessors(BB: Succ, Preds: TI->getParent(), Suffix: ".fold.split", DTU))
          return false;
      }
    }

    performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
    Changed = true;
  }
  return Changed;
}
1498
1499// If we would need to insert a select that uses the value of this invoke
1500// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1501// need to do this), we can't hoist the invoke, as there is nowhere to put the
1502// select in this case.
1503static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
1504 Instruction *I1, Instruction *I2) {
1505 for (BasicBlock *Succ : successors(BB: BB1)) {
1506 for (const PHINode &PN : Succ->phis()) {
1507 Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
1508 Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
1509 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1510 return false;
1511 }
1512 }
1513 }
1514 return true;
1515}
1516
1517// Get interesting characteristics of instructions that
1518// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1519// instructions can be reordered across.
enum SkipFlags {
  // A skipped instruction may read from memory.
  SkipReadMem = 1,
  // A skipped instruction may have side effects (allocas are treated as
  // side-effecting here, too).
  SkipSideEffect = 2,
  // A skipped instruction is not guaranteed to transfer execution to its
  // successor.
  SkipImplicitControlFlow = 4
};
1525
1526static unsigned skippedInstrFlags(Instruction *I) {
1527 unsigned Flags = 0;
1528 if (I->mayReadFromMemory())
1529 Flags |= SkipReadMem;
1530 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1531 // inalloca) across stacksave/stackrestore boundaries.
1532 if (I->mayHaveSideEffects() || isa<AllocaInst>(Val: I))
1533 Flags |= SkipSideEffect;
1534 if (!isGuaranteedToTransferExecutionToSuccessor(I))
1535 Flags |= SkipImplicitControlFlow;
1536 return Flags;
1537}
1538
1539// Returns true if it is safe to reorder an instruction across preceding
1540// instructions in a basic block.
1541static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1542 // Don't reorder a store over a load.
1543 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1544 return false;
1545
1546 // If we have seen an instruction with side effects, it's unsafe to reorder an
1547 // instruction which reads memory or itself has side effects.
1548 if ((Flags & SkipSideEffect) &&
1549 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(Val: I)))
1550 return false;
1551
1552 // Reordering across an instruction which does not necessarily transfer
1553 // control to the next instruction is speculation.
1554 if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I))
1555 return false;
1556
1557 // Hoisting of llvm.deoptimize is only legal together with the next return
1558 // instruction, which this pass is not always able to do.
1559 if (auto *CB = dyn_cast<CallBase>(Val: I))
1560 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1561 return false;
1562
1563 // It's also unsafe/illegal to hoist an instruction above its instruction
1564 // operands
1565 BasicBlock *BB = I->getParent();
1566 for (Value *Op : I->operands()) {
1567 if (auto *J = dyn_cast<Instruction>(Val: Op))
1568 if (J->getParent() == BB)
1569 return false;
1570 }
1571
1572 return true;
1573}
1574
1575static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1576
1577/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1578/// instructions \p I1 and \p I2 can and should be hoisted.
1579static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
1580 const TargetTransformInfo &TTI) {
1581 // If we're going to hoist a call, make sure that the two instructions
1582 // we're commoning/hoisting are both marked with musttail, or neither of
1583 // them is marked as such. Otherwise, we might end up in a situation where
1584 // we hoist from a block where the terminator is a `ret` to a block where
1585 // the terminator is a `br`, and `musttail` calls expect to be followed by
1586 // a return.
1587 auto *C1 = dyn_cast<CallInst>(Val: I1);
1588 auto *C2 = dyn_cast<CallInst>(Val: I2);
1589 if (C1 && C2)
1590 if (C1->isMustTailCall() != C2->isMustTailCall())
1591 return false;
1592
1593 if (!TTI.isProfitableToHoist(I: I1) || !TTI.isProfitableToHoist(I: I2))
1594 return false;
1595
1596 // If any of the two call sites has nomerge or convergent attribute, stop
1597 // hoisting.
1598 if (const auto *CB1 = dyn_cast<CallBase>(Val: I1))
1599 if (CB1->cannotMerge() || CB1->isConvergent())
1600 return false;
1601 if (const auto *CB2 = dyn_cast<CallBase>(Val: I2))
1602 if (CB2->cannotMerge() || CB2->isConvergent())
1603 return false;
1604
1605 return true;
1606}
1607
/// Hoists DbgVariableRecords from \p I1 and \p OtherInsts that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisted in
/// hoistCommonCodeFromSuccessors. e.g. The input:
///     I1 DVRs: { x, z },
///     OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
static void hoistLockstepIdenticalDbgVariableRecords(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  // Nothing attached to I1 means nothing can be hoisted in lock-step.
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators, one pair per instruction's DbgRecord
  // range (I1 first, then each of OtherInsts).
  SmallVector<CurrentAndEndIt> Itrs;
  Itrs.reserve(N: OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical (each compared against the
  // record Itrs[0] currently points at).
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(Range: make_first_range(c: ArrayRef(Itrs).drop_front()),
                  P: [&](DbgRecord::self_iterator I) {
                    return Itrs[0].first->isIdenticalToWhenDefined(R: *I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      Elt: {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    // If any instruction carries no DbgRecords, lock-step matching is
    // impossible; bail out without hoisting anything.
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        Elt: {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
  // the lock-step DbgRecords are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Range&: Itrs, P: atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(DR: &DR, Here: TI->getIterator());
      }
    }
  }
}
1664
1665static bool areIdenticalUpToCommutativity(const Instruction *I1,
1666 const Instruction *I2) {
1667 if (I1->isIdenticalToWhenDefined(I: I2, /*IntersectAttrs=*/true))
1668 return true;
1669
1670 if (auto *Cmp1 = dyn_cast<CmpInst>(Val: I1))
1671 if (auto *Cmp2 = dyn_cast<CmpInst>(Val: I2))
1672 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1673 Cmp1->getOperand(i_nocapture: 0) == Cmp2->getOperand(i_nocapture: 1) &&
1674 Cmp1->getOperand(i_nocapture: 1) == Cmp2->getOperand(i_nocapture: 0);
1675
1676 if (I1->isCommutative() && I1->isSameOperationAs(I: I2)) {
1677 return I1->getOperand(i: 0) == I2->getOperand(i: 1) &&
1678 I1->getOperand(i: 1) == I2->getOperand(i: 0) &&
1679 equal(LRange: drop_begin(RangeOrContainer: I1->operands(), N: 2), RRange: drop_begin(RangeOrContainer: I2->operands(), N: 2));
1680 }
1681
1682 return false;
1683}
1684
1685/// If the target supports conditional faulting,
1686/// we look for the following pattern:
1687/// \code
1688/// BB:
1689/// ...
1690/// %cond = icmp ult %x, %y
1691/// br i1 %cond, label %TrueBB, label %FalseBB
1692/// FalseBB:
1693/// store i32 1, ptr %q, align 4
1694/// ...
1695/// TrueBB:
1696/// %maskedloadstore = load i32, ptr %b, align 4
1697/// store i32 %maskedloadstore, ptr %p, align 4
1698/// ...
1699/// \endcode
1700///
1701/// and transform it into:
1702///
1703/// \code
1704/// BB:
1705/// ...
1706/// %cond = icmp ult %x, %y
1707/// %maskedloadstore = cload i32, ptr %b, %cond
1708/// cstore i32 %maskedloadstore, ptr %p, %cond
1709/// cstore i32 1, ptr %q, ~%cond
1710/// br i1 %cond, label %TrueBB, label %FalseBB
1711/// FalseBB:
1712/// ...
1713/// TrueBB:
1714/// ...
1715/// \endcode
1716///
1717/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1718/// e.g.
1719///
1720/// \code
1721/// %vcond = bitcast i1 %cond to <1 x i1>
1722/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1723/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1724/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1725/// call void @llvm.masked.store.v1i32.p0
1726/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1727/// %cond.not = xor i1 %cond, true
1728/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1729/// call void @llvm.masked.store.v1i32.p0
1730/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1731/// \endcode
1732///
1733/// So we need to turn hoisted load/store into cload/cstore.
1734///
1735/// \param BI The branch instruction.
1736/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1737/// will be speculated.
1738/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
static void hoistConditionalLoadsStores(
    CondBrInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  auto *VCondTy = FixedVectorType::get(ElementType: Type::getInt1Ty(C&: Context), NumElts: 1);
  auto *Cond = BI->getCondition();
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // Triangle CFG: a single mask derived from the branch condition, negated
    // when we speculate the false destination.
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        V: *Invert ? Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)) : Cond,
        DestTy: VCondTy);
  } else {
    // Diamond CFG: one mask per arm; the false arm uses the negated condition.
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        V: Builder.CreateXor(LHS: Cond, RHS: ConstantInt::getTrue(Context)), DestTy: VCondTy);
    MaskTrue = Builder.CreateBitCast(V: Cond, DestTy: VCondTy);
  }
  // Strip any chain of bitcasts to reach the underlying value.
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(Val: V))
      V = BitCast->getOperand(i_nocapture: 0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(i: 0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(i: 0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          // A PHI user merging the speculated load supplies the masked-off
          // (pass-through) value for the masked load.
          if ((PN = dyn_cast<PHINode>(Val: U))) {
            PassThru = Builder.CreateBitCast(
                V: PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                DestTy: FixedVectorType::get(ElementType: Ty, NumElts: 1));
          } else if (auto *Ins = cast<Instruction>(Val: U);
                     Sel && Ins->getParent() == BB) {
            // This happens when store or/and a speculative instruction between
            // load and store were hoisted to the BB. Make sure the masked load
            // inserted before its use.
            // We assume there's one of such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          Ty: FixedVectorType::get(ElementType: Ty, NumElts: 1), Ptr: Op0, Alignment: LI->getAlign(), Mask, PassThru);
      Value *NewLoadStore = Builder.CreateBitCast(V: MaskedLoadStore, DestTy: Ty);
      if (PN)
        PN->setIncomingValue(i: PN->getBasicBlockIndex(BB), V: NewLoadStore);
      I->replaceAllUsesWith(V: NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          V: PeekThroughBitcasts(Op0), DestTy: FixedVectorType::get(ElementType: Op0->getType(), NumElts: 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          Val: StoredVal, Ptr: I->getOperand(i: 1), Alignment: cast<StoreInst>(Val: I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Not support pointer type, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(KindID: LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(CR: getConstantRangeFromMetadata(RangeMD: *Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata(KnownIDs: {LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    at::deleteAssignmentMarkers(Inst: I);
    I->eraseMetadataIf(Pred: [](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(SrcInst: *I);
    I->eraseFromParent();
  }
}
1831
1832static bool isSafeCheapLoadStore(const Instruction *I,
1833 const TargetTransformInfo &TTI) {
1834 // Not handle volatile or atomic.
1835 bool IsStore = false;
1836 if (auto *L = dyn_cast<LoadInst>(Val: I)) {
1837 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1838 return false;
1839 } else if (auto *S = dyn_cast<StoreInst>(Val: I)) {
1840 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1841 return false;
1842 IsStore = true;
1843 } else
1844 return false;
1845
1846 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1847 // That's why we have the alignment limitation.
1848 // FIXME: Update the prototype of the intrinsics?
1849 return TTI.hasConditionalLoadStoreForType(Ty: getLoadStoreType(I), IsStore) &&
1850 getLoadStoreAlignment(I) < Value::MaximumAlignment;
1851}
1852
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matching
/// instructions only. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
  for (auto *Succ : UniqueSuccessors) {
    if (Succ->hasAddressTaken())
      return false;
    // Use getUniquePredecessor instead of getSinglePredecessor to support
    // multi-case successors in switch.
    if (Succ->getUniquePredecessor())
      continue;
    // If Succ has >1 predecessors, continue to check if the Succ contains only
    // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
    // can relax the condition based on the assumption that the program would
    // never enter Succ and trigger such an UB.
    if (isa<UnreachableInst>(Val: *Succ->begin()))
      continue;
    return false;
  }
  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  // No PHIs may be hoisted; their incoming values depend on the predecessor.
  for (auto *Succ : UniqueSuccessors) {
    BasicBlock::iterator SuccItr = Succ->begin();
    if (isa<PHINode>(Val: *SuccItr))
      return false;
    SuccIterPairs.push_back(Elt: SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.

    // Check if sizes and terminators of all successors match.
    unsigned Size0 = UniqueSuccessors[0]->size();
    Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
    bool AllSame =
        all_of(Range: drop_begin(RangeOrContainer&: UniqueSuccessors), P: [Term0, Size0](BasicBlock *Succ) {
          return Succ->getTerminator()->isIdenticalTo(I: Term0) &&
                 Succ->size() == Size0;
        });
    if (!AllSame)
      return false;
    // Walk all successors in lock-step (bottom-up) and require each row of
    // instructions to be identical up to commutativity.
    LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
    while (LRI.isValid()) {
      Instruction *I0 = (*LRI)[0];
      if (any_of(Range: *LRI, P: [I0](Instruction *I) {
            return !areIdenticalUpToCommutativity(I1: I0, I2: I);
          })) {
        return false;
      }
      --LRI;
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(C&: SuccIterPairs,
             P: [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  // Main lock-step scan: the first successor's iterator (BB1ItrPair) leads;
  // the remaining successors are compared against it row by row.
  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(c&: OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      // Treat MMRA metadata conservatively: differing MMRAs block hoisting.
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(Elt: &*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(
                 TI, I1, OtherSuccTIs&: OtherInsts, UniqueSuccessors: UniqueSuccessors.getArrayRef()) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I: I1, Flags: SkipFlagsBB1) &&
          all_of(Range&: OtherSuccIterPairRange, P: [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I: I2, Flags: SkipFlagsBB2) &&
                   shouldHoistCommonInstructions(I1, I2, TTI);
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(InsertPos: TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(V: I1);
        // Merge poison-generating flags and attributes conservatively.
        I1->andIRFlags(V: I2);
        if (auto *CB = dyn_cast<CallBase>(Val: I1)) {
          bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG Compile.
          (void)Success;
        }

        combineMetadataForCSE(K: I1, J: I2, DoesKMove: true);
        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(LocA: I1->getDebugLoc(), LocB: I2->getDebugLoc());
        I2->eraseFromParent();
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
2049
/// Hoist the identical terminator \p I1 (and its matches \p OtherSuccTIs from
/// the remaining successors) into \p TI's block: clone it before \p TI,
/// redirect uses and PHIs (inserting selects for disagreeing PHI inputs when
/// \p TI is a conditional branch), rewire predecessors, and erase \p TI.
/// \p UniqueSuccessors is the deduplicated successor list used for DTU
/// updates. Returns true if the IR changed.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs,
    ArrayRef<BasicBlock *> UniqueSuccessors) {

  // Non-null only when TI is a two-way conditional branch (the "if" case).
  auto *BI = dyn_cast<CondBrInst>(Val: TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(Val: I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(Val: I1))
    return false;

  for (BasicBlock *Succ : successors(BB: BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(BB: OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow then converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(V: BB1V, I: &PN) ||
            passingValueIsAlwaysUndefined(V: BB2V, I: &PN))
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts&: OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(ParentBB: TIParent, It: TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    I1->replaceAllUsesWith(V: NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(V: NT);
    NT->takeName(V: I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  SmallVector<DebugLoc, 4> Locs;
  Locs.push_back(Elt: I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(Elt: OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache selects per (BB1V, BB2V) pair so each distinct disagreement gets
    // exactly one select.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB: BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(x&: BB1V, y&: BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          SI = cast<SelectInst>(Val: Builder.CreateSelectFMF(
              C: BI->getCondition(), True: BB1V, False: BB2V,
              FMFSource: isa<FPMathOperator>(Val: PN) ? &PN : nullptr,
              Name: BB1V->getName() + "." + BB2V->getName(), MDFrom: BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, V: SI);
      }
    }
  }

  SmallVector<DominatorTree::UpdateType, 4> Updates;

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB: BB1)) {
    addPredecessorToBlock(Succ, NewPred: TIParent, ExistPred: BB1);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU) {
    // TI might be a switch with multi-cases destination, so we need to care for
    // the duplication of successors.
    for (BasicBlock *Succ : UniqueSuccessors)
      Updates.push_back(Elt: {DominatorTree::Delete, TIParent, Succ});
  }

  eraseTerminatorAndDCECond(TI);
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
2179
2180// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2181// into variables.
2182static bool replacingOperandWithVariableIsCheap(const Instruction *I,
2183 int OpIdx) {
2184 // Divide/Remainder by constant is typically much cheaper than by variable.
2185 if (I->isIntDivRem())
2186 return OpIdx != 1;
2187 return !isa<IntrinsicInst>(Val: I);
2188}
2189
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
static bool canSinkInstructions(
    ArrayRef<Instruction *> Insts,
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(Val: I) || I->isEHPad() || isa<AllocaInst>(Val: I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(Val: I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    // All candidates must have the same use count for their uses to possibly
    // be mergeable.
    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I: I0, flags: Instruction::CompareUsingIntersectedAttrs))
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(Val: &U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(LRange&: Insts, RRange&: It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(Val: I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(Val: I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Range&: Insts, P: IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Range&: Insts, P: IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(Val: I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // For each operand position, either all instructions agree on the operand,
  // or a PHI must be created for it; record the would-be PHI inputs.
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(i: OI);
    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(i: OI) == I0->getOperand(i: OI);
    };
    if (!all_of(Range&: Insts, P: SameAsI0)) {
      if ((isa<Constant>(Val: Op) && !replacingOperandWithVariableIsCheap(I: I0, OpIdx: OI)) ||
          !canReplaceOperandWithVariable(I: I0, OpIdx: OI))
        // We can't create a PHI from this GEP.
        return false;
      auto &Ops = PHIOperands[&I0->getOperandUse(i: OI)];
      for (auto *I : Insts)
        Ops.push_back(Elt: I->getOperand(i: OI));
    }
  }
  return true;
}
2295
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
  // The common successor all blocks unconditionally branch to.
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(Idx: 0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  SmallVector<Instruction*,4> Insts;
  for (auto *BB : Blocks) {
    Instruction *I = BB->getTerminator();
    I = I->getPrevNode();
    Insts.push_back(Elt: I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Range&: Insts, P: [&I0, O](const Instruction *I) {
      return I->getOperand(i: O) != I0->getOperand(i: O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(Elt: I0->getOperand(i: O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(i: O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Ty: Op->getType(), NumReservedValues: Insts.size(), NameStr: Op->getName() + ".sink");
    PN->insertBefore(InsertPos: BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(V: I->getOperand(i: O), BB: I->getParent());
    NewOperands.push_back(Elt: PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(i: O).set(NewOperands[O]);

  I0->moveBefore(BB&: *BBEnd, I: BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(LocA: I0->getDebugLoc(), LocB: I->getDebugLoc());
      combineMetadataForCSE(K: I0, J: I, DoesKMove: true);
      I0->andIRFlags(V: I);
      if (auto *CB = dyn_cast<CallBase>(Val: I0)) {
        bool Success = CB->tryIntersectAttributes(Other: cast<CallBase>(Val: I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG Compile.
        (void)Success;
      }
    }

  for (User *U : make_early_inc_range(Range: I0->users())) {
    // canSinkLastInstruction checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(Val: U);
    PN->replaceAllUsesWith(V: I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(V: I0);
    I->eraseFromParent();
  }
}
2390
/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
/// \returns true if the IR was changed (instructions sunk and/or an edge
/// split via a new ".sink.split" block).
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
                                           DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  //   if (a) f(1);
  //   else if (b) f(2);
  //
  // produces:
  //
  //       [if]
  //      /    \
  //    [f(1)] [if]
  //      |     | \
  //      |     |  |
  //      |  [f(2)]|
  //       \    | /
  //       [ end ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //       [if]
  //      /    \
  //    [x(1)] [if]
  //      |     | \
  //      |     |  \
  //      |  [x(2)] |
  //       \   /    |
  //   [sink.split] |
  //         \     /
  //         [ end ]
  //
  // Partition the predecessors by whether they end in an unconditional branch.
  SmallVector<BasicBlock*,4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<UncondBrInst>(Val: PredBB->getTerminator());
    if (PredBr)
      UnconditionalPreds.push_back(Elt: PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  // Sinking only makes sense when there are at least two blocks to merge from.
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of each
  // block can be sunk, those instructions are added to ValuesToSink and we
  // carry on. If we can sink an instruction but need to PHI-merge some operands
  // (because they're not identical in each instruction) we add these to
  // PHIOperands.
  // We prepopulate PHIOperands with the phis that already exist in BB, keyed
  // by the incoming use from the first unconditional predecessor.
  DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
  for (PHINode &PN : BB->phis()) {
    SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
    for (const Use &U : PN.incoming_values())
      IncomingVals.insert(KV: {PN.getIncomingBlock(U), &U});
    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
    for (BasicBlock *Pred : UnconditionalPreds)
      Ops.push_back(Elt: *IncomingVals[Pred]);
  }

  // Scan phase: walk the predecessors backwards in lockstep; ScanIdx counts
  // how many trailing instructions are legal to sink.
  int ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  LockstepReverseIterator<true> LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(Insts: *LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n");
    InstructionsToSink.insert_range(R: *LRI);
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  // Profitability checks are skipped entirely when the block leads into a
  // deopt/unreachable, since code size there is all that matters.
  if (!followedByDeoptOrUnreachable) {
    // Check whether this is the pointer operand of a load/store.
    auto IsMemOperand = [](Use &U) {
      auto *I = cast<Instruction>(Val: U.getUser());
      if (isa<LoadInst>(Val: I))
        return U.getOperandNo() == LoadInst::getPointerOperandIndex();
      if (isa<StoreInst>(Val: I))
        return U.getOperandNo() == StoreInst::getPointerOperandIndex();
      return false;
    };

    // Okay, we *could* sink last ScanIdx instructions. But how many can we
    // actually sink before encountering instruction that is unprofitable to
    // sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
      unsigned NumPHIInsts = 0;
      for (Use &U : (*LRI)[0]->operands()) {
        auto It = PHIOperands.find(Val: &U);
        if (It != PHIOperands.end() && !all_of(Range&: It->second, P: [&](Value *V) {
              return InstructionsToSink.contains(Ptr: V);
            })) {
          ++NumPHIInsts;
          // Do not separate a load/store from the gep producing the address.
          // The gep can likely be folded into the load/store as an addressing
          // mode. Additionally, a load of a gep is easier to analyze than a
          // load of a phi.
          if (IsMemOperand(U) &&
              any_of(Range&: It->second, P: [](Value *V) { return isa<GEPOperator>(Val: V); }))
            return false;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      }
      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
      return NumPHIInsts <= 1;
    };

    // We've determined that we are going to sink last ScanIdx instructions,
    // and recorded them in InstructionsToSink. Now, some instructions may be
    // unprofitable to sink. But that determination depends on the instructions
    // that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable.
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch least
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
      InstructionsProfitableToSink.insert_range(R: *LRI);
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan, do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink");
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(Ptr: I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }

  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute(I: (*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, Preds: UnconditionalPreds, Suffix: ".sink.split", DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that will
  // actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop half way through
  // and never actually sink it which means we produce more PHIs than intended.
  // This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    sinkLastInstruction(Blocks: UnconditionalPreds);
    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}
2650
2651namespace {
2652
/// Partitions the `invoke` predecessors of a landing pad into sets of
/// mutually "mergeable" invokes (see shouldBelongToSameSet for the criteria).
struct CompatibleSets {
  using SetTy = SmallVector<InvokeInst *, 2>;

  /// All the sets formed so far. Invokes within a set are pairwise compatible.
  SmallVector<SetTy, 1> Sets;

  /// Decide whether the two given `invoke`s could be merged into one.
  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);

  /// Find the existing set \p II is compatible with, or create a new one.
  SetTy &getCompatibleSet(InvokeInst *II);

  /// Place \p II into its compatible set.
  void insert(InvokeInst *II);
};
2664
2665CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2666 // Perform a linear scan over all the existing sets, see if the new `invoke`
2667 // is compatible with any particular set. Since we know that all the `invokes`
2668 // within a set are compatible, only check the first `invoke` in each set.
2669 // WARNING: at worst, this has quadratic complexity.
2670 for (CompatibleSets::SetTy &Set : Sets) {
2671 if (CompatibleSets::shouldBelongToSameSet(Invokes: {Set.front(), II}))
2672 return Set;
2673 }
2674
2675 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2676 return Sets.emplace_back();
2677}
2678
2679void CompatibleSets::insert(InvokeInst *II) {
2680 getCompatibleSet(II).emplace_back(Args&: II);
2681}
2682
/// Check every condition required for two `invoke`s to be merged into one:
/// mergeability, matching call-target kind (and callee if direct), matching
/// normal destinations with compatible incoming PHI values, compatible
/// incoming values in the (shared) unwind destination, identical operation
/// (incl. operand bundles), and replaceable differing data operands.
bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");

  // Can we theoretically merge these `invoke`s?
  auto IsIllegalToMerge = [](InvokeInst *II) {
    return II->cannotMerge() || II->isInlineAsm();
  };
  if (any_of(Range&: Invokes, P: IsIllegalToMerge))
    return false;

  // Either both `invoke`s must be direct,
  // or both `invoke`s must be indirect.
  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
  bool HaveIndirectCalls = any_of(Range&: Invokes, P: IsIndirectCall);
  bool AllCallsAreIndirect = all_of(Range&: Invokes, P: IsIndirectCall);
  if (HaveIndirectCalls) {
    if (!AllCallsAreIndirect)
      return false;
  } else {
    // All callees must be identical.
    Value *Callee = nullptr;
    for (InvokeInst *II : Invokes) {
      Value *CurrCallee = II->getCalledOperand();
      assert(CurrCallee && "There is always a called operand.");
      if (!Callee)
        Callee = CurrCallee;
      else if (Callee != CurrCallee)
        return false;
    }
  }

  // Either both `invoke`s must not have a normal destination,
  // or both `invoke`s must have a normal destination.
  auto HasNormalDest = [](InvokeInst *II) {
    return !isa<UnreachableInst>(Val: II->getNormalDest()->getFirstNonPHIOrDbg());
  };
  if (any_of(Range&: Invokes, P: HasNormalDest)) {
    // Do not merge `invoke` that does not have a normal destination with one
    // that does have a normal destination, even though doing so would be legal.
    if (!all_of(Range&: Invokes, P: HasNormalDest))
      return false;

    // All normal destinations must be identical.
    BasicBlock *NormalBB = nullptr;
    for (InvokeInst *II : Invokes) {
      BasicBlock *CurrNormalBB = II->getNormalDest();
      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
      if (!NormalBB)
        NormalBB = CurrNormalBB;
      else if (NormalBB != CurrNormalBB)
        return false;
    }

    // In the normal destination, the incoming values for these two `invoke`s
    // must be compatible. The invokes themselves count as equivalent values.
    SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
    if (!incomingValuesAreCompatible(
            BB: NormalBB, IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()},
            EquivalenceSet: &EquivalenceSet))
      return false;
  }

#ifndef NDEBUG
  // All unwind destinations must be identical.
  // We know that because we have started from said unwind destination.
  BasicBlock *UnwindBB = nullptr;
  for (InvokeInst *II : Invokes) {
    BasicBlock *CurrUnwindBB = II->getUnwindDest();
    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
    if (!UnwindBB)
      UnwindBB = CurrUnwindBB;
    else
      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
  }
#endif

  // In the unwind destination, the incoming values for these two `invoke`s
  // must be compatible.
  if (!incomingValuesAreCompatible(
          BB: Invokes.front()->getUnwindDest(),
          IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()}))
    return false;

  // Ignoring arguments, these `invoke`s must be identical,
  // including operand bundles.
  const InvokeInst *II0 = Invokes.front();
  for (auto *II : Invokes.drop_front())
    if (!II->isSameOperationAs(I: II0, flags: Instruction::CompareUsingIntersectedAttrs))
      return false;

  // Can we theoretically form the data operands for the merged `invoke`?
  // Any differing operand must be replaceable with a variable (a PHI).
  auto IsIllegalToMergeArguments = [](auto Ops) {
    Use &U0 = std::get<0>(Ops);
    Use &U1 = std::get<1>(Ops);
    if (U0 == U1)
      return false;
    return !canReplaceOperandWithVariable(I: cast<Instruction>(Val: U0.getUser()),
                                          OpIdx: U0.getOperandNo());
  };
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
  if (any_of(Range: zip(t: Invokes[0]->data_ops(), u: Invokes[1]->data_ops()),
             P: IsIllegalToMergeArguments))
    return false;

  return true;
}
2789
2790} // namespace
2791
// Merge all invokes in the provided set, all of which are compatible
// as per the `CompatibleSets::shouldBelongToSameSet()`.
// The original `invoke`s are replaced by unconditional branches to a new
// block holding a single merged `invoke`; differing operands become PHIs.
static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
                                       DomTreeUpdater *DTU) {
  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");

  SmallVector<DominatorTree::UpdateType, 8> Updates;
  if (DTU)
    // One insert per original invoke, up to two successor inserts for the
    // merged invoke, and up to two deletes per original invoke.
    Updates.reserve(N: 2 + 3 * Invokes.size());

  bool HasNormalDest =
      !isa<UnreachableInst>(Val: Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());

  // Clone one of the invokes into a new basic block.
  // Since they are all compatible, it doesn't matter which invoke is cloned.
  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
    InvokeInst *II0 = Invokes.front();
    BasicBlock *II0BB = II0->getParent();
    BasicBlock *InsertBeforeBlock =
        II0->getParent()->getIterator()->getNextNode();
    Function *Func = II0BB->getParent();
    LLVMContext &Ctx = II0->getContext();

    BasicBlock *MergedInvokeBB = BasicBlock::Create(
        Context&: Ctx, Name: II0BB->getName() + ".invoke", Parent: Func, InsertBefore: InsertBeforeBlock);

    auto *MergedInvoke = cast<InvokeInst>(Val: II0->clone());
    // NOTE: all invokes have the same attributes, so no handling needed.
    MergedInvoke->insertInto(ParentBB: MergedInvokeBB, It: MergedInvokeBB->end());

    if (!HasNormalDest) {
      // This set does not have a normal destination,
      // so just form a new block with unreachable terminator.
      BasicBlock *MergedNormalDest = BasicBlock::Create(
          Context&: Ctx, Name: II0BB->getName() + ".cont", Parent: Func, InsertBefore: InsertBeforeBlock);
      auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
      UI->setDebugLoc(DebugLoc::getTemporary());
      MergedInvoke->setNormalDest(MergedNormalDest);
    }

    // The unwind destination, however, remains identical for all invokes here.

    return MergedInvoke;
  }();

  if (DTU) {
    // Predecessor blocks that contained these invokes will now branch to
    // the new block that contains the merged invoke, ...
    for (InvokeInst *II : Invokes)
      Updates.push_back(
          Elt: {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});

    // ... which has the new `unreachable` block as normal destination,
    // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
    for (BasicBlock *SuccBBOfMergedInvoke : successors(I: MergedInvoke))
      Updates.push_back(Elt: {DominatorTree::Insert, MergedInvoke->getParent(),
                           SuccBBOfMergedInvoke});

    // Since predecessor blocks now unconditionally branch to a new block,
    // they no longer branch to their original successors.
    for (InvokeInst *II : Invokes)
      for (BasicBlock *SuccOfPredBB : successors(BB: II->getParent()))
        Updates.push_back(
            Elt: {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
  }

  bool IsIndirectCall = Invokes[0]->isIndirectCall();

  // Form the merged operands for the merged invoke.
  for (Use &U : MergedInvoke->operands()) {
    // Only PHI together the indirect callees and data operands.
    if (MergedInvoke->isCallee(U: &U)) {
      if (!IsIndirectCall)
        continue;
    } else if (!MergedInvoke->isDataOperand(U: &U))
      continue;

    // Don't create trivial PHI's with all-identical incoming values.
    bool NeedPHI = any_of(Range&: Invokes, P: [&U](InvokeInst *II) {
      return II->getOperand(i_nocapture: U.getOperandNo()) != U.get();
    });
    if (!NeedPHI)
      continue;

    // Form a PHI out of all the data ops under this index.
    PHINode *PN = PHINode::Create(
        Ty: U->getType(), /*NumReservedValues=*/Invokes.size(), NameStr: "", InsertBefore: MergedInvoke->getIterator());
    for (InvokeInst *II : Invokes)
      PN->addIncoming(V: II->getOperand(i_nocapture: U.getOperandNo()), BB: II->getParent());

    U.set(PN);
  }

  // We've ensured that each PHI node has compatible (identical) incoming values
  // when coming from each of the `invoke`s in the current merge set,
  // so update the PHI nodes accordingly.
  for (BasicBlock *Succ : successors(I: MergedInvoke))
    addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
                          /*ExistPred=*/Invokes.front()->getParent());

  // And finally, replace the original `invoke`s with an unconditional branch
  // to the block with the merged `invoke`. Also, give that merged `invoke`
  // the merged debugloc of all the original `invoke`s.
  DILocation *MergedDebugLoc = nullptr;
  for (InvokeInst *II : Invokes) {
    // Compute the debug location common to all the original `invoke`s.
    if (!MergedDebugLoc)
      MergedDebugLoc = II->getDebugLoc();
    else
      MergedDebugLoc =
          DebugLoc::getMergedLocation(LocA: MergedDebugLoc, LocB: II->getDebugLoc());

    // And replace the old `invoke` with an unconditional branch
    // to the block with the merged `invoke`.
    for (BasicBlock *OrigSuccBB : successors(BB: II->getParent()))
      OrigSuccBB->removePredecessor(Pred: II->getParent());
    auto *BI = UncondBrInst::Create(IfTrue: MergedInvoke->getParent(), InsertBefore: II->getParent());
    // The unconditional branch is part of the replacement for the original
    // invoke, so should use its DebugLoc.
    BI->setDebugLoc(II->getDebugLoc());
    bool Success = MergedInvoke->tryIntersectAttributes(Other: II);
    assert(Success && "Merged invokes with incompatible attributes");
    // For NDEBUG Compile
    (void)Success;
    II->replaceAllUsesWith(V: MergedInvoke);
    II->eraseFromParent();
    ++NumInvokesMerged;
  }
  MergedInvoke->setDebugLoc(MergedDebugLoc);
  ++NumInvokeSetsFormed;

  if (DTU)
    DTU->applyUpdates(Updates);
}
2926
/// If this block is a `landingpad` exception handling block, categorize all
/// the predecessor `invoke`s into sets, with all `invoke`s in each set
/// being "mergeable" together, and then merge invokes in each set together.
///
/// This is a weird mix of hoisting and sinking. Visually, it goes from:
///           [...]        [...]
///             |            |
///        [invoke0]    [invoke1]
///           / \          / \
///     [cont0] [landingpad] [cont1]
/// to:
///      [...] [...]
///        \     /
///       [invoke]
///          / \
///     [cont] [landingpad]
///
/// But of course we can only do that if the invokes share the `landingpad`,
/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
/// and the invoked functions are "compatible".
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
  // Nothing to do if this transform is disabled.
  if (!EnableMergeCompatibleInvokes)
    return false;

  bool Changed = false;

  // FIXME: generalize to all exception handling blocks?
  if (!BB->isLandingPad())
    return Changed;

  CompatibleSets Grouper;

  // Record all the predecessors of this `landingpad`. As per verifier,
  // the only allowed predecessor is the unwind edge of an `invoke`.
  // We want to group "compatible" `invokes` into the same set to be merged.
  for (BasicBlock *PredBB : predecessors(BB))
    Grouper.insert(II: cast<InvokeInst>(Val: PredBB->getTerminator()));

  // And now, merge `invoke`s that were grouped together.
  for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
    // A singleton set has nothing to merge with.
    if (Invokes.size() < 2)
      continue;
    Changed = true;
    mergeCompatibleInvokesImpl(Invokes, DTU);
  }

  return Changed;
}
2975
2976namespace {
2977/// Track ephemeral values, which should be ignored for cost-modelling
2978/// purposes. Requires walking instructions in reverse order.
2979class EphemeralValueTracker {
2980 SmallPtrSet<const Instruction *, 32> EphValues;
2981
2982 bool isEphemeral(const Instruction *I) {
2983 if (isa<AssumeInst>(Val: I))
2984 return true;
2985 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2986 all_of(Range: I->users(), P: [&](const User *U) {
2987 return EphValues.count(Ptr: cast<Instruction>(Val: U));
2988 });
2989 }
2990
2991public:
2992 bool track(const Instruction *I) {
2993 if (isEphemeral(I)) {
2994 EphValues.insert(Ptr: I);
2995 return true;
2996 }
2997 return false;
2998 }
2999
3000 bool contains(const Instruction *I) const { return EphValues.contains(Ptr: I); }
3001};
3002} // namespace
3003
/// Determine if we can hoist or sink a sole store instruction out of a
/// conditional block.
///
/// We are looking for code like the following:
///   BrBB:
///     store i32 %add, i32* %arrayidx2
///     ... // No other stores or function calls (we could be calling a memory
///     ... // function).
///     %cmp = icmp ult %x, %y
///     br i1 %cmp, label %EndBB, label %ThenBB
///   ThenBB:
///     store i32 %add5, i32* %arrayidx2
///     br label EndBB
///   EndBB:
///     ...
///   We are going to transform this into:
///   BrBB:
///     store i32 %add, i32* %arrayidx2
///     ... //
///     %cmp = icmp ult %x, %y
///     %add.add5 = select i1 %cmp, i32 %add, %add5
///     store i32 %add.add5, i32* %arrayidx2
///     ...
///
/// \return The pointer to the value of the previous store if the store can be
///         hoisted into the predecessor block; nullptr otherwise.
static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
  StoreInst *StoreToHoist = dyn_cast<StoreInst>(Val: I);
  if (!StoreToHoist)
    return nullptr;

  // Volatile or atomic.
  if (!StoreToHoist->isSimple())
    return nullptr;

  Value *StorePtr = StoreToHoist->getPointerOperand();
  Type *StoreTy = StoreToHoist->getValueOperand()->getType();

  // Look for a store to the same pointer in BrBB. Bound the backwards walk to
  // keep this scan cheap.
  unsigned MaxNumInstToLookAt = 9;
  // Skip pseudo probe intrinsic calls which are not really killing any memory
  // accesses.
  for (Instruction &CurI : reverse(C: BrBB->instructionsWithoutDebug(SkipPseudoOp: true))) {
    if (!MaxNumInstToLookAt)
      break;
    --MaxNumInstToLookAt;

    // Could be calling an instruction that affects memory like free().
    if (CurI.mayWriteToMemory() && !isa<StoreInst>(Val: CurI))
      return nullptr;

    if (auto *SI = dyn_cast<StoreInst>(Val: &CurI)) {
      // Found the previous store to same location and type. Make sure it is
      // simple, to avoid introducing a spurious non-atomic write after an
      // atomic write.
      if (SI->getPointerOperand() == StorePtr &&
          SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
          SI->getAlign() >= StoreToHoist->getAlign())
        // Found the previous store, return its value operand.
        return SI->getValueOperand();
      return nullptr; // Unknown store.
    }

    if (auto *LI = dyn_cast<LoadInst>(Val: &CurI)) {
      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
          LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
        // A matching load is usable only if the written-to object is known
        // writable and its provenance has not escaped (no other thread could
        // observe the speculated store).
        Value *Obj = getUnderlyingObject(V: StorePtr);
        bool ExplicitlyDereferenceableOnly;
        if (isWritableObject(Object: Obj, ExplicitlyDereferenceableOnly) &&
            capturesNothing(
                CC: PointerMayBeCaptured(V: Obj, /*ReturnCaptures=*/false,
                                      Mask: CaptureComponents::Provenance)) &&
            (!ExplicitlyDereferenceableOnly ||
             isDereferenceablePointer(V: StorePtr, Ty: StoreTy,
                                      DL: LI->getDataLayout()))) {
          // Found a previous load, return it.
          return LI;
        }
      }
      // The load didn't work out, but we may still find a store.
    }
  }

  return nullptr;
}
3090
/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
/// converted to selects.
///
/// \param SpeculatedInstructions incremented for each non-trivially-cheap
///        (constant-expression) incoming value; the caller's budget.
/// \param Cost accumulates the select-instruction cost for each rewritable
///        PHI.
/// \returns true if at least one PHI needs (and permits) a select rewrite
///          within budget.
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
                                           BasicBlock *EndBB,
                                           unsigned &SpeculatedInstructions,
                                           InstructionCost &Cost,
                                           const TargetTransformInfo &TTI) {
  // Optimize purely for size when the function is minsize.
  TargetTransformInfo::TargetCostKind CostKind =
      BB->getParent()->hasMinSize()
          ? TargetTransformInfo::TCK_CodeSize
          : TargetTransformInfo::TCK_SizeAndLatency;

  bool HaveRewritablePHIs = false;
  for (PHINode &PN : EndBB->phis()) {
    Value *OrigV = PN.getIncomingValueForBlock(BB);
    Value *ThenV = PN.getIncomingValueForBlock(BB: ThenBB);

    // FIXME: Try to remove some of the duplication with
    // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
    if (ThenV == OrigV)
      continue;

    Cost += TTI.getCmpSelInstrCost(Opcode: Instruction::Select, ValTy: PN.getType(),
                                   CondTy: CmpInst::makeCmpResultType(opnd_type: PN.getType()),
                                   VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);

    // Don't convert to selects if we could remove undefined behavior instead.
    if (passingValueIsAlwaysUndefined(V: OrigV, I: &PN) ||
        passingValueIsAlwaysUndefined(V: ThenV, I: &PN))
      return false;

    HaveRewritablePHIs = true;
    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(Val: OrigV);
    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(Val: ThenV);
    if (!OrigCE && !ThenCE)
      continue; // Known cheap (FIXME: Maybe not true for aggregates).

    InstructionCost OrigCost = OrigCE ? computeSpeculationCost(I: OrigCE, TTI) : 0;
    InstructionCost ThenCost = ThenCE ? computeSpeculationCost(I: ThenCE, TTI) : 0;
    InstructionCost MaxCost =
        2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
    if (OrigCost + ThenCost > MaxCost)
      return false;

    // Account for the cost of an unfolded ConstantExpr which could end up
    // getting expanded into Instructions.
    // FIXME: This doesn't account for how many operations are combined in the
    // constant expression.
    ++SpeculatedInstructions;
    if (SpeculatedInstructions > 1)
      return false;
  }

  return HaveRewritablePHIs;
}
3146
3147static bool isProfitableToSpeculate(const CondBrInst *BI,
3148 std::optional<bool> Invert,
3149 const TargetTransformInfo &TTI) {
3150 // If the branch is non-unpredictable, and is predicted to *not* branch to
3151 // the `then` block, then avoid speculating it.
3152 if (BI->getMetadata(KindID: LLVMContext::MD_unpredictable))
3153 return true;
3154
3155 uint64_t TWeight, FWeight;
3156 if (!extractBranchWeights(I: *BI, TrueVal&: TWeight, FalseVal&: FWeight) || (TWeight + FWeight) == 0)
3157 return true;
3158
3159 if (!Invert.has_value())
3160 return false;
3161
3162 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3163 BranchProbability BIEndProb =
3164 BranchProbability::getBranchProbability(Numerator: EndWeight, Denominator: TWeight + FWeight);
3165 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3166 return BIEndProb < Likely;
3167}
3168
3169/// Speculate a conditional basic block flattening the CFG.
3170///
3171/// Note that this is a very risky transform currently. Speculating
3172/// instructions like this is most often not desirable. Instead, there is an MI
3173/// pass which can do it with full awareness of the resource constraints.
3174/// However, some cases are "obvious" and we should do directly. An example of
3175/// this is speculating a single, reasonably cheap instruction.
3176///
3177/// There is only one distinct advantage to flattening the CFG at the IR level:
3178/// it makes very common but simplistic optimizations such as are common in
3179/// instcombine and the DAG combiner more powerful by removing CFG edges and
3180/// modeling their effects with easier to reason about SSA value graphs.
3181///
3182///
3183/// An illustration of this transform is turning this IR:
3184/// \code
3185/// BB:
3186/// %cmp = icmp ult %x, %y
3187/// br i1 %cmp, label %EndBB, label %ThenBB
3188/// ThenBB:
3189/// %sub = sub %x, %y
///     br label %EndBB
3191/// EndBB:
3192/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3193/// ...
3194/// \endcode
3195///
3196/// Into this IR:
3197/// \code
3198/// BB:
3199/// %cmp = icmp ult %x, %y
3200/// %sub = sub %x, %y
3201/// %cond = select i1 %cmp, 0, %sub
3202/// ...
3203/// \endcode
3204///
3205/// \returns true if the conditional block is removed.
bool SimplifyCFGOpt::speculativelyExecuteBB(CondBrInst *BI,
                                            BasicBlock *ThenBB) {
  if (!Options.SpeculateBlocks)
    return false;

  // Be conservative for now. FP select instruction can often be expensive.
  Value *BrCond = BI->getCondition();
  if (isa<FCmpInst>(Val: BrCond))
    return false;

  BasicBlock *BB = BI->getParent();
  // ThenBB ends in an unconditional branch (see the function comment), so its
  // sole successor is the merge block holding the PHIs we turn into selects.
  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(Idx: 0);
  InstructionCost Budget =
      PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;

  // If ThenBB is actually on the false edge of the conditional branch, remember
  // to swap the select operands later.
  bool Invert = false;
  if (ThenBB != BI->getSuccessor(i: 0)) {
    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
    Invert = true;
  }
  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");

  if (!isProfitableToSpeculate(BI, Invert, TTI))
    return false;

  // Keep a count of how many times instructions are used within ThenBB when
  // they are candidates for sinking into ThenBB. Specifically:
  // - They are defined in BB, and
  // - They have no side effects, and
  // - All of their uses are in ThenBB.
  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;

  SmallVector<Instruction *, 4> SpeculatedPseudoProbes;

  unsigned SpeculatedInstructions = 0;
  bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
  SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
  Value *SpeculatedStoreValue = nullptr;
  StoreInst *SpeculatedStore = nullptr;
  EphemeralValueTracker EphTracker;
  // Walk ThenBB bottom-up (skipping its terminator via drop_end) so ephemeral
  // values are recognized before the instructions that feed them.
  for (Instruction &I : reverse(C: drop_end(RangeOrContainer&: *ThenBB))) {
    // Skip pseudo probes. The consequence is we lose track of the branch
    // probability for ThenBB, which is fine since the optimization here takes
    // place regardless of the branch probability.
    if (isa<PseudoProbeInst>(Val: I)) {
      // The probe should be deleted so that it will not be over-counted when
      // the samples collected on the non-conditional path are counted towards
      // the conditional path. We leave it for the counts inference algorithm to
      // figure out a proper count for an unknown probe.
      SpeculatedPseudoProbes.push_back(Elt: &I);
      continue;
    }

    // Ignore ephemeral values, they will be dropped by the transform.
    if (EphTracker.track(I: &I))
      continue;

    // Only speculatively execute a single instruction (not counting the
    // terminator) for now.
    bool IsSafeCheapLoadStore = HoistLoadsStores &&
                                isSafeCheapLoadStore(I: &I, TTI) &&
                                SpeculatedConditionalLoadsStores.size() <
                                    HoistLoadsStoresWithCondFaultingThreshold;
    // Not count load/store into cost if target supports conditional faulting
    // b/c it's cheap to speculate it.
    if (IsSafeCheapLoadStore)
      SpeculatedConditionalLoadsStores.push_back(Elt: &I);
    else
      ++SpeculatedInstructions;

    if (SpeculatedInstructions > 1)
      return false;

    // Don't hoist the instruction if it's unsafe or expensive.
    // A store may still be speculatable (via isSafeToSpeculateStore) when
    // HoistCondStores is set; in that case the stored value is recorded so a
    // select can be emitted for it below.
    if (!IsSafeCheapLoadStore &&
        !isSafeToSpeculativelyExecute(I: &I, CtxI: BI, AC: Options.AC) &&
        !(HoistCondStores && !SpeculatedStoreValue &&
          (SpeculatedStoreValue =
               isSafeToSpeculateStore(I: &I, BrBB: BB, StoreBB: ThenBB, EndBB))))
      return false;
    if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
        computeSpeculationCost(I: &I, TTI) >
            PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
      return false;

    // Store the store speculation candidate.
    if (!SpeculatedStore && SpeculatedStoreValue)
      SpeculatedStore = cast<StoreInst>(Val: &I);

    // Do not hoist the instruction if any of its operands are defined but not
    // used in BB. The transformation will prevent the operand from
    // being sunk into the use block.
    for (Use &Op : I.operands()) {
      Instruction *OpI = dyn_cast<Instruction>(Val&: Op);
      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
        continue; // Not a candidate for sinking.

      ++SinkCandidateUseCounts[OpI];
    }
  }

  // Consider any sink candidates which are only used in ThenBB as costs for
  // speculation. Note, while we iterate over a DenseMap here, we are summing
  // and so iteration order isn't significant.
  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
    if (Inst->hasNUses(N: Count)) {
      ++SpeculatedInstructions;
      if (SpeculatedInstructions > 1)
        return false;
    }

  // Check that we can insert the selects and that it's not too expensive to do
  // so.
  bool Convert =
      SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
  InstructionCost Cost = 0;
  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
                                            SpeculatedInstructions, Cost, TTI);
  if (!Convert || Cost > Budget)
    return false;

  // If we get here, we can hoist the instruction and if-convert.
  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

  Instruction *Sel = nullptr;
  // Insert a select of the value of the speculated store.
  if (SpeculatedStoreValue) {
    IRBuilder<NoFolder> Builder(BI);
    Value *OrigV = SpeculatedStore->getValueOperand();
    Value *TrueV = SpeculatedStore->getValueOperand();
    Value *FalseV = SpeculatedStoreValue;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *S = Builder.CreateSelect(
        C: BrCond, True: TrueV, False: FalseV, Name: "spec.store.select", MDFrom: BI);
    Sel = cast<Instruction>(Val: S);
    SpeculatedStore->setOperand(i_nocapture: 0, Val_nocapture: S);
    SpeculatedStore->applyMergedLocation(LocA: BI->getDebugLoc(),
                                         LocB: SpeculatedStore->getDebugLoc());
    // The value stored is still conditional, but the store itself is now
    // unconditionally executed, so we must be sure that any linked dbg.assign
    // intrinsics are tracking the new stored value (the result of the
    // select). If we don't, and the store were to be removed by another pass
    // (e.g. DSE), then we'd eventually end up emitting a location describing
    // the conditional value, unconditionally.
    //
    // === Before this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1, ...
    //   br %cond if.then
    //
    // if.then:
    //   store %two, %x.dest, !DIAssignID !2
    //   dbg.assign %two, "x", ..., !2, ...
    //
    // === After this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1
    //   ...
    //   %merge = select %cond, %two, %one
    //   store %merge, %x.dest, !DIAssignID !2
    //   dbg.assign %merge, "x", ..., !2
    for (DbgVariableRecord *DbgAssign :
         at::getDVRAssignmentMarkers(Inst: SpeculatedStore))
      if (llvm::is_contained(Range: DbgAssign->location_ops(), Element: OrigV))
        DbgAssign->replaceVariableLocationOp(OldValue: OrigV, NewValue: S);
  }

  // Metadata can be dependent on the condition we are hoisting above.
  // Strip all UB-implying metadata on the instruction. Drop the debug loc
  // to avoid making it appear as if the condition is a constant, which would
  // be misleading while debugging.
  // Similarly strip attributes that maybe dependent on condition we are
  // hoisting above.
  for (auto &I : make_early_inc_range(Range&: *ThenBB)) {
    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
      // The speculated store keeps its (merged) location; see above.
      I.dropLocation();
    }
    I.dropUBImplyingAttrsAndMetadata();

    // Drop ephemeral values.
    if (EphTracker.contains(I: &I)) {
      I.replaceAllUsesWith(V: PoisonValue::get(T: I.getType()));
      I.eraseFromParent();
    }
  }

  // Hoist the instructions.
  // Drop DbgVariableRecords attached to these instructions.
  for (auto &It : *ThenBB)
    for (DbgRecord &DR : make_early_inc_range(Range: It.getDbgRecordRange()))
      // Drop all records except assign-kind DbgVariableRecords (dbg.assign
      // equivalent).
      if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(Val: &DR);
          !DVR || !DVR->isDbgAssign())
        It.dropOneDbgRecord(I: &DR);
  // Move everything except ThenBB's terminator up into BB, just before BI.
  BB->splice(ToIt: BI->getIterator(), FromBB: ThenBB, FromBeginIt: ThenBB->begin(),
             FromEndIt: std::prev(x: ThenBB->end()));

  if (!SpeculatedConditionalLoadsStores.empty())
    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
                                Sel);

  // Insert selects and rewrite the PHI operands.
  IRBuilder<NoFolder> Builder(BI);
  for (PHINode &PN : EndBB->phis()) {
    unsigned OrigI = PN.getBasicBlockIndex(BB);
    unsigned ThenI = PN.getBasicBlockIndex(BB: ThenBB);
    Value *OrigV = PN.getIncomingValue(i: OrigI);
    Value *ThenV = PN.getIncomingValue(i: ThenI);

    // Skip PHIs which are trivial.
    if (OrigV == ThenV)
      continue;

    // Create a select whose true value is the speculatively executed value and
    // false value is the pre-existing value. Swap them if the branch
    // destinations were inverted.
    Value *TrueV = ThenV, *FalseV = OrigV;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *V = Builder.CreateSelect(C: BrCond, True: TrueV, False: FalseV, Name: "spec.select", MDFrom: BI);
    PN.setIncomingValue(i: OrigI, V);
    PN.setIncomingValue(i: ThenI, V);
  }

  // Remove speculated pseudo probes.
  for (Instruction *I : SpeculatedPseudoProbes)
    I->eraseFromParent();

  ++NumSpeculations;
  return true;
}
3443
3444using BlocksSet = SmallPtrSet<BasicBlock *, 8>;
3445
3446// Return false if number of blocks searched is too much.
3447static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3448 BlocksSet &ReachesNonLocalUses) {
3449 if (BB == DefBB)
3450 return true;
3451 if (!ReachesNonLocalUses.insert(Ptr: BB).second)
3452 return true;
3453
3454 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3455 return false;
3456 for (BasicBlock *Pred : predecessors(BB))
3457 if (!findReaching(BB: Pred, DefBB, ReachesNonLocalUses))
3458 return false;
3459 return true;
3460}
3461
/// Return true if we can thread a branch across this block.
/// As a side effect, populates \p NonLocalUseBlocks with every block outside
/// \p BB that uses a value defined inside \p BB.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
                                               BlocksSet &NonLocalUseBlocks) {
  int Size = 0;
  EphemeralValueTracker EphTracker;

  // Walk the loop in reverse so that we can identify ephemeral values properly
  // (values only feeding assumes).
  for (Instruction &I : reverse(C: BB->instructionsWithoutDebug(SkipPseudoOp: false))) {
    // Can't fold blocks that contain noduplicate or convergent calls.
    if (CallInst *CI = dyn_cast<CallInst>(Val: &I))
      if (CI->cannotDuplicate() || CI->isConvergent())
        return false;

    // Ignore ephemeral values which are deleted during codegen.
    // We will delete Phis while threading, so Phis should not be accounted in
    // block's size.
    if (!EphTracker.track(I: &I) && !isa<PHINode>(Val: I)) {
      if (Size++ > MaxSmallBlockSize)
        return false; // Don't clone large BB's.
    }

    // Record blocks with non-local uses of values defined in the current basic
    // block.
    for (User *U : I.users()) {
      Instruction *UI = cast<Instruction>(Val: U);
      BasicBlock *UsedInBB = UI->getParent();
      if (UsedInBB == BB) {
        // A value defined in BB and consumed by a PHI in BB must flow back in
        // along an incoming edge, which threading cannot preserve.
        if (isa<PHINode>(Val: UI))
          return false;
      } else
        NonLocalUseBlocks.insert(Ptr: UsedInBB);
    }

    // Looks ok, continue checking.
  }

  return true;
}
3501
3502static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
3503 BasicBlock *To) {
3504 // Don't look past the block defining the value, we might get the value from
3505 // a previous loop iteration.
3506 auto *I = dyn_cast<Instruction>(Val: V);
3507 if (I && I->getParent() == To)
3508 return nullptr;
3509
3510 // We know the value if the From block branches on it.
3511 auto *BI = dyn_cast<CondBrInst>(Val: From->getTerminator());
3512 if (BI && BI->getCondition() == V &&
3513 BI->getSuccessor(i: 0) != BI->getSuccessor(i: 1))
3514 return BI->getSuccessor(i: 0) == To ? ConstantInt::getTrue(Context&: BI->getContext())
3515 : ConstantInt::getFalse(Context&: BI->getContext());
3516
3517 return nullptr;
3518}
3519
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
///
/// Returns std::nullopt when a change was made and the caller should run the
/// fold again, true/false otherwise (true meaning "changed").
static std::optional<bool>
foldCondBranchOnValueKnownInPredecessorImpl(CondBrInst *BI, DomTreeUpdater *DTU,
                                            const DataLayout &DL,
                                            AssumptionCache *AC) {
  // Maps each known constant condition value to the set of predecessor edges
  // along which the condition has that value.
  SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
  BasicBlock *BB = BI->getParent();
  Value *Cond = BI->getCondition();
  PHINode *PN = dyn_cast<PHINode>(Val: Cond);
  if (PN && PN->getParent() == BB) {
    // Degenerate case of a single entry PHI.
    if (PN->getNumIncomingValues() == 1) {
      FoldSingleEntryPHINodes(BB: PN->getParent());
      return true;
    }

    // Constant incoming values pin the condition on the matching edges.
    for (Use &U : PN->incoming_values())
      if (auto *CB = dyn_cast<ConstantInt>(Val&: U))
        KnownValues[CB].insert(X: PN->getIncomingBlock(U));
  } else {
    // Otherwise, see if any predecessor's terminator fixes the condition.
    for (BasicBlock *Pred : predecessors(BB)) {
      if (ConstantInt *CB = getKnownValueOnEdge(V: Cond, From: Pred, To: BB))
        KnownValues[CB].insert(X: Pred);
    }
  }

  if (KnownValues.empty())
    return false;

  // Now we know that this block has multiple preds and two succs.
  // Check that the block is small enough and record which non-local blocks use
  // values defined in the block.

  BlocksSet NonLocalUseBlocks;
  BlocksSet ReachesNonLocalUseBlocks;
  if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
    return false;

  // Jump-threading can only be done to destinations where no values defined
  // in BB are live.

  // Quickly check if both destinations have uses. If so, jump-threading cannot
  // be done.
  if (NonLocalUseBlocks.contains(Ptr: BI->getSuccessor(i: 0)) &&
      NonLocalUseBlocks.contains(Ptr: BI->getSuccessor(i: 1)))
    return false;

  // Search backward from NonLocalUseBlocks to find which blocks
  // reach non-local uses.
  for (BasicBlock *UseBB : NonLocalUseBlocks)
    // Give up if too many blocks are searched.
    if (!findReaching(BB: UseBB, DefBB: BB, ReachesNonLocalUses&: ReachesNonLocalUseBlocks))
      return false;

  for (const auto &Pair : KnownValues) {
    ConstantInt *CB = Pair.first;
    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    // A true condition takes successor 0, false takes successor 1.
    BasicBlock *RealDest = BI->getSuccessor(i: !CB->getZExtValue());

    // Okay, we now know that all edges from PredBB should be revectored to
    // branch to RealDest.
    if (RealDest == BB)
      continue; // Skip self loops.

    // Skip if the predecessor's terminator is an indirect branch.
    if (any_of(Range&: PredBBs, P: [](BasicBlock *PredBB) {
          return isa<IndirectBrInst>(Val: PredBB->getTerminator());
        }))
      continue;

    // Only revector to RealDest if no values defined in BB are live.
    if (ReachesNonLocalUseBlocks.contains(Ptr: RealDest))
      continue;

    LLVM_DEBUG({
      dbgs() << "Condition " << *Cond << " in " << BB->getName()
             << " has value " << *Pair.first << " in predecessors:\n";
      for (const BasicBlock *PredBB : Pair.second)
        dbgs() << "  " << PredBB->getName() << "\n";
      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
    });

    // Split the predecessors we are threading into a new edge block. We'll
    // clone the instructions into this block, and then redirect it to RealDest.
    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, Preds: PredBBs, Suffix: ".critedge", DTU);
    if (!EdgeBB)
      continue;

    // TODO: These just exist to reduce test diff, we can drop them if we like.
    EdgeBB->setName(RealDest->getName() + ".critedge");
    EdgeBB->moveBefore(MovePos: RealDest);

    // Update PHI nodes.
    addPredecessorToBlock(Succ: RealDest, NewPred: EdgeBB, ExistPred: BB);

    // BB may have instructions that are being threaded over. Clone these
    // instructions into EdgeBB. We know that there will be no uses of the
    // cloned instructions outside of EdgeBB.
    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
    ValueToValueMapTy TranslateMap; // Track translated values.
    // The condition is a known constant along these edges.
    TranslateMap[Cond] = CB;

    // RemoveDIs: track instructions that we optimise away while folding, so
    // that we can copy DbgVariableRecords from them later.
    BasicBlock::iterator SrcDbgCursor = BB->begin();
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
      // PHIs are not cloned; they become their incoming value from EdgeBB.
      if (PHINode *PN = dyn_cast<PHINode>(Val&: BBI)) {
        TranslateMap[PN] = PN->getIncomingValueForBlock(BB: EdgeBB);
        continue;
      }
      // Clone the instruction.
      Instruction *N = BBI->clone();
      // Insert the new instruction into its new home.
      N->insertInto(ParentBB: EdgeBB, It: InsertPt);

      if (BBI->hasName())
        N->setName(BBI->getName() + ".c");

      // Update operands due to translation.
      // Key Instructions: Remap all the atom groups.
      if (const DebugLoc &DL = BBI->getDebugLoc())
        mapAtomInstance(DL, VMap&: TranslateMap);
      RemapInstruction(I: N, VM&: TranslateMap,
                       Flags: RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);

      // Check for trivial simplification.
      if (Value *V = simplifyInstruction(I: N, Q: {DL, nullptr, nullptr, AC})) {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = V;
        if (!N->mayHaveSideEffects()) {
          N->eraseFromParent(); // Instruction folded away, don't need actual
                                // inst
          N = nullptr;
        }
      } else {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = N;
      }
      if (N) {
        // Copy all debug-info attached to instructions from the last we
        // successfully clone, up to this instruction (they might have been
        // folded away).
        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
          N->cloneDebugInfoFrom(From: &*SrcDbgCursor);
        SrcDbgCursor = std::next(x: BBI);
        // Clone debug-info on this instruction too.
        N->cloneDebugInfoFrom(From: &*BBI);

        // Register the new instruction with the assumption cache if necessary.
        if (auto *Assume = dyn_cast<AssumeInst>(Val: N))
          if (AC)
            AC->registerAssumption(CI: Assume);
      }
    }

    // Copy any remaining debug records (including those on BI itself) onto
    // the first insertion point in EdgeBB.
    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
      InsertPt->cloneDebugInfoFrom(From: &*SrcDbgCursor);
    InsertPt->cloneDebugInfoFrom(From: BI);

    // Retarget EdgeBB's unconditional branch from BB to RealDest.
    BB->removePredecessor(Pred: EdgeBB);
    UncondBrInst *EdgeBI = cast<UncondBrInst>(Val: EdgeBB->getTerminator());
    EdgeBI->setSuccessor(idx: 0, NewSucc: RealDest);
    EdgeBI->setDebugLoc(BI->getDebugLoc());

    if (DTU) {
      SmallVector<DominatorTree::UpdateType, 2> Updates;
      Updates.push_back(Elt: {DominatorTree::Delete, EdgeBB, BB});
      Updates.push_back(Elt: {DominatorTree::Insert, EdgeBB, RealDest});
      DTU->applyUpdates(Updates);
    }

    // For simplicity, we created a separate basic block for the edge. Merge
    // it back into the predecessor if possible. This not only avoids
    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
    // bypass the check for trivial cycles above.
    MergeBlockIntoPredecessor(BB: EdgeBB, DTU);

    // Signal repeat, simplifying any other constants.
    return std::nullopt;
  }

  return false;
}
3705
3706bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI) {
3707 // Note: If BB is a loop header then there is a risk that threading introduces
3708 // a non-canonical loop by moving a back edge. So we avoid this optimization
3709 // for loop headers if NeedCanonicalLoop is set.
3710 if (Options.NeedCanonicalLoop && is_contained(Range&: LoopHeaders, Element: BI->getParent()))
3711 return false;
3712
3713 std::optional<bool> Result;
3714 bool EverChanged = false;
3715 do {
3716 // Note that None means "we changed things, but recurse further."
3717 Result =
3718 foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC: Options.AC);
3719 EverChanged |= Result == std::nullopt || *Result;
3720 } while (Result == std::nullopt);
3721 return EverChanged;
3722}
3723
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it by rewriting the diamond/triangle that feeds it
/// into selects in the dominating block.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
                                DomTreeUpdater *DTU, AssumptionCache *AC,
                                const DataLayout &DL,
                                bool SpeculateUnpredictables) {
  // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
  // statement", which has a very simple dominance structure.  Basically, we
  // are trying to find the condition that is being branched on, which
  // subsequently causes this merge to happen.  We really want control
  // dependence information for this check, but simplifycfg can't keep it up
  // to date, and this catches most of the cases we care about anyway.
  BasicBlock *BB = PN->getParent();

  BasicBlock *IfTrue, *IfFalse;
  CondBrInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
  if (!DomBI)
    return false;
  Value *IfCond = DomBI->getCondition();
  // Don't bother if the branch will be constant folded trivially.
  if (isa<ConstantInt>(Val: IfCond))
    return false;

  BasicBlock *DomBlock = DomBI->getParent();
  // Collect the side blocks of the diamond/triangle: those incoming blocks of
  // the PHI that end with an unconditional branch (i.e. the speculated arms).
  SmallVector<BasicBlock *, 2> IfBlocks;
  llvm::copy_if(Range: PN->blocks(), Out: std::back_inserter(x&: IfBlocks),
                P: [](BasicBlock *IfBlock) {
                  return isa<UncondBrInst>(Val: IfBlock->getTerminator());
                });
  assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
         "Will have either one or two blocks to speculate.");

  // If the branch is non-unpredictable, see if we either predictably jump to
  // the merge bb (if we have only a single 'then' block), or if we predictably
  // jump to one specific 'then' block (if we have two of them).
  // It isn't beneficial to speculatively execute the code
  // from the block that we know is predictably not entered.
  bool IsUnpredictable = DomBI->getMetadata(KindID: LLVMContext::MD_unpredictable);
  if (!IsUnpredictable) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(I: *DomBI, TrueVal&: TWeight, FalseVal&: FWeight) &&
        (TWeight + FWeight) != 0) {
      BranchProbability BITrueProb =
          BranchProbability::getBranchProbability(Numerator: TWeight, Denominator: TWeight + FWeight);
      BranchProbability Likely = TTI.getPredictableBranchThreshold();
      BranchProbability BIFalseProb = BITrueProb.getCompl();
      if (IfBlocks.size() == 1) {
        // Triangle: give up if the edge straight to the merge block is likely.
        BranchProbability BIBBProb =
            DomBI->getSuccessor(i: 0) == BB ? BITrueProb : BIFalseProb;
        if (BIBBProb >= Likely)
          return false;
      } else {
        // Diamond: give up if either arm is strongly predicted.
        if (BITrueProb >= Likely || BIFalseProb >= Likely)
          return false;
      }
    }
  }

  // Don't try to fold an unreachable block. For example, the phi node itself
  // can't be the candidate if-condition for a select that we want to form.
  if (auto *IfCondPhiInst = dyn_cast<PHINode>(Val: IfCond))
    if (IfCondPhiInst->getParent() == BB)
      return false;

  // Okay, we found that we can merge this two-entry phi node into a select.
  // Doing so would require us to fold *all* two entry phi nodes in this block.
  // At some point this becomes non-profitable (particularly if the target
  // doesn't support cmov's).  Only do this transformation if there are two or
  // fewer PHI nodes in this block.
  unsigned NumPhis = 0;
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(Val: I); ++NumPhis, ++I)
    if (NumPhis > 2)
      return false;

  // Loop over the PHI's seeing if we can promote them all to select
  // instructions.  While we are at it, keep track of the instructions
  // that need to be moved to the dominating block.
  SmallPtrSet<Instruction *, 4> AggressiveInsts;
  SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
  InstructionCost Cost = 0;
  InstructionCost Budget =
      TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
  // An unpredictable branch carries a mispredict cost we can spend on selects.
  if (SpeculateUnpredictables && IsUnpredictable)
    Budget += TTI.getBranchMispredictPenalty();

  bool Changed = false;
  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(Val: II);) {
    PHINode *PN = cast<PHINode>(Val: II++);
    if (Value *V = simplifyInstruction(I: PN, Q: {DL, PN})) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
      continue;
    }

    // Both incoming values must be computable at (or hoistable to) DomBI
    // within the shared cost budget.
    if (!dominatesMergePoint(V: PN->getIncomingValue(i: 0), BB, InsertPt: DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions) ||
        !dominatesMergePoint(V: PN->getIncomingValue(i: 1), BB, InsertPt: DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions))
      return Changed;
  }

  // If we folded the first phi, PN dangles at this point. Refresh it. If
  // we ran out of PHIs then we simplified them all.
  PN = dyn_cast<PHINode>(Val: BB->begin());
  if (!PN)
    return true;

  // Don't fold i1 branches on PHIs which contain binary operators or
  // (possibly inverted) select form of or/ands if their parameters are
  // an equality test.
  auto IsBinOpOrAndEq = [](Value *V) {
    CmpPredicate Pred;
    if (match(V, P: m_CombineOr(
                     L: m_CombineOr(
                         L: m_BinOp(L: m_Cmp(Pred, L: m_Value(), R: m_Value()), R: m_Value()),
                         R: m_BinOp(L: m_Value(), R: m_Cmp(Pred, L: m_Value(), R: m_Value()))),
                     R: m_c_Select(L: m_ImmConstant(),
                                 R: m_Cmp(Pred, L: m_Value(), R: m_Value()))))) {
      return CmpInst::isEquality(pred: Pred);
    }
    return false;
  };
  if (PN->getType()->isIntegerTy(Bitwidth: 1) &&
      (IsBinOpOrAndEq(PN->getIncomingValue(i: 0)) ||
       IsBinOpOrAndEq(PN->getIncomingValue(i: 1)) || IsBinOpOrAndEq(IfCond)))
    return Changed;

  // If all PHI nodes are promotable, check to make sure that all instructions
  // in the predecessor blocks can be promoted as well. If not, we won't be able
  // to get rid of the control flow, so it's not worth promoting to select
  // instructions.
  for (BasicBlock *IfBlock : IfBlocks)
    for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
      if (!AggressiveInsts.count(Ptr: &*I) && !I->isDebugOrPseudoInst()) {
        // This is not an aggressive instruction that we can promote.
        // Because of this, we won't be able to get rid of the control flow, so
        // the xform is not worth it.
        return Changed;
      }

  // If either of the blocks has it's address taken, we can't do this fold.
  if (any_of(Range&: IfBlocks,
             P: [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
    return Changed;

  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond;
             if (IsUnpredictable) dbgs() << " (unpredictable)";
             dbgs() << "  T: " << IfTrue->getName()
                    << "  F: " << IfFalse->getName() << "\n");

  // If we can still promote the PHI nodes after this gauntlet of tests,
  // do all of the PHI's now.

  // Move all 'aggressive' instructions, which are defined in the
  // conditional parts of the if's up to the dominating block.
  for (BasicBlock *IfBlock : IfBlocks)
    hoistAllInstructionsInto(DomBlock, InsertPt: DomBI, BB: IfBlock);

  IRBuilder<NoFolder> Builder(DomBI);
  // Propagate fast-math-flags from phi nodes to replacement selects.
  while (PHINode *PN = dyn_cast<PHINode>(Val: BB->begin())) {
    // Change the PHI node into a select instruction.
    Value *TrueVal = PN->getIncomingValueForBlock(BB: IfTrue);
    Value *FalseVal = PN->getIncomingValueForBlock(BB: IfFalse);

    Value *Sel = Builder.CreateSelectFMF(C: IfCond, True: TrueVal, False: FalseVal,
                                         FMFSource: isa<FPMathOperator>(Val: PN) ? PN : nullptr,
                                         Name: "", MDFrom: DomBI);
    PN->replaceAllUsesWith(V: Sel);
    Sel->takeName(V: PN);
    PN->eraseFromParent();
  }

  // At this point, all IfBlocks are empty, so our if statement
  // has been flattened.  Change DomBlock to jump directly to our new block to
  // avoid other simplifycfg's kicking in on the diamond.
  Builder.CreateBr(Dest: BB);

  SmallVector<DominatorTree::UpdateType, 3> Updates;
  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, DomBlock, BB});
    for (auto *Successor : successors(BB: DomBlock))
      Updates.push_back(Elt: {DominatorTree::Delete, DomBlock, Successor});
  }

  DomBI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
3918
3919static Value *createLogicalOp(IRBuilderBase &Builder,
3920 Instruction::BinaryOps Opc, Value *LHS,
3921 Value *RHS, const Twine &Name = "") {
3922 // Try to relax logical op to binary op.
3923 if (impliesPoison(ValAssumedPoison: RHS, V: LHS))
3924 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3925 if (Opc == Instruction::And)
3926 return Builder.CreateLogicalAnd(Cond1: LHS, Cond2: RHS, Name);
3927 if (Opc == Instruction::Or)
3928 return Builder.CreateLogicalOr(Cond1: LHS, Cond2: RHS, Name);
3929 llvm_unreachable("Invalid logical opcode");
3930}
3931
3932/// Return true if either PBI or BI has branch weight available, and store
3933/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3934/// not have branch weight, use 1:1 as its weight.
3935static bool extractPredSuccWeights(CondBrInst *PBI, CondBrInst *BI,
3936 uint64_t &PredTrueWeight,
3937 uint64_t &PredFalseWeight,
3938 uint64_t &SuccTrueWeight,
3939 uint64_t &SuccFalseWeight) {
3940 bool PredHasWeights =
3941 extractBranchWeights(I: *PBI, TrueVal&: PredTrueWeight, FalseVal&: PredFalseWeight);
3942 bool SuccHasWeights =
3943 extractBranchWeights(I: *BI, TrueVal&: SuccTrueWeight, FalseVal&: SuccFalseWeight);
3944 if (PredHasWeights || SuccHasWeights) {
3945 if (!PredHasWeights)
3946 PredTrueWeight = PredFalseWeight = 1;
3947 if (!SuccHasWeights)
3948 SuccTrueWeight = SuccFalseWeight = 1;
3949 return true;
3950 } else {
3951 return false;
3952 }
3953}
3954
/// Determine if the two branches share a common destination and deduce a glue
/// that joins the branches' conditions to arrive at the common destination if
/// that would be profitable.
///
/// On success returns the common successor, the binary opcode (And/Or) that
/// combines the two conditions, and whether PBI's condition must be inverted
/// first.
static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
shouldFoldCondBranchesToCommonDestination(CondBrInst *BI, CondBrInst *PBI,
                                          const TargetTransformInfo *TTI) {
  assert(BI && PBI && "Both blocks must end with a conditional branches.");
  assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
         "PredBB must be a predecessor of BB.");

  // We have the potential to fold the conditions together, but if the
  // predecessor branch is predictable, we may not want to merge them.
  // If no usable weights exist, PBITrueProb stays default-constructed, so
  // isUnknown() below is true and we treat the branch as not predictable.
  uint64_t PTWeight, PFWeight;
  BranchProbability PBITrueProb, Likely;
  if (TTI && !PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
      extractBranchWeights(I: *PBI, TrueVal&: PTWeight, FalseVal&: PFWeight) &&
      (PTWeight + PFWeight) != 0) {
    PBITrueProb =
        BranchProbability::getBranchProbability(Numerator: PTWeight, Denominator: PTWeight + PFWeight);
    Likely = TTI->getPredictableBranchThreshold();
  }

  // The four cases cover which successor of PBI matches which successor of
  // BI; the matching pair determines the opcode and whether PBI's condition
  // needs inverting.
  if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(i: 0), Instruction::Or, false}};
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(i: 1), Instruction::And, false}};
  } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(i: 1), Instruction::And, true}};
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(i: 0), Instruction::Or, true}};
  }
  return std::nullopt;
}
3996
/// Fold BB's conditional branch BI into its predecessor PredBlock's
/// conditional branch PBI, assuming the caller has already verified via
/// shouldFoldCondBranchesToCommonDestination() that a fold recipe exists.
/// BB's "bonus" instructions are cloned into PredBlock, the two branch
/// conditions are glued with a logical op, and PBI is rewired so PredBlock
/// reaches both of BB's destinations directly. Branch-weight and loop
/// metadata are migrated where possible. Always returns true.
static bool performBranchToCommonDestFolding(CondBrInst *BI, CondBrInst *PBI,
                                             DomTreeUpdater *DTU,
                                             MemorySSAUpdater *MSSAU,
                                             const TargetTransformInfo *TTI) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *PredBlock = PBI->getParent();

  // Determine if the two branches share a common destination.
  BasicBlock *CommonSucc;
  Instruction::BinaryOps Opc;
  bool InvertPredCond;
  // Recompute the fold recipe; the caller guarantees it exists, so the
  // unconditional dereference of the optional is safe.
  std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) =
      *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);

  LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

  IRBuilder<> Builder(PBI);
  // The builder is used to create instructions to eliminate the branch in BB.
  // If BB's terminator has !annotation metadata, add it to the new
  // instructions.
  Builder.CollectMetadataToCopy(Src: BB->getTerminator(),
                                MetadataKinds: {LLVMContext::MD_annotation});

  // If we need to invert the condition in the pred block to match, do so now.
  if (InvertPredCond) {
    InvertBranch(PBI, Builder);
  }

  // UniqueSucc is the successor of BB that PredBlock will branch to directly
  // in place of BB once the fold is complete.
  BasicBlock *UniqueSucc =
      PBI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 0) : BI->getSuccessor(i: 1);

  // Before cloning instructions, notify the successor basic block that it
  // is about to have a new predecessor. This will update PHI nodes,
  // which will allow us to update live-out uses of bonus instructions.
  addPredecessorToBlock(Succ: UniqueSucc, NewPred: PredBlock, ExistPred: BB, MSSAU);

  // Try to update branch weights.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  SmallVector<uint64_t, 2> MDWeights;
  if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight)) {

    if (PBI->getSuccessor(i: 0) == BB) {
      // PBI: br i1 %x, BB, FalseDest
      // BI:  br i1 %y, UniqueSucc, FalseDest
      // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
      MDWeights.push_back(Elt: PredTrueWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
      //               TrueWeight for PBI * FalseWeight for BI.
      // We assume that total weights of a CondBrInst can fit into 32 bits.
      // Therefore, we will not have overflow using 64-bit arithmetic.
      MDWeights.push_back(Elt: PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
                          PredTrueWeight * SuccFalseWeight);
    } else {
      // PBI: br i1 %x, TrueDest, BB
      // BI:  br i1 %y, TrueDest, UniqueSucc
      // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
      //              FalseWeight for PBI * TrueWeight for BI.
      MDWeights.push_back(Elt: PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
                          PredFalseWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
      MDWeights.push_back(Elt: PredFalseWeight * SuccFalseWeight);
    }

    setFittedBranchWeights(I&: *PBI, Weights: MDWeights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);

    // TODO: If BB is reachable from all paths through PredBlock, then we
    // could replace PBI's branch probabilities with BI's.
  } else
    PBI->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);

  // Now, update the CFG.
  // The index of PBI's edge into BB is (getSuccessor(0) != BB); retarget that
  // edge at UniqueSucc.
  PBI->setSuccessor(idx: PBI->getSuccessor(i: 0) != BB, NewSucc: UniqueSucc);

  if (DTU)
    DTU->applyUpdates(Updates: {{DominatorTree::Insert, PredBlock, UniqueSucc},
                       {DominatorTree::Delete, PredBlock, BB}});

  // If BI was a loop latch, it may have had associated loop metadata.
  // We need to copy it to the new latch, that is, PBI.
  if (MDNode *LoopMD = BI->getMetadata(KindID: LLVMContext::MD_loop))
    PBI->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD);

  // Clone BB's bonus instructions into PredBlock, recording the mapping so
  // cloned values can be referenced below.
  ValueToValueMapTy VMap; // maps original values to cloned values
  cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);

  Module *M = BB->getModule();

  // Copy BI's debug records onto the new terminator and remap them so they
  // refer to the cloned values rather than the originals in BB.
  PredBlock->getTerminator()->cloneDebugInfoFrom(From: BB->getTerminator());
  for (DbgVariableRecord &DVR :
       filterDbgVars(R: PredBlock->getTerminator()->getDbgRecordRange())) {
    RemapDbgRecord(M, DR: &DVR, VM&: VMap,
                   Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  }

  // Now that the Cond was cloned into the predecessor basic block,
  // or/and the two conditions together.
  Value *BICond = VMap[BI->getCondition()];
  PBI->setCondition(
      createLogicalOp(Builder, Opc, LHS: PBI->getCondition(), RHS: BICond, Name: "or.cond"));
  // If the glue was emitted as a select (the poison-safe logical-op form),
  // carry the combined weights over onto the select as well.
  if (!ProfcheckDisableMetadataFixes)
    if (auto *SI = dyn_cast<SelectInst>(Val: PBI->getCondition()))
      if (!MDWeights.empty()) {
        assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
        setFittedBranchWeights(I&: *SI, Weights: {MDWeights[0], MDWeights[1]},
                               /*IsExpected=*/false, /*ElideAllZero=*/true);
      }

  ++NumFoldBranchToCommonDest;
  return true;
}
4109
4110/// Return if an instruction's type or any of its operands' types are a vector
4111/// type.
4112static bool isVectorOp(Instruction &I) {
4113 return I.getType()->isVectorTy() || any_of(Range: I.operands(), P: [](Use &U) {
4114 return U->getType()->isVectorTy();
4115 });
4116}
4117
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
bool llvm::foldBranchToCommonDest(CondBrInst *BI, DomTreeUpdater *DTU,
                                  MemorySSAUpdater *MSSAU,
                                  const TargetTransformInfo *TTI,
                                  unsigned BonusInstThreshold) {
  BasicBlock *BB = BI->getParent();
  // Cost the transform purely by size under minsize, otherwise by combined
  // size-and-latency.
  TargetTransformInfo::TargetCostKind CostKind =
      BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
                                    : TargetTransformInfo::TCK_SizeAndLatency;

  Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());

  // Only handle conditions that are single-use cmp/binop/select/trunc
  // instructions defined in BB itself.
  if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Val: Cond) ||
      Cond->getParent() != BB || !Cond->hasOneUse())
    return false;

  // Finally, don't infinitely unroll conditional loops.
  if (is_contained(Range: successors(BB), Element: BB))
    return false;

  // With which predecessors will we want to deal with?
  SmallVector<BasicBlock *, 8> Preds;
  for (BasicBlock *PredBlock : predecessors(BB)) {
    CondBrInst *PBI = dyn_cast<CondBrInst>(Val: PredBlock->getTerminator());

    // Check that we have two conditional branches. If there is a PHI node in
    // the common successor, verify that the same value flows in from both
    // blocks.
    if (!PBI || !safeToMergeTerminators(SI1: BI, SI2: PBI))
      continue;

    // Determine if the two branches share a common destination.
    BasicBlock *CommonSucc;
    Instruction::BinaryOps Opc;
    bool InvertPredCond;
    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
      std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) = *Recipe;
    else
      continue;

    // Check the cost of inserting the necessary logic before performing the
    // transformation.
    if (TTI) {
      Type *Ty = BI->getCondition()->getType();
      InstructionCost Cost = TTI->getArithmeticInstrCost(Opcode: Opc, Ty, CostKind);
      // Inverting a condition that is not a single-use cmp costs an extra
      // xor; account for it.
      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
                             !isa<CmpInst>(Val: PBI->getCondition())))
        Cost += TTI->getArithmeticInstrCost(Opcode: Instruction::Xor, Ty, CostKind);

      if (Cost > BranchFoldThreshold)
        continue;
    }

    // Ok, we do want to deal with this predecessor. Record it.
    Preds.emplace_back(Args&: PredBlock);
  }

  // If there aren't any predecessors into which we can fold,
  // don't bother checking the cost.
  if (Preds.empty())
    return false;

  // Only allow this transformation if computing the condition doesn't involve
  // too many instructions and these involved instructions can be executed
  // unconditionally. We denote all involved instructions except the condition
  // as "bonus instructions", and only allow this transformation when the
  // number of the bonus instructions we'll need to create when cloning into
  // each predecessor does not exceed a certain threshold.
  unsigned NumBonusInsts = 0;
  bool SawVectorOp = false;
  const unsigned PredCount = Preds.size();
  for (Instruction &I : *BB) {
    // Don't check the branch condition comparison itself.
    if (&I == Cond)
      continue;
    // Ignore the terminator.
    if (isa<UncondBrInst, CondBrInst>(Val: I))
      continue;
    // I must be safe to execute unconditionally.
    if (!isSafeToSpeculativelyExecute(I: &I))
      return false;
    SawVectorOp |= isVectorOp(I);

    // Account for the cost of duplicating this instruction into each
    // predecessor. Ignore free instructions.
    if (!TTI || TTI->getInstructionCost(U: &I, CostKind) !=
                    TargetTransformInfo::TCC_Free) {
      NumBonusInsts += PredCount;

      // Early exits once we reach the limit.
      // Note: this uses the most permissive (vector-multiplied) budget; the
      // exact budget, which only applies the multiplier when a vector op was
      // seen, is enforced after the loop.
      if (NumBonusInsts >
          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
        return false;
    }

    // A use of I is "block-closed-SSA-friendly" if it is either a PHI
    // incoming value from BB, or a later instruction within BB itself.
    auto IsBCSSAUse = [BB, &I](Use &U) {
      auto *UI = cast<Instruction>(Val: U.getUser());
      if (auto *PN = dyn_cast<PHINode>(Val: UI))
        return PN->getIncomingBlock(U) == BB;
      return UI->getParent() == BB && I.comesBefore(Other: UI);
    };

    // Does this instruction require rewriting of uses?
    if (!all_of(Range: I.uses(), P: IsBCSSAUse))
      return false;
  }
  if (NumBonusInsts >
      BonusInstThreshold *
          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
    return false;

  // Ok, we have the budget. Perform the transformation.
  for (BasicBlock *PredBlock : Preds) {
    auto *PBI = cast<CondBrInst>(Val: PredBlock->getTerminator());
    // Note: only the first recorded predecessor is folded per call — the
    // return exits the loop immediately; the remaining candidates are
    // presumably picked up when this fold is re-run (TODO confirm intent).
    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
  }
  return false;
}
4238
4239// If there is only one store in BB1 and BB2, return it, otherwise return
4240// nullptr.
4241static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
4242 StoreInst *S = nullptr;
4243 for (auto *BB : {BB1, BB2}) {
4244 if (!BB)
4245 continue;
4246 for (auto &I : *BB)
4247 if (auto *SI = dyn_cast<StoreInst>(Val: &I)) {
4248 if (S)
4249 // Multiple stores seen.
4250 return nullptr;
4251 else
4252 S = SI;
4253 }
4254 }
4255 return S;
4256}
4257
/// Make \p V, which is defined in \p BB (or flows through it), available in
/// BB's single successor, returning either V itself or a PHI node in that
/// successor. \p AlternativeV, if non-null, is the value that must flow into
/// the PHI from the successor's single other predecessor.
static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
                                              Value *AlternativeV = nullptr) {
  // PHI is going to be a PHI node that allows the value V that is defined in
  // BB to be referenced in BB's only successor.
  //
  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
  // doesn't matter to us what the other operand is (it'll never get used). We
  // could just create a new PHI with an undef incoming value, but that could
  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
  // other PHI. So here we directly look for some PHI in BB's successor with V
  // as an incoming operand. If we find one, we use it, else we create a new
  // one.
  //
  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
  // where OtherBB is the single other predecessor of BB's only successor.
  PHINode *PHI = nullptr;
  BasicBlock *Succ = BB->getSingleSuccessor();

  // First try to reuse an existing PHI in the successor that already merges V
  // (and, when required, AlternativeV from the other predecessor).
  for (auto I = Succ->begin(); isa<PHINode>(Val: I); ++I)
    if (cast<PHINode>(Val&: I)->getIncomingValueForBlock(BB) == V) {
      PHI = cast<PHINode>(Val&: I);
      if (!AlternativeV)
        break;

      assert(Succ->hasNPredecessors(2));
      auto PredI = pred_begin(BB: Succ);
      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
      if (PHI->getIncomingValueForBlock(BB: OtherPredBB) == AlternativeV)
        break;
      // Right V, wrong alternative value — keep scanning.
      PHI = nullptr;
    }
  if (PHI)
    return PHI;

  // If V is not an instruction defined in BB, just return it.
  if (!AlternativeV &&
      (!isa<Instruction>(Val: V) || cast<Instruction>(Val: V)->getParent() != BB))
    return V;

  // Otherwise create a fresh PHI. Predecessors other than BB contribute
  // AlternativeV, or poison when no alternative was requested (the comment
  // above explains why that incoming value is never used in that case).
  PHI = PHINode::Create(Ty: V->getType(), NumReservedValues: 2, NameStr: "simplifycfg.merge");
  PHI->insertBefore(InsertPos: Succ->begin());
  PHI->addIncoming(V, BB);
  for (BasicBlock *PredBB : predecessors(BB: Succ))
    if (PredBB != BB)
      PHI->addIncoming(
          V: AlternativeV ? AlternativeV : PoisonValue::get(T: V->getType()), BB: PredBB);
  return PHI;
}
4307
/// Try to merge the unique store in {PTB,PFB} with the unique store in
/// {QTB,QFB} into a single store in PostBB, predicated on the disjunction of
/// the two branch conditions. The block layout is the stacked
/// diamond/triangle shape described in mergeConditionalStores; \p InvertPCond
/// and \p InvertQCond record which conditions were canonicalized by
/// inversion there. Both stores are assumed to target \p Address (the caller
/// intersects the store addresses before calling). Returns true if the
/// stores were merged.
static bool mergeConditionalStoreToAddress(
    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
  // For every pointer, there must be exactly two stores, one coming from
  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
  // store (to any address) in PTB,PFB or QTB,QFB.
  // FIXME: We could relax this restriction with a bit more work and performance
  // testing.
  StoreInst *PStore = findUniqueStoreInBlocks(BB1: PTB, BB2: PFB);
  StoreInst *QStore = findUniqueStoreInBlocks(BB1: QTB, BB2: QFB);
  if (!PStore || !QStore)
    return false;

  // Now check the stores are compatible.
  if (!QStore->isUnordered() || !PStore->isUnordered() ||
      PStore->getValueOperand()->getType() !=
          QStore->getValueOperand()->getType())
    return false;

  // Check that sinking the store won't cause program behavior changes. Sinking
  // the store out of the Q blocks won't change any behavior as we're sinking
  // from a block to its unconditional successor. But we're moving a store from
  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
  // So we need to check that there are no aliasing loads or stores in
  // QBI, QTB and QFB. We also need to check there are no conflicting memory
  // operations between PStore and the end of its parent block.
  //
  // The ideal way to do this is to query AliasAnalysis, but we don't
  // preserve AA currently so that is dangerous. Be super safe and just
  // check there are no other memory operations at all.
  for (auto &I : *QFB->getSinglePredecessor())
    if (I.mayReadOrWriteMemory())
      return false;
  for (auto &I : *QFB)
    if (&I != QStore && I.mayReadOrWriteMemory())
      return false;
  if (QTB)
    for (auto &I : *QTB)
      if (&I != QStore && I.mayReadOrWriteMemory())
        return false;
  // Memory ops after PStore in its own block would also conflict with the
  // sunk store.
  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
       I != E; ++I)
    if (&*I != PStore && I->mayReadOrWriteMemory())
      return false;

  // If we're not in aggressive mode, we only optimize if we have some
  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
    if (!BB)
      return true;
    // Heuristic: if the block can be if-converted/phi-folded and the
    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
    // thread this store.
    InstructionCost Cost = 0;
    InstructionCost Budget =
        PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
    for (auto &I : BB->instructionsWithoutDebug(SkipPseudoOp: false)) {
      // Consider terminator instruction to be free.
      if (I.isTerminator())
        continue;
      // If this is one the stores that we want to speculate out of this BB,
      // then don't count it's cost, consider it to be free.
      if (auto *S = dyn_cast<StoreInst>(Val: &I))
        if (llvm::find(Range&: FreeStores, Val: S))
          continue;
      // Else, we have a white-list of instructions that we are ak speculating.
      if (!isa<BinaryOperator>(Val: I) && !isa<GetElementPtrInst>(Val: I))
        return false; // Not in white-list - not worthwhile folding.
      // And finally, if this is a non-free instruction that we are okay
      // speculating, ensure that we consider the speculation budget.
      Cost +=
          TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
      if (Cost > Budget)
        return false; // Eagerly refuse to fold as soon as we're out of budget.
    }
    assert(Cost <= Budget &&
           "When we run out of budget we will eagerly return from within the "
           "per-instruction loop.");
    return true;
  };

  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
  if (!MergeCondStoresAggressively &&
      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
    return false;

  // If PostBB has more than two predecessors, we need to split it so we can
  // sink the store.
  if (std::next(x: pred_begin(BB: PostBB), n: 2) != pred_end(BB: PostBB)) {
    // We know that QFB's only successor is PostBB. And QFB has a single
    // predecessor. If QTB exists, then its only successor is also PostBB.
    // If QTB does not exist, then QFB's only predecessor has a conditional
    // branch to QFB and PostBB.
    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
    BasicBlock *NewBB =
        SplitBlockPredecessors(BB: PostBB, Preds: {QFB, TruePred}, Suffix: "condstore.split", DTU);
    if (!NewBB)
      return false;
    PostBB = NewBB;
  }

  // OK, we're going to sink the stores to PostBB. The store has to be
  // conditional though, so first create the predicate.
  CondBrInst *PBranch =
      cast<CondBrInst>(Val: PFB->getSinglePredecessor()->getTerminator());
  CondBrInst *QBranch =
      cast<CondBrInst>(Val: QFB->getSinglePredecessor()->getTerminator());
  Value *PCond = PBranch->getCondition();
  Value *QCond = QBranch->getCondition();

  // Make each stored value reachable in PostBB (QPHI merges with PPHI so a
  // single value can be stored).
  Value *PPHI = ensureValueAvailableInSuccessor(V: PStore->getValueOperand(),
                                                BB: PStore->getParent());
  Value *QPHI = ensureValueAvailableInSuccessor(V: QStore->getValueOperand(),
                                                BB: QStore->getParent(), AlternativeV: PPHI);

  BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
  IRBuilder<> QB(PostBB, PostBBFirst);
  QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());

  // If a store actually lives in the false block rather than the true block,
  // its condition needs one extra inversion for the predicate to hold exactly
  // on the store's path.
  InvertPCond ^= (PStore->getParent() != PTB);
  InvertQCond ^= (QStore->getParent() != QTB);
  Value *PPred = InvertPCond ? QB.CreateNot(V: PCond) : PCond;
  Value *QPred = InvertQCond ? QB.CreateNot(V: QCond) : QCond;

  Value *CombinedPred = QB.CreateOr(LHS: PPred, RHS: QPred);

  BasicBlock::iterator InsertPt = QB.GetInsertPoint();
  auto *T = SplitBlockAndInsertIfThen(Cond: CombinedPred, SplitBefore: InsertPt,
                                      /*Unreachable=*/false,
                                      /*BranchWeights=*/nullptr, DTU);
  // When both original branches had profile data, derive weights for the new
  // guarding branch from the disjunction of the two (inversion-adjusted)
  // weight pairs.
  if (hasBranchWeightMD(I: *PBranch) && hasBranchWeightMD(I: *QBranch) &&
      !ProfcheckDisableMetadataFixes) {
    SmallVector<uint32_t, 2> PWeights, QWeights;
    extractBranchWeights(I: *PBranch, Weights&: PWeights);
    extractBranchWeights(I: *QBranch, Weights&: QWeights);
    if (InvertPCond)
      std::swap(a&: PWeights[0], b&: PWeights[1]);
    if (InvertQCond)
      std::swap(a&: QWeights[0], b&: QWeights[1]);
    auto CombinedWeights = getDisjunctionWeights(B1: PWeights, B2: QWeights);
    setFittedBranchWeights(I&: *PostBB->getTerminator(),
                           Weights: {CombinedWeights[0], CombinedWeights[1]},
                           /*IsExpected=*/false, /*ElideAllZero=*/true);
  }

  QB.SetInsertPoint(T);
  StoreInst *SI = cast<StoreInst>(Val: QB.CreateStore(Val: QPHI, Ptr: Address));
  SI->setAAMetadata(PStore->getAAMetadata().merge(Other: QStore->getAAMetadata()));
  // Choose the minimum alignment. If we could prove both stores execute, we
  // could use biggest one. In this case, though, we only know that one of the
  // stores executes. And we don't know it's safe to take the alignment from a
  // store that doesn't execute.
  SI->setAlignment(std::min(a: PStore->getAlign(), b: QStore->getAlign()));

  QStore->eraseFromParent();
  PStore->eraseFromParent();

  return true;
}
4469
/// Look for two stacked diamonds/triangles headed by \p PBI and \p QBI whose
/// conditional arms store to a common address, and merge those stores into a
/// single predicated store after the second diamond (see the detailed
/// commentary below). Returns true if any store pair was merged.
static bool mergeConditionalStores(CondBrInst *PBI, CondBrInst *QBI,
                                   DomTreeUpdater *DTU, const DataLayout &DL,
                                   const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //     PBI       or      PBI        or a combination of the two
  //    /   \               | \
  //   PTB  PFB             |  PFB
  //    \   /               | /
  //     QBI                QBI
  //    /   \               | \
  //   QTB  QFB             |  QFB
  //    \   /               | /
  //    PostBB            PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(i: 0);
  BasicBlock *PFB = PBI->getSuccessor(i: 1);
  BasicBlock *QTB = QBI->getSuccessor(i: 0);
  BasicBlock *QFB = QBI->getSuccessor(i: 1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(a&: PFB, b&: PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(a&: QFB, b&: QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  // The middle block must be referenced exactly twice (presumably PBI's two
  // edges into it) — NOTE(review): confirm no other use kinds are possible.
  if (!QBI->getParent()->hasNUses(N: 2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
        PStoreAddresses.insert(Ptr: SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
        QStoreAddresses.insert(Ptr: SI->getPointerOperand());
  }

  set_intersect(S1&: PStoreAddresses, S2: QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  // Attempt a merge for every address stored to on both sides.
  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}
4578
4579/// If the previous block ended with a widenable branch, determine if reusing
4580/// the target block is profitable and legal. This will have the effect of
4581/// "widening" PBI, but doesn't require us to reason about hosting safety.
4582static bool tryWidenCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI,
4583 DomTreeUpdater *DTU) {
4584 // TODO: This can be generalized in two important ways:
4585 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4586 // values from the PBI edge.
4587 // 2) We can sink side effecting instructions into BI's fallthrough
4588 // successor provided they doesn't contribute to computation of
4589 // BI's condition.
4590 BasicBlock *IfTrueBB = PBI->getSuccessor(i: 0);
4591 BasicBlock *IfFalseBB = PBI->getSuccessor(i: 1);
4592 if (!isWidenableBranch(U: PBI) || IfTrueBB != BI->getParent() ||
4593 !BI->getParent()->getSinglePredecessor())
4594 return false;
4595 if (!IfFalseBB->phis().empty())
4596 return false; // TODO
4597 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4598 // may undo the transform done here.
4599 // TODO: There might be a more fine-grained solution to this.
4600 if (!llvm::succ_empty(BB: IfFalseBB))
4601 return false;
4602 // Use lambda to lazily compute expensive condition after cheap ones.
4603 auto NoSideEffects = [](BasicBlock &BB) {
4604 return llvm::none_of(Range&: BB, P: [](const Instruction &I) {
4605 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4606 });
4607 };
4608 if (BI->getSuccessor(i: 1) != IfFalseBB && // no inf looping
4609 BI->getSuccessor(i: 1)->getTerminatingDeoptimizeCall() && // profitability
4610 NoSideEffects(*BI->getParent())) {
4611 auto *OldSuccessor = BI->getSuccessor(i: 1);
4612 OldSuccessor->removePredecessor(Pred: BI->getParent());
4613 BI->setSuccessor(idx: 1, NewSucc: IfFalseBB);
4614 if (DTU)
4615 DTU->applyUpdates(
4616 Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4617 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4618 return true;
4619 }
4620 if (BI->getSuccessor(i: 0) != IfFalseBB && // no inf looping
4621 BI->getSuccessor(i: 0)->getTerminatingDeoptimizeCall() && // profitability
4622 NoSideEffects(*BI->getParent())) {
4623 auto *OldSuccessor = BI->getSuccessor(i: 0);
4624 OldSuccessor->removePredecessor(Pred: BI->getParent());
4625 BI->setSuccessor(idx: 0, NewSucc: IfFalseBB);
4626 if (DTU)
4627 DTU->applyUpdates(
4628 Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4629 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4630 return true;
4631 }
4632 return false;
4633}
4634
4635/// If we have a conditional branch as a predecessor of another block,
4636/// this function tries to simplify it. We know
4637/// that PBI and BI are both conditional branches, and BI is in one of the
4638/// successor blocks of PBI - PBI branches to BI.
4639static bool SimplifyCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI,
4640 DomTreeUpdater *DTU,
4641 const DataLayout &DL,
4642 const TargetTransformInfo &TTI) {
4643 BasicBlock *BB = BI->getParent();
4644
4645 // If this block ends with a branch instruction, and if there is a
4646 // predecessor that ends on a branch of the same condition, make
4647 // this conditional branch redundant.
4648 if (PBI->getCondition() == BI->getCondition() &&
4649 PBI->getSuccessor(i: 0) != PBI->getSuccessor(i: 1)) {
4650 // Okay, the outcome of this conditional branch is statically
4651 // knowable. If this block had a single pred, handle specially, otherwise
4652 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4653 if (BB->getSinglePredecessor()) {
4654 // Turn this into a branch on constant.
4655 bool CondIsTrue = PBI->getSuccessor(i: 0) == BB;
4656 BI->setCondition(
4657 ConstantInt::get(Ty: Type::getInt1Ty(C&: BB->getContext()), V: CondIsTrue));
4658 return true; // Nuke the branch on constant.
4659 }
4660 }
4661
4662 // If the previous block ended with a widenable branch, determine if reusing
4663 // the target block is profitable and legal. This will have the effect of
4664 // "widening" PBI, but doesn't require us to reason about hosting safety.
4665 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4666 return true;
4667
4668 // If both branches are conditional and both contain stores to the same
4669 // address, remove the stores from the conditionals and create a conditional
4670 // merged store at the end.
4671 if (MergeCondStores && mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
4672 return true;
4673
4674 // If this is a conditional branch in an empty block, and if any
4675 // predecessors are a conditional branch to one of our destinations,
4676 // fold the conditions into logical ops and one cond br.
4677
4678 // Ignore dbg intrinsics.
4679 if (&*BB->instructionsWithoutDebug(SkipPseudoOp: false).begin() != BI)
4680 return false;
4681
4682 int PBIOp, BIOp;
4683 if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
4684 PBIOp = 0;
4685 BIOp = 0;
4686 } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
4687 PBIOp = 0;
4688 BIOp = 1;
4689 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
4690 PBIOp = 1;
4691 BIOp = 0;
4692 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
4693 PBIOp = 1;
4694 BIOp = 1;
4695 } else {
4696 return false;
4697 }
4698
4699 // Check to make sure that the other destination of this branch
4700 // isn't BB itself. If so, this is an infinite loop that will
4701 // keep getting unwound.
4702 if (PBI->getSuccessor(i: PBIOp) == BB)
4703 return false;
4704
4705 // If predecessor's branch probability to BB is too low don't merge branches.
4706 SmallVector<uint32_t, 2> PredWeights;
4707 if (!PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
4708 extractBranchWeights(I: *PBI, Weights&: PredWeights) &&
4709 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4710
4711 BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
4712 Numerator: PredWeights[PBIOp],
4713 Denominator: static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4714
4715 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4716 if (CommonDestProb >= Likely)
4717 return false;
4718 }
4719
4720 // Do not perform this transformation if it would require
4721 // insertion of a large number of select instructions. For targets
4722 // without predication/cmovs, this is a big pessimization.
4723
4724 BasicBlock *CommonDest = PBI->getSuccessor(i: PBIOp);
4725 BasicBlock *RemovedDest = PBI->getSuccessor(i: PBIOp ^ 1);
4726 unsigned NumPhis = 0;
4727 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(Val: II);
4728 ++II, ++NumPhis) {
4729 if (NumPhis > 2) // Disable this xform.
4730 return false;
4731 }
4732
4733 // Finally, if everything is ok, fold the branches to logical ops.
4734 BasicBlock *OtherDest = BI->getSuccessor(i: BIOp ^ 1);
4735
4736 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4737 << "AND: " << *BI->getParent());
4738
4739 SmallVector<DominatorTree::UpdateType, 5> Updates;
4740
4741 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4742 // branch in it, where one edge (OtherDest) goes back to itself but the other
4743 // exits. We don't *know* that the program avoids the infinite loop
4744 // (even though that seems likely). If we do this xform naively, we'll end up
4745 // recursively unpeeling the loop. Since we know that (after the xform is
4746 // done) that the block *is* infinite if reached, we just make it an obviously
4747 // infinite loop with no cond branch.
4748 if (OtherDest == BB) {
4749 // Insert it at the end of the function, because it's either code,
4750 // or it won't matter if it's hot. :)
4751 BasicBlock *InfLoopBlock =
4752 BasicBlock::Create(Context&: BB->getContext(), Name: "infloop", Parent: BB->getParent());
4753 UncondBrInst::Create(IfTrue: InfLoopBlock, InsertBefore: InfLoopBlock);
4754 if (DTU)
4755 Updates.push_back(Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4756 OtherDest = InfLoopBlock;
4757 }
4758
4759 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4760
4761 // BI may have other predecessors. Because of this, we leave
4762 // it alone, but modify PBI.
4763
4764 // Make sure we get to CommonDest on True&True directions.
4765 Value *PBICond = PBI->getCondition();
4766 IRBuilder<NoFolder> Builder(PBI);
4767 if (PBIOp)
4768 PBICond = Builder.CreateNot(V: PBICond, Name: PBICond->getName() + ".not");
4769
4770 Value *BICond = BI->getCondition();
4771 if (BIOp)
4772 BICond = Builder.CreateNot(V: BICond, Name: BICond->getName() + ".not");
4773
4774 // Merge the conditions.
4775 Value *Cond =
4776 createLogicalOp(Builder, Opc: Instruction::Or, LHS: PBICond, RHS: BICond, Name: "brmerge");
4777
4778 // Modify PBI to branch on the new condition to the new dests.
4779 PBI->setCondition(Cond);
4780 PBI->setSuccessor(idx: 0, NewSucc: CommonDest);
4781 PBI->setSuccessor(idx: 1, NewSucc: OtherDest);
4782
4783 if (DTU) {
4784 Updates.push_back(Elt: {DominatorTree::Insert, PBI->getParent(), OtherDest});
4785 Updates.push_back(Elt: {DominatorTree::Delete, PBI->getParent(), RemovedDest});
4786
4787 DTU->applyUpdates(Updates);
4788 }
4789
4790 // Update branch weight for PBI.
4791 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4792 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4793 bool HasWeights =
4794 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4795 SuccTrueWeight, SuccFalseWeight);
4796 if (HasWeights) {
4797 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4798 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4799 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4800 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4801 // The weight to CommonDest should be PredCommon * SuccTotal +
4802 // PredOther * SuccCommon.
4803 // The weight to OtherDest should be PredOther * SuccOther.
4804 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4805 PredOther * SuccCommon,
4806 PredOther * SuccOther};
4807
4808 setFittedBranchWeights(I&: *PBI, Weights: NewWeights, /*IsExpected=*/false,
4809 /*ElideAllZero=*/true);
4810 // Cond may be a select instruction with the first operand set to "true", or
4811 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4812 if (!ProfcheckDisableMetadataFixes)
4813 if (auto *SI = dyn_cast<SelectInst>(Val: Cond)) {
4814 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4815 // The select is predicated on PBICond
4816 assert(dyn_cast<SelectInst>(SI)->getCondition() == PBICond);
4817 // The corresponding probabilities are what was referred to above as
4818 // PredCommon and PredOther.
4819 setFittedBranchWeights(I&: *SI, Weights: {PredCommon, PredOther},
4820 /*IsExpected=*/false, /*ElideAllZero=*/true);
4821 }
4822 }
4823
4824 // OtherDest may have phi nodes. If so, add an entry from PBI's
4825 // block that are identical to the entries for BI's block.
4826 addPredecessorToBlock(Succ: OtherDest, NewPred: PBI->getParent(), ExistPred: BB);
4827
4828 // We know that the CommonDest already had an edge from PBI to
4829 // it. If it has PHIs though, the PHIs may have different
4830 // entries for BB and PBI's BB. If so, insert a select to make
4831 // them agree.
4832 for (PHINode &PN : CommonDest->phis()) {
4833 Value *BIV = PN.getIncomingValueForBlock(BB);
4834 unsigned PBBIdx = PN.getBasicBlockIndex(BB: PBI->getParent());
4835 Value *PBIV = PN.getIncomingValue(i: PBBIdx);
4836 if (BIV != PBIV) {
4837 // Insert a select in PBI to pick the right value.
4838 SelectInst *NV = cast<SelectInst>(
4839 Val: Builder.CreateSelect(C: PBICond, True: PBIV, False: BIV, Name: PBIV->getName() + ".mux"));
4840 PN.setIncomingValue(i: PBBIdx, V: NV);
4841 // The select has the same condition as PBI, in the same BB. The
4842 // probabilities don't change.
4843 if (HasWeights) {
4844 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4845 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4846 setFittedBranchWeights(I&: *NV, Weights: {TrueWeight, FalseWeight},
4847 /*IsExpected=*/false, /*ElideAllZero=*/true);
4848 }
4849 }
4850 }
4851
4852 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4853 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4854
4855 // This basic block is probably dead. We know it has at least
4856 // one fewer predecessor.
4857 return true;
4858}
4859
4860// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4861// true or to FalseBB if Cond is false.
4862// Takes care of updating the successors and removing the old terminator.
4863// Also makes sure not to introduce new successors by assuming that edges to
4864// non-successor TrueBBs and FalseBBs aren't reachable.
bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  // Successors of OldTerm that are neither TrueBB nor FalseBB: the edges from
  // BB to them go away, and must be reported to the DomTreeUpdater below.
  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest.
  for (BasicBlock *Succ : successors(I: OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(Pred: BB,
                              /*KeepOneInputPHIs=*/true);

      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(X: Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator. Note that at this point a KeepEdge
  // that is still non-null means the corresponding block was *not* among
  // OldTerm's successors.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(Dest: TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      CondBrInst *NewBI = Builder.CreateCondBr(Cond, True: TrueBB, False: FalseBB);
      setBranchWeights(I&: *NewBI, Weights: {TrueWeight, FalseWeight},
                       /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(Dest: TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(Dest: FalseBB);
    }
  }

  // Drop the old terminator and DCE its condition computation if it became
  // dead.
  eraseTerminatorAndDCECond(TI: OldTerm);

  // Report the edges deleted above to keep the dominator tree in sync.
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(N: RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back(Elt: {DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
4941
4942// Replaces
4943// (switch (select cond, X, Y)) on constant X, Y
4944// with a branch - conditional if X and Y lead to distinct BBs,
4945// unconditional otherwise.
4946bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4947 SelectInst *Select) {
4948 // Check for constant integer values in the select.
4949 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Val: Select->getTrueValue());
4950 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Val: Select->getFalseValue());
4951 if (!TrueVal || !FalseVal)
4952 return false;
4953
4954 // Find the relevant condition and destinations.
4955 Value *Condition = Select->getCondition();
4956 BasicBlock *TrueBB = SI->findCaseValue(C: TrueVal)->getCaseSuccessor();
4957 BasicBlock *FalseBB = SI->findCaseValue(C: FalseVal)->getCaseSuccessor();
4958
4959 // Get weight for TrueBB and FalseBB.
4960 uint32_t TrueWeight = 0, FalseWeight = 0;
4961 SmallVector<uint64_t, 8> Weights;
4962 bool HasWeights = hasBranchWeightMD(I: *SI);
4963 if (HasWeights) {
4964 getBranchWeights(TI: SI, Weights);
4965 if (Weights.size() == 1 + SI->getNumCases()) {
4966 TrueWeight =
4967 (uint32_t)Weights[SI->findCaseValue(C: TrueVal)->getSuccessorIndex()];
4968 FalseWeight =
4969 (uint32_t)Weights[SI->findCaseValue(C: FalseVal)->getSuccessorIndex()];
4970 }
4971 }
4972
4973 // Perform the actual simplification.
4974 return simplifyTerminatorOnSelect(OldTerm: SI, Cond: Condition, TrueBB, FalseBB, TrueWeight,
4975 FalseWeight);
4976}
4977
4978// Replaces
4979// (indirectbr (select cond, blockaddress(@fn, BlockA),
4980// blockaddress(@fn, BlockB)))
4981// with
4982// (br cond, BlockA, BlockB).
4983bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4984 SelectInst *SI) {
4985 // Check that both operands of the select are block addresses.
4986 BlockAddress *TBA = dyn_cast<BlockAddress>(Val: SI->getTrueValue());
4987 BlockAddress *FBA = dyn_cast<BlockAddress>(Val: SI->getFalseValue());
4988 if (!TBA || !FBA)
4989 return false;
4990
4991 // Extract the actual blocks.
4992 BasicBlock *TrueBB = TBA->getBasicBlock();
4993 BasicBlock *FalseBB = FBA->getBasicBlock();
4994
4995 // The select's profile becomes the profile of the conditional branch that
4996 // replaces the indirect branch.
4997 SmallVector<uint32_t> SelectBranchWeights(2);
4998 if (!ProfcheckDisableMetadataFixes)
4999 extractBranchWeights(I: *SI, Weights&: SelectBranchWeights);
5000 // Perform the actual simplification.
5001 return simplifyTerminatorOnSelect(OldTerm: IBI, Cond: SI->getCondition(), TrueBB, FalseBB,
5002 TrueWeight: SelectBranchWeights[0],
5003 FalseWeight: SelectBranchWeights[1]);
5004}
5005
5006/// This is called when we find an icmp instruction
5007/// (a seteq/setne with a constant) as the only instruction in a
5008/// block that ends with an uncond branch. We are looking for a very specific
5009/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5010/// this case, we merge the first two "or's of icmp" into a switch, but then the
5011/// default value goes to an uncond block with a seteq in it, we get something
5012/// like:
5013///
5014/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5015/// DEFAULT:
5016/// %tmp = icmp eq i8 %A, 92
5017/// br label %end
5018/// end:
5019/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5020///
5021/// We prefer to split the edge to 'end' so that there is a true/false entry to
5022/// the PHI, merging the third icmp into the switch.
5023bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5024 ICmpInst *ICI, IRBuilder<> &Builder) {
5025 // Select == nullptr means we assume that there is a hidden no-op select
5026 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5027 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, Select: nullptr, Builder);
5028}
5029
5030/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5031/// case. This is called when we find an icmp instruction (a seteq/setne with a
5032/// constant) and its following select instruction as the only TWO instructions
5033/// in a block that ends with an uncond branch. We are looking for a very
5034/// specific pattern that occurs when "
5035/// if (A == 1) return C1;
5036/// if (A == 2) return C2;
5037/// if (A < 3) return C3;
5038/// return C4;
5039/// " gets simplified. In this case, we merge the first two "branches of icmp"
5040/// into a switch, but then the default value goes to an uncond block with a lt
5041/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5042/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5043/// get something like:
5044///
5045/// case1:
5046/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5047/// case2:
5048/// br label %end
5049/// DEFAULT:
5050/// %tmp = icmp eq i8 %A, 2
5051/// %val = select i1 %tmp, i8 C3, i8 C4
5052/// br label %end
5053/// end:
5054/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5055///
5056/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5057/// to the PHI, merging the icmp & select into the switch, as follows:
5058///
5059/// case1:
5060/// switch i8 %A, label %DEFAULT [
5061/// i8 0, label %end
5062/// i8 1, label %case2
5063/// i8 2, label %case3
5064/// ]
5065/// case2:
5066/// br label %end
5067/// case3:
5068/// br label %end
5069/// DEFAULT:
5070/// br label %end
5071/// end:
5072/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
    ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp/select has multiple uses, it is
  // too complex.
  /// TODO: support multi-phis in succ BB of select's BB.
  if (isa<PHINode>(Val: BB->begin()) || !ICI->hasOneUse() ||
      (Select && !Select->hasOneUse()))
    return false;

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Val: Pred->getTerminator()))
    return false;

  Value *IcmpCond;
  ConstantInt *NewCaseVal;
  CmpPredicate Predicate;

  // Match icmp X, C
  if (!match(V: ICI,
             P: m_ICmp(Pred&: Predicate, L: m_Value(V&: IcmpCond), R: m_ConstantInt(CI&: NewCaseVal))))
    return false;

  // Normalize the two forms (bare icmp vs. icmp + select) into one set of
  // values: the select condition, its two arms, and the single user that the
  // PHI check below inspects.
  Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
  Instruction *User;
  if (!Select) {
    // If Select == nullptr, we can assume that there is a hidden no-op select
    // just after icmp
    SelectCond = ICI;
    SelectTrueVal = Builder.getTrue();
    SelectFalseVal = Builder.getFalse();
    User = ICI->user_back();
  } else {
    SelectCond = Select->getCondition();
    // Check if the select condition is the same as the icmp condition.
    if (SelectCond != ICI)
      return false;
    SelectTrueVal = Select->getTrueValue();
    SelectFalseVal = Select->getFalseValue();
    User = Select->user_back();
  }

  SwitchInst *SI = cast<SwitchInst>(Val: Pred->getTerminator());
  if (SI->getCondition() != IcmpCond)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(i_nocapture: 0, Val_nocapture: VVal);

    if (Value *V = simplifyInstruction(I: ICI, Q: {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(C: NewCaseVal) != SI->case_default()) {
    Value *V;
    if (Predicate == ICmpInst::ICMP_EQ)
      V = ConstantInt::getFalse(Context&: BB->getContext());
    else
      V = ConstantInt::getTrue(Context&: BB->getContext());

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the select has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(Idx: 0);
  PHINode *PHIUse = dyn_cast<PHINode>(Val: User);
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
      isa<PHINode>(Val: ++BasicBlock::iterator(PHIUse)))
    return false;

  // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
  // edge gets SelectTrueVal in the PHI.
  Value *DefaultCst = SelectFalseVal;
  Value *NewCst = SelectTrueVal;

  if (ICI->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(a&: DefaultCst, b&: NewCst);

  // Replace Select (which is used by the PHI for the default value) with
  // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
  if (Select) {
    Select->replaceAllUsesWith(V: DefaultCst);
    Select->eraseFromParent();
  } else {
    ICI->replaceAllUsesWith(V: DefaultCst);
  }
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(Context&: BB->getContext(), Name: "switch.edge", Parent: BB->getParent(), InsertBefore: BB);
  {
    // Split the default case's weight between the remaining default and the
    // new case (halving it, rounded up), then register the new case.
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(idx: 0);
    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(idx: 0, W: *NewW);
    }
    SIW.addCase(OnVal: NewCaseVal, Dest: NewBB, W: NewW);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(Dest: SuccBlock);
  PHIUse->addIncoming(V: NewCst, BB: NewBB);
  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
5211
5212/// Check to see if it is branching on an or/and chain of icmp instructions, and
5213/// fold it into a switch instruction if so.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(CondBrInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  // Extra is at most one leftover condition that was and'ed/or'ed into the
  // chain but doesn't compare CompVal against a constant.
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Start: Values.begin(), End: Values.end(), Compare: constantIntSortPredicate);
  Values.erase(CS: llvm::unique(R&: Values), CE: Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  SmallVector<uint32_t> BranchWeights;
  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(I: *BI, Weights&: BranchWeights);

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(i: 1);
  BasicBlock *EdgeBB = BI->getSuccessor(i: 0);
  if (!TrueWhenEqual) {
    std::swap(a&: DefaultBB, b&: EdgeBB);
    // Keep the weights paired with the (now swapped) destinations.
    if (HasProfile)
      std::swap(a&: BranchWeights[0], b&: BranchWeights[1]);
  }

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(Old: BB, SplitPt: BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, BBName: "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(V: ExtraCase, AC, CtxI: BI, DT: nullptr))
      ExtraCase = Builder.CreateFreeze(V: ExtraCase);

    // We don't have any info about this condition.
    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(Cond: ExtraCase, True: EdgeBB, False: NewBB)
                             : Builder.CreateCondBr(Cond: ExtraCase, True: NewBB, False: EdgeBB);
    setExplicitlyUnknownBranchWeightsIfProfiled(I&: *Br, DEBUG_TYPE);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(Succ: EdgeBB, NewPred: BB, ExistPred: NewBB);

    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    // The remaining work (switch construction) happens in the split-off block.
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        V: CompVal, DestTy: DL.getIntPtrType(CompVal->getType()), Name: "magicptr");
  }

  // Check if we can represent the values as a contiguous range. If so, we use a
  // range check + conditional branch instead of a switch.
  // Note: Values is sorted descending at this point, so front() is the max and
  // back() is the min.
  if (Values.front()->getValue() - Values.back()->getValue() ==
      Values.size() - 1) {
    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
        Lower: Values.back()->getValue(), Upper: Values.front()->getValue() + 1);
    APInt Offset, RHS;
    ICmpInst::Predicate Pred;
    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
    Value *X = CompVal;
    if (!Offset.isZero())
      X = Builder.CreateAdd(LHS: X, RHS: ConstantInt::get(Ty: CompVal->getType(), V: Offset));
    Value *Cond =
        Builder.CreateICmp(P: Pred, LHS: X, RHS: ConstantInt::get(Ty: CompVal->getType(), V: RHS));
    CondBrInst *NewBI = Builder.CreateCondBr(Cond, True: EdgeBB, False: DefaultBB);
    if (HasProfile)
      setBranchWeights(I&: *NewBI, Weights: BranchWeights, /*IsExpected=*/false);
    // We don't need to update PHI nodes since we don't add any new edges.
  } else {
    // Create the new switch instruction now.
    SwitchInst *New = Builder.CreateSwitch(V: CompVal, Dest: DefaultBB, NumCases: Values.size());
    if (HasProfile) {
      // We know the weight of the default case. We don't know the weight of the
      // other cases, but rather than completely lose profiling info, we split
      // the remaining probability equally over them.
      SmallVector<uint32_t> NewWeights(Values.size() + 1);
      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
                                        // if TrueWhenEqual.
      for (auto &V : drop_begin(RangeOrContainer&: NewWeights))
        V = BranchWeights[0] / Values.size();
      setBranchWeights(I&: *New, Weights: NewWeights, /*IsExpected=*/false);
    }

    // Add all of the 'cases' to the switch instruction.
    for (ConstantInt *Val : Values)
      New->addCase(OnVal: Val, Dest: EdgeBB);

    // We added edges from PI to the EdgeBB. As such, if there were any
    // PHI nodes in EdgeBB, they need entries to be added corresponding to
    // the number of edges added.
    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(Val: BBI); ++BBI) {
      PHINode *PN = cast<PHINode>(Val&: BBI);
      Value *InVal = PN->getIncomingValueForBlock(BB);
      // One incoming edge already existed; add entries for the rest.
      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
        PN->addIncoming(V: InVal, BB);
    }
  }

  // Erase the old branch instruction.
  eraseTerminatorAndDCECond(TI: BI);
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5378
5379bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5380 if (isa<PHINode>(Val: RI->getValue()))
5381 return simplifyCommonResume(RI);
5382 else if (isa<LandingPadInst>(Val: RI->getParent()->getFirstNonPHIIt()) &&
5383 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5384 // The resume must unwind the exception that caused control to branch here.
5385 return simplifySingleResume(RI);
5386
5387 return false;
5388}
5389
5390// Check if cleanup block is empty
5391static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
5392 for (Instruction &I : R) {
5393 auto *II = dyn_cast<IntrinsicInst>(Val: &I);
5394 if (!II)
5395 return false;
5396
5397 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5398 switch (IntrinsicID) {
5399 case Intrinsic::dbg_declare:
5400 case Intrinsic::dbg_value:
5401 case Intrinsic::dbg_label:
5402 case Intrinsic::lifetime_end:
5403 break;
5404 default:
5405 return false;
5406 }
5407 }
5408 return true;
5409}
5410
5411// Simplify resume that is shared by several landing pads (phi of landing pad).
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(R: make_range(x: RI->getParent()->getFirstNonPHIIt(),
                                       y: BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(Val: RI->getValue());

  // Check incoming blocks to see if any of them are trivial. A "trivial"
  // incoming block is one whose only content is its landing pad (plus benign
  // intrinsics) and whose only successor is this resume block.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(i: Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(i: Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(Val: IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

    if (isCleanupBlockEmpty(
            R: make_range(x: LandingPad->getNextNode(), y: IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(X: IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(BB: TrivialBB) != -1)
      BB->removePredecessor(Pred: TrivialBB, KeepOneInputPHIs: true);

    for (BasicBlock *Pred :
         llvm::make_early_inc_range(Range: predecessors(BB: TrivialBB))) {
      removeUnwindEdge(BB: Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates(Updates: {{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // NOTE(review): TrivialUnwindBlocks is known non-empty here (we returned
  // early above), so this always yields true.
  return !TrivialUnwindBlocks.empty();
}
5480
5481// Simplify resume that is only used by a single (non-phi) landing pad.
5482bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5483 BasicBlock *BB = RI->getParent();
5484 auto *LPInst = cast<LandingPadInst>(Val: BB->getFirstNonPHIIt());
5485 assert(RI->getValue() == LPInst &&
5486 "Resume must unwind the exception that caused control to here");
5487
5488 // Check that there are no other instructions except for debug intrinsics.
5489 if (!isCleanupBlockEmpty(
5490 R: make_range<Instruction *>(x: LPInst->getNextNode(), y: RI)))
5491 return false;
5492
5493 // Turn all invokes that unwind here into calls and delete the basic block.
5494 for (BasicBlock *Pred : llvm::make_early_inc_range(Range: predecessors(BB))) {
5495 removeUnwindEdge(BB: Pred, DTU);
5496 ++NumInvokes;
5497 }
5498
5499 // The landingpad is now unreachable. Zap it.
5500 DeleteDeadBlock(BB, DTU);
5501 return true;
5502}
5503
/// Remove a trivial cleanuppad/cleanupret pair: rewrite each predecessor to
/// unwind directly to the cleanupret's destination (or to the caller), sink
/// any PHIs into that destination, and delete the cleanup block.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  if (!isCleanupBlockEmpty(
          R: make_range<Instruction *>(x: CPInst->getNextNode(), y: RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(i: Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(Val: SrcVal);

      // If the incoming value is a PHI defined in BB, translate it through
      // per-predecessor; otherwise every new edge carries the same value.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(BB: Pred) : SrcVal;
        DestPN.addIncoming(V: Incoming, BB: Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(Range: BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(BB: UnwindDest))
        if (pred != BB)
          PN.addIncoming(V: &PN, BB: pred);
      PN.moveBefore(InsertPos: InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(V: PoisonValue::get(T: PN.getType()), BB);
    }
  }

  // DomTree edge updates are batched here and flushed before any helper call
  // that takes DTU itself.
  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  for (BasicBlock *PredBB : llvm::make_early_inc_range(Range: predecessors(BB))) {
    if (UnwindDest == nullptr) {
      // Unwinds to caller: flush pending updates first because
      // removeUnwindEdge applies its own updates through DTU.
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(BB: PredBB, DTU);
      ++NumInvokes;
    } else {
      // Redirect the predecessor's unwind edge past BB to UnwindDest.
      BB->removePredecessor(Pred: PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(From: BB, To: UnwindDest);
      if (DTU) {
        Updates.push_back(x: {DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back(x: {DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5619
5620// Try to merge two cleanuppads together.
5621static bool mergeCleanupPad(CleanupReturnInst *RI) {
5622 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5623 // with.
5624 BasicBlock *UnwindDest = RI->getUnwindDest();
5625 if (!UnwindDest)
5626 return false;
5627
5628 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5629 // be safe to merge without code duplication.
5630 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5631 return false;
5632
5633 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5634 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(Val: &UnwindDest->front());
5635 if (!SuccessorCleanupPad)
5636 return false;
5637
5638 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5639 // Replace any uses of the successor cleanupad with the predecessor pad
5640 // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5641 // funclet bundle operands.
5642 SuccessorCleanupPad->replaceAllUsesWith(V: PredecessorCleanupPad);
5643 // Remove the old cleanuppad.
5644 SuccessorCleanupPad->eraseFromParent();
5645 // Now, we simply replace the cleanupret with a branch to the unwind
5646 // destination.
5647 UncondBrInst::Create(IfTrue: UnwindDest, InsertBefore: RI->getParent());
5648 RI->eraseFromParent();
5649
5650 return true;
5651}
5652
5653bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5654 // It is possible to transiantly have an undef cleanuppad operand because we
5655 // have deleted some, but not all, dead blocks.
5656 // Eventually, this block will be deleted.
5657 if (isa<UndefValue>(Val: RI->getOperand(i_nocapture: 0)))
5658 return false;
5659
5660 if (mergeCleanupPad(RI))
5661 return true;
5662
5663 if (removeEmptyCleanup(RI, DTU))
5664 return true;
5665
5666 return false;
5667}
5668
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
/// Delete instructions that provably execute straight into an unreachable,
/// then — if the unreachable heads its block — rewrite or delete each
/// predecessor edge into the now-trivially-dead block, and finally delete the
/// block itself when it has no predecessors left.
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  BB->flushTerminatorDbgRecords();

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    BasicBlock::iterator BBI = UI->getIterator();
    --BBI;

    if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBI))
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(V: PoisonValue::get(T: BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // Snapshot the predecessor set up front; the loop below mutates the CFG.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (isa<UncondBrInst>(Val: TI)) {
      // An unconditional branch into BB can never execute; the predecessor's
      // terminator becomes unreachable too.
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *BI = dyn_cast<CondBrInst>(Val: TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        // The edge into BB is never taken, so the condition's value on the
        // taken path is known; record it as an assumption and branch
        // unconditionally to the other successor.
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        if (BI->getSuccessor(i: 0) == BB) {
          Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
          Builder.CreateBr(Dest: BI->getSuccessor(i: 1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(Dest: BI->getSuccessor(i: 0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));

        eraseTerminatorAndDCECond(TI: BI);
        Changed = true;
      }
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(Val: TI)) {
      // Drop every case that targets BB, keeping profile metadata consistent.
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(Pred: SU->getParent());
        i = SU.removeCase(I: i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(Val: TI)) {
      if (II->getUnwindDest() == BB) {
        // Flush pending updates before removeUnwindEdge, which takes DTU and
        // applies its own updates.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(Val: removeUnwindEdge(BB: TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: TI)) {
      if (CSI->getUnwindDest() == BB) {
        // Same flush-before-helper pattern as the invoke case above.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(BB: TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // BB is a handler, not the unwind destination: drop it from the
      // catchswitch's handler list.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(HI: I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(BB: Predecessor)) {
              Updates.push_back(x: {DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back(x: {DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(V: CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(BB: Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(BB: EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5856
/// Describes a contiguous run of switch-case values and the destinations on
/// either side of the implied range check (see findContiguousCases and
/// turnSwitchRangeIntoICmp).
struct ContiguousCasesResult {
  ConstantInt *Min;      // Smallest value of the contiguous range.
  ConstantInt *Max;      // Largest value of the contiguous range.
  BasicBlock *Dest;      // Destination taken for values inside [Min, Max].
  BasicBlock *OtherDest; // Destination taken for all remaining values.
  SmallVectorImpl<ConstantInt *> *Cases;      // Case values branching to Dest.
  SmallVectorImpl<ConstantInt *> *OtherCases; // Case values for OtherDest.
};
5865
/// Determine whether \p Cases (all branching to \p Dest) form one contiguous
/// range of values, or — when \p Cases covers the extremes of the condition's
/// value range with a single interior gap — whether the complementary values
/// form a contiguous range for \p OtherDest. Returns the normalized range
/// description on success, std::nullopt otherwise. Note: \p Cases is sorted
/// in place as a side effect.
static std::optional<ContiguousCasesResult>
findContiguousCases(Value *Condition, SmallVectorImpl<ConstantInt *> &Cases,
                    SmallVectorImpl<ConstantInt *> &OtherCases,
                    BasicBlock *Dest, BasicBlock *OtherDest) {
  assert(Cases.size() >= 1);

  // Sort so the extreme values sit at the ends: Max is read from the front
  // and Min from the back below.
  array_pod_sort(Start: Cases.begin(), End: Cases.end(), Compare: constantIntSortPredicate);
  const APInt &Min = Cases.back()->getValue();
  const APInt &Max = Cases.front()->getValue();
  APInt Offset = Max - Min;
  // N distinct values spanning exactly N-1 form a contiguous run.
  size_t ContiguousOffset = Cases.size() - 1;
  if (Offset == ContiguousOffset) {
    return ContiguousCasesResult{
        /*Min=*/Cases.back(),
        /*Max=*/Cases.front(),
        /*Dest=*/Dest,
        /*OtherDest=*/OtherDest,
        /*Cases=*/&Cases,
        /*OtherCases=*/&OtherCases,
    };
  }
  ConstantRange CR = computeConstantRange(V: Condition, /*ForSigned=*/false);
  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
  // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
  // contiguous range for the other destination. N.B. If CR is not a full range,
  // Max+1 is not equal to Min. It's not continuous in arithmetic.
  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
    assert(Cases.size() >= 2);
    // Find the (unique, if any) gap in the sorted sequence.
    auto *It =
        std::adjacent_find(first: Cases.begin(), last: Cases.end(), binary_pred: [](auto L, auto R) {
          return L->getValue() != R->getValue() + 1;
        });
    if (It == Cases.end())
      return std::nullopt;
    auto [OtherMax, OtherMin] = std::make_pair(x&: *It, y&: *std::next(x: It));
    // Both halves must be internally contiguous for the gap to be the only
    // break: total span minus the gap accounts for all remaining values.
    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
        Cases.size() - 2) {
      return ContiguousCasesResult{
          /*Min=*/cast<ConstantInt>(
              Val: ConstantInt::get(Ty: OtherMin->getType(), V: OtherMin->getValue() + 1)),
          /*Max=*/
          cast<ConstantInt>(
              Val: ConstantInt::get(Ty: OtherMax->getType(), V: OtherMax->getValue() - 1)),
          /*Dest=*/OtherDest,
          /*OtherDest=*/Dest,
          /*Cases=*/&OtherCases,
          /*OtherCases=*/&Cases,
      };
    }
  }
  return std::nullopt;
}
5918
5919static void createUnreachableSwitchDefault(SwitchInst *Switch,
5920 DomTreeUpdater *DTU,
5921 bool RemoveOrigDefaultBlock = true) {
5922 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5923 auto *BB = Switch->getParent();
5924 auto *OrigDefaultBlock = Switch->getDefaultDest();
5925 if (RemoveOrigDefaultBlock)
5926 OrigDefaultBlock->removePredecessor(Pred: BB);
5927 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5928 Context&: BB->getContext(), Name: BB->getName() + ".unreachabledefault", Parent: BB->getParent(),
5929 InsertBefore: OrigDefaultBlock);
5930 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5931 UI->setDebugLoc(DebugLoc::getTemporary());
5932 Switch->setDefaultDest(&*NewDefaultBlock);
5933 if (DTU) {
5934 SmallVector<DominatorTree::UpdateType, 2> Updates;
5935 Updates.push_back(Elt: {DominatorTree::Insert, BB, &*NewDefaultBlock});
5936 if (RemoveOrigDefaultBlock &&
5937 !is_contained(Range: successors(BB), Element: OrigDefaultBlock))
5938 Updates.push_back(Elt: {DominatorTree::Delete, BB, &*OrigDefaultBlock});
5939 DTU->applyUpdates(Updates);
5940 }
5941}
5942
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();
  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  SmallVector<ConstantInt *, 16> CasesA;
  SmallVector<ConstantInt *, 16> CasesB;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Elt: Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Elt: Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  std::optional<ContiguousCasesResult> ContiguousCases;

  // Only one icmp is needed when there is only one case.
  if (!HasDefault && CasesA.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesA[0],
        /*Max=*/CasesA[0],
        /*Dest=*/DestA,
        /*OtherDest=*/DestB,
        /*Cases=*/&CasesA,
        /*OtherCases=*/&CasesB,
    };
  else if (CasesB.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesB[0],
        /*Max=*/CasesB[0],
        /*Dest=*/DestB,
        /*OtherDest=*/DestA,
        /*Cases=*/&CasesB,
        /*OtherCases=*/&CasesA,
    };
  // Correctness: Cases to the default destination cannot be contiguous cases.
  else if (!HasDefault)
    ContiguousCases =
        findContiguousCases(Condition: SI->getCondition(), Cases&: CasesA, OtherCases&: CasesB, Dest: DestA, OtherDest: DestB);

  if (!ContiguousCases)
    ContiguousCases =
        findContiguousCases(Condition: SI->getCondition(), Cases&: CasesB, OtherCases&: CasesA, Dest: DestB, OtherDest: DestA);

  if (!ContiguousCases)
    return false;

  // Destructure the found range: Cases branch to Dest, the rest to OtherDest.
  auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;

  // Start building the compare and branch.

  // Shift the condition so the range starts at zero, then compare against the
  // range's length.
  Constant *Offset = ConstantExpr::getNeg(C: Min);
  Constant *NumCases = ConstantInt::get(Ty: Offset->getType(),
                                        V: Max->getValue() - Min->getValue() + 1);
  Instruction *NewBI;
  if (NumCases->isOneValue()) {
    // Single-value range: a plain equality test suffices.
    assert(Max->getValue() == Min->getValue());
    Value *Cmp = Builder.CreateICmpEQ(LHS: SI->getCondition(), RHS: Min);
    NewBI = Builder.CreateCondBr(Cond: Cmp, True: Dest, False: OtherDest);
  }
  // If NumCases overflowed, then all possible values jump to the successor.
  else if (NumCases->isNullValue() && !Cases->empty()) {
    NewBI = Builder.CreateBr(Dest);
  } else {
    Value *Sub = SI->getCondition();
    if (!Offset->isNullValue())
      Sub = Builder.CreateAdd(LHS: Sub, RHS: Offset, Name: Sub->getName() + ".off");
    Value *Cmp = Builder.CreateICmpULT(LHS: Sub, RHS: NumCases, Name: "switch");
    NewBI = Builder.CreateCondBr(Cond: Cmp, True: Dest, False: OtherDest);
  }

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(I: *SI) && isa<CondBrInst>(Val: NewBI)) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(TI: SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(idx: I) == Dest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Scale both weights down together until they fit in 32 bits.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setFittedBranchWeights(I&: *NewBI, Weights: {TrueWeight, FalseWeight},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  for (auto &PHI : make_early_inc_range(Range: Dest->phis())) {
    unsigned PreviousEdges = Cases->size();
    if (Dest == SI->getDefaultDest())
      ++PreviousEdges;
    // The new terminator contributes one edge from this block; remove the
    // entries for the rest.
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      PHI.removeIncomingValue(BB: SI->getParent());
  }
  for (auto &PHI : make_early_inc_range(Range: OtherDest->phis())) {
    unsigned PreviousEdges = OtherCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    unsigned E = PreviousEdges - 1;
    // Remove all incoming values from OtherDest if OtherDest is unreachable.
    if (isa<UncondBrInst>(Val: NewBI))
      ++E;
    for (unsigned I = 0; I != E; ++I)
      PHI.removeIncomingValue(BB: SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    createUnreachableSwitchDefault(Switch: SI, DTU);

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
6100
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(V: Cond, DL, AC, CxtI: SI);
  // Try to enumerate the exact set of values Cond can take (at most 4); when
  // that succeeds, any case outside the set is dead.
  SmallPtrSet<const Constant *, 4> KnownValues;
  bool IsKnownValuesValid = collectPossibleValues(V: Cond, Constants&: KnownValues, MaxCount: 4);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Op: Cond, DL, AC, CxtI: SI);

  // Gather dead cases.
  SmallVector<ConstantInt *, 8> DeadCases;
  // Per-successor live-case counts, used to decide which DomTree edges die.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Key: Successor);
      if (Inserted)
        UniqueSuccessors.push_back(Elt: Successor);
      ++It->second;
    }
    ConstantInt *CaseC = Case.getCaseValue();
    const APInt &CaseVal = CaseC->getValue();
    // A case is dead if it conflicts with the known bits, exceeds the
    // significant-bit bound, or is absent from the enumerated value set.
    if (Known.Zero.intersects(RHS: CaseVal) || !Known.One.isSubsetOf(RHS: CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
        (IsKnownValuesValid && !KnownValues.contains(Ptr: CaseC))) {
      DeadCases.push_back(Elt: CaseC);
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    } else if (IsKnownValuesValid)
      // Live case: remove it from the set so what remains after the loop is
      // exactly the possible values with no matching case.
      KnownValues.erase(Ptr: CaseC);
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault = !SI->defaultDestUnreachable();
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty()) {
    // Every remaining possible value is undef-like: the default can't run.
    if (IsKnownValuesValid && all_of(Range&: KnownValues, P: IsaPred<UndefValue>)) {
      createUnreachableSwitchDefault(Switch: SI, DTU);
      return true;
    }

    if (NumUnknownBits < 64 /* avoid overflow */) {
      uint64_t AllNumCases = 1ULL << NumUnknownBits;
      if (SI->getNumCases() == AllNumCases) {
        createUnreachableSwitchDefault(Switch: SI, DTU);
        return true;
      }
      // When only one case value is missing, replace default with that case.
      // Eliminating the default branch will provide more opportunities for
      // optimization, such as lookup tables.
      if (SI->getNumCases() == AllNumCases - 1) {
        assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
        IntegerType *CondTy = cast<IntegerType>(Val: Cond->getType());
        if (CondTy->getIntegerBitWidth() > 64 ||
            !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
          return false;

        // XOR of all present case values against nothing; the one value not
        // present survives as the accumulated result.
        uint64_t MissingCaseVal = 0;
        for (const auto &Case : SI->cases())
          MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
        auto *MissingCase = cast<ConstantInt>(
            Val: ConstantInt::get(Ty: Cond->getType(), V: MissingCaseVal));
        SwitchInstProfUpdateWrapper SIW(*SI);
        SIW.addCase(OnVal: MissingCase, Dest: SI->getDefaultDest(),
                    W: SIW.getSuccessorWeight(idx: 0));
        createUnreachableSwitchDefault(Switch: SI, DTU,
                                       /*RemoveOrigDefaultBlock*/ false);
        SIW.setSuccessorWeight(idx: 0, W: 0);
        return true;
      }
    }
  }

  if (DeadCases.empty())
    return false;

  // Remove each dead case, keeping branch-weight metadata consistent.
  SwitchInstProfUpdateWrapper SIW(*SI);
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(C: DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(Pred: SI->getParent());
    SIW.removeCase(I: CaseI);
  }

  if (DTU) {
    // Delete the DomTree edge to any successor that lost all of its cases.
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back(x: {DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
6212
6213/// If BB would be eligible for simplification by
6214/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6215/// by an unconditional branch), look at the phi node for BB in the successor
6216/// block and see if the incoming value is equal to CaseValue. If so, return
6217/// the phi node, and set PhiIndex to BB's index in the phi node.
6218static PHINode *findPHIForConditionForwarding(ConstantInt *CaseValue,
6219 BasicBlock *BB, int *PhiIndex) {
6220 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6221 return nullptr; // BB must be empty to be a candidate for simplification.
6222 if (!BB->getSinglePredecessor())
6223 return nullptr; // BB must be dominated by the switch.
6224
6225 UncondBrInst *Branch = dyn_cast<UncondBrInst>(Val: BB->getTerminator());
6226 if (!Branch)
6227 return nullptr; // Terminator must be unconditional branch.
6228
6229 BasicBlock *Succ = Branch->getSuccessor();
6230
6231 for (PHINode &PHI : Succ->phis()) {
6232 int Idx = PHI.getBasicBlockIndex(BB);
6233 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6234
6235 Value *InValue = PHI.getIncomingValue(i: Idx);
6236 if (InValue != CaseValue)
6237 continue;
6238
6239 *PhiIndex = Idx;
6240 return &PHI;
6241 }
6242
6243 return nullptr;
6244}
6245
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
static bool forwardSwitchConditionToPHI(SwitchInst *SI) {
  // Maps a PHI to the incoming-value indices that can be replaced by the
  // switch condition.
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant case
    // value rather than the switch condition variable:
    // switchbb:
    // switch i32 %x, label %default [
    //   i32 17, label %succ
    //   ...
    // succ:
    //   %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    //   %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch to
      // a phi. If there is >1, that means multiple cases of the switch map to 1
      // value in the phi, and that phi value is not the switch condition. Thus,
      // this transform would not make sense (the phi would be invalid because
      // a phi can't have different incoming values from the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(BB: SwitchBlock);
      if (Phi.getIncomingValue(i: SwitchBBIdx) == CaseValue &&
          count(Range: Phi.blocks(), Element: SwitchBlock) == 1) {
        Phi.setIncomingValue(i: SwitchBBIdx, V: SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case constants.
    int PhiIdx;
    if (auto *Phi = findPHIForConditionForwarding(CaseValue, BB: CaseDest, PhiIndex: &PhiIdx))
      ForwardingNodes[Phi].push_back(Elt: PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Check if it helps to fold PHI: only rewrite a single index when the
    // condition already appears among the PHI's incoming values.
    if (Indexes.size() < 2 && !llvm::is_contained(Range: Phi->incoming_values(), Element: SI->getCondition()))
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(i: Index, V: SI->getCondition());
    Changed = true;
  }

  return Changed;
}
6304
6305/// Return true if the backend will be able to handle
6306/// initializing an array of constants like C.
6307static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
6308 if (C->isThreadDependent())
6309 return false;
6310 if (C->isDLLImportDependent())
6311 return false;
6312
6313 if (!isa<ConstantDataVector, ConstantExpr, ConstantFP, ConstantInt,
6314 ConstantPointerNull, GlobalValue, UndefValue>(Val: C))
6315 return false;
6316
6317 // Globals cannot contain scalable types.
6318 if (C->getType()->isScalableTy())
6319 return false;
6320
6321 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) {
6322 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6323 // materializing the array of constants.
6324 Constant *StrippedC = cast<Constant>(Val: CE->stripInBoundsConstantOffsets());
6325 if (StrippedC == C || !validLookupTableConstant(C: StrippedC, TTI))
6326 return false;
6327 }
6328
6329 if (!TTI.shouldBuildLookupTablesForConstant(C))
6330 return false;
6331
6332 return true;
6333}
6334
6335/// If V is a Constant, return it. Otherwise, try to look up
6336/// its constant value in ConstantPool, returning 0 if it's not there.
6337static Constant *
6338lookupConstant(Value *V,
6339 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
6340 if (Constant *C = dyn_cast<Constant>(Val: V))
6341 return C;
6342 return ConstantPool.lookup(Val: V);
6343}
6344
6345/// Try to fold instruction I into a constant. This works for
6346/// simple instructions such as binary operations where both operands are
6347/// constant or can be replaced by constants from the ConstantPool. Returns the
6348/// resulting constant on success, 0 otherwise.
6349static Constant *
6350constantFold(Instruction *I, const DataLayout &DL,
6351 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
6352 if (SelectInst *Select = dyn_cast<SelectInst>(Val: I)) {
6353 Constant *A = lookupConstant(V: Select->getCondition(), ConstantPool);
6354 if (!A)
6355 return nullptr;
6356 if (A->isAllOnesValue())
6357 return lookupConstant(V: Select->getTrueValue(), ConstantPool);
6358 if (A->isNullValue())
6359 return lookupConstant(V: Select->getFalseValue(), ConstantPool);
6360 return nullptr;
6361 }
6362
6363 SmallVector<Constant *, 4> COps;
6364 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6365 if (Constant *A = lookupConstant(V: I->getOperand(i: N), ConstantPool))
6366 COps.push_back(Elt: A);
6367 else
6368 return nullptr;
6369 }
6370
6371 return ConstantFoldInstOperands(I, Ops: COps, DL);
6372}
6373
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (nullptr for the
/// default case), of a switch instruction SI.
///
/// On success, returns true and appends (phi, constant) pairs to Res. If
/// *CommonDest was null it is set to the destination this case reaches.
/// Returns false if the case does not reach the common destination or any
/// phi value cannot be proven to be a supported constant.
static bool
getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  // Seed the pool with "condition == CaseVal" for this case.
  SmallDenseMap<Value *, Constant *> ConstantPool;
  ConstantPool.insert(KV: std::make_pair(x: SI->getCondition(), y&: CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(SkipPseudoOp: false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(Idx: 0);
    } else if (Constant *C = constantFold(I: &I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(Val: User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(Val: User))
          if (Phi->getIncomingBlock(U: Use) == CaseDest)
            continue;
        return false;
      }

      // Remember the folded value so later instructions can consume it.
      ConstantPool.insert(KV: std::make_pair(x: &I, y&: C));
    } else {
      // Hit something we cannot fold; stop scanning. The checks below decide
      // whether the block we stopped in still qualifies as the destination.
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB: Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        lookupConstant(V: PHI.getIncomingValue(i: Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!validLookupTableConstant(C: ConstVal, TTI))
      return false;

    Res.push_back(Elt: std::make_pair(x: &PHI, y&: ConstVal));
  }

  // Succeed only if at least one phi received a value for this case.
  return Res.size() > 0;
}
6447
6448// Helper function used to add CaseVal to the list of cases that generate
6449// Result. Returns the updated number of cases that generate this result.
6450static size_t mapCaseToResult(ConstantInt *CaseVal,
6451 SwitchCaseResultVectorTy &UniqueResults,
6452 Constant *Result) {
6453 for (auto &I : UniqueResults) {
6454 if (I.first == Result) {
6455 I.second.push_back(Elt: CaseVal);
6456 return I.second.size();
6457 }
6458 }
6459 UniqueResults.push_back(
6460 Elt: std::make_pair(x&: Result, y: SmallVector<ConstantInt *, 4>(1, CaseVal)));
6461 return 1;
6462}
6463
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
//
// Outputs: PHI (the single phi fed by all cases), CommonDest (the shared
// successor), UniqueResults (result -> cases buckets) and DefaultResult
// (the default's value, or nullptr if the default destination is
// unreachable).
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CaseDest: I.getCaseSuccessor(), CommonDest: &CommonDest, Res&: Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Result: Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency: every case must feed the same phi node.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
  getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest, Res&: DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;

  return DefaultResult || SI->defaultDestUnreachable();
}
6517
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// TODO: Handle switches with more than 2 cases that map to the same result.
// The branch weights correspond to the provided Condition (i.e. if Condition is
// modified from the original SwitchInst, the caller must adjust the weights)
//
// Returns the select (or constant) producing the phi's value, or nullptr if
// no profitable select form was found.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL,
                                 ArrayRef<uint32_t> BranchWeights) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  //   case 10: return 42;         %1 = select i1 %0, i32 42, i32 4
  //   case 20: return 2;   ---->  %2 = icmp eq i32 %a, 20
  //   default: return 4;          %3 = select i1 %2, i32 2, i32 %1
  // }

  const bool HasBranchWeights =
      !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;

  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      // Inner select: SecondCase's result vs the default result.
      Value *ValueCompare =
          Builder.CreateICmpEQ(LHS: Condition, RHS: SecondCase, Name: "switch.selectcmp");
      SelectValue = Builder.CreateSelect(C: ValueCompare, True: ResultVector[1].first,
                                         False: DefaultResult, Name: "switch.select");
      if (auto *SI = dyn_cast<SelectInst>(Val: SelectValue);
          SI && HasBranchWeights) {
        // We start with 3 probabilities, where the numerator is the
        // corresponding BranchWeights[i], and the denominator is the sum over
        // BranchWeights. We want the probability and negative probability of
        // Condition == SecondCase.
        assert(BranchWeights.size() == 3);
        setBranchWeights(
            I&: *SI, Weights: {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
            /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
    // Outer select: FirstCase's result vs the inner select (or SecondCase's
    // result directly when there is no default).
    Value *ValueCompare =
        Builder.CreateICmpEQ(LHS: Condition, RHS: FirstCase, Name: "switch.selectcmp");
    Value *Ret = Builder.CreateSelect(C: ValueCompare, True: ResultVector[0].first,
                                      False: SelectValue, Name: "switch.select");
    if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
      // We may have had a DefaultResult. Base the position of the first and
      // second's branch weights accordingly. Also the probability that Condition
      // != FirstCase needs to take that into account.
      assert(BranchWeights.size() >= 2);
      size_t FirstCasePos = (Condition != nullptr);
      size_t SecondCasePos = FirstCasePos + 1;
      uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
      setBranchWeights(I&: *SI,
                       Weights: {BranchWeights[FirstCasePos],
                                DefaultCase + BranchWeights[SecondCasePos]},
                       /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
    return Ret;
  }

  // Handle the degenerate case where two cases have the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(Value: CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(numBits: MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(RHS: MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(V: Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(RHS: AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(Value: CaseCount)) {
          Value *And = Builder.CreateAnd(LHS: Condition, RHS: AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              LHS: And, RHS: Constant::getIntegerValue(Ty: And->getType(), V: AndMask));
          Value *Ret =
              Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
          if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
            // We know there's a Default case. We base the resulting branch
            // weights off its probability.
            assert(BranchWeights.size() >= 2);
            setBranchWeights(
                I&: *SI,
                Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: 0U), BranchWeights[0]},
                /*IsExpected=*/false, /*ElideAllZero=*/true);
          }
          return Ret;
        }
      }

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(numBits: MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits.
      if (BitMask.popcount() == Log2_32(Value: CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(LHS: Condition, RHS: MinCaseVal);
        Value *And = Builder.CreateAnd(LHS: Condition, RHS: ~BitMask, Name: "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            LHS: And, RHS: Constant::getNullValue(Ty: And->getType()), Name: "switch.selectcmp");
        Value *Ret =
            Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
        if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
          assert(BranchWeights.size() >= 2);
          setBranchWeights(
              I&: *SI,
              Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: 0U), BranchWeights[0]},
              /*IsExpected=*/false, /*ElideAllZero=*/true);
        }
        return Ret;
      }
    }

    // Handle the degenerate case where two cases have the same value.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[0],
                                         Name: "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[1],
                                         Name: "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(LHS: Cmp1, RHS: Cmp2, Name: "switch.selectcmp");
      Value *Ret =
          Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
      if (auto *SI = dyn_cast<SelectInst>(Val: Ret); SI && HasBranchWeights) {
        assert(BranchWeights.size() >= 2);
        setBranchWeights(
            I&: *SI, Weights: {accumulate(Range: drop_begin(RangeOrContainer&: BranchWeights), Init: 0U), BranchWeights[0]},
            /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
      return Ret;
    }
  }

  return nullptr;
}
6676
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
//
// Replaces SI with an unconditional branch to PHI's block, rewires PHI to
// take SelectValue along that edge, and keeps the dominator tree (via DTU)
// in sync with the removed/added edges.
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  // The new branch edge may not have existed before (e.g. DestBB was only
  // reached through intermediate case blocks).
  if (DTU && !is_contained(Range: predecessors(BB: DestBB), Element: SelectBB))
    Updates.push_back(x: {DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(Dest: DestBB);

  // Remove the switch.

  // Collapse PHI's (possibly multiple) entries for SelectBB into a single
  // entry carrying the select's value.
  PHI->removeIncomingValueIf(
      Predicate: [&](unsigned Idx) { return PHI->getIncomingBlock(i: Idx) == SelectBB; });
  PHI->addIncoming(V: SelectValue, BB: SelectBB);

  // Detach every other successor; dedupe DTU edge deletions since a
  // successor may appear for several cases.
  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(idx: i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(Pred: SelectBB);
    if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
      Updates.push_back(x: {DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}
6712
/// If a switch is only used to initialize one or more phi nodes in a common
/// successor block with only two different constant values, try to replace the
/// switch with a select. Returns true if the fold was made.
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
                              DomTreeUpdater *DTU, const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  Value *const Cond = SI->getCondition();
  PHINode *PHI = nullptr;
  BasicBlock *CommonDest = nullptr;
  Constant *DefaultResult;
  SwitchCaseResultVectorTy UniqueResults;
  // Collect all the cases that will deliver the same value from the switch.
  // At most two distinct results are allowed, so the fold can be a select.
  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
                             DL, TTI, /*MaxUniqueResults*/ 2))
    return false;

  assert(PHI != nullptr && "PHI for value select not found");
  Builder.SetInsertPoint(SI);
  // Extract the switch's profile data (if any) so the generated selects can
  // carry equivalent branch weights.
  SmallVector<uint32_t, 4> BranchWeights;
  if (!ProfcheckDisableMetadataFixes) {
    [[maybe_unused]] auto HasWeights =
        extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights&: BranchWeights);
    assert(!HasWeights == (BranchWeights.empty()));
  }
  assert(BranchWeights.empty() ||
         (BranchWeights.size() >=
          UniqueResults.size() + (DefaultResult != nullptr)));

  Value *SelectValue = foldSwitchToSelect(ResultVector: UniqueResults, DefaultResult, Condition: Cond,
                                          Builder, DL, BranchWeights);
  if (!SelectValue)
    return false;

  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
  return true;
}
6749
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

  /// Return true if the replacement is a bit map.
  bool isBitMap();

private:
  // Depending on the switch, there are different alternatives.
  // The constructor picks the cheapest representation that fits; the order
  // below is also the preference order it tries them in.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True when the linear map may wrap, in which case nsw must be omitted.
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6828
SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents. Each case value maps to slot
  // (CaseVal - Offset) in the dense table.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Track whether all (non-poison) results are identical; poison entries
    // never invalidate a single-value table.
    if (SingleValue && !isa<PoisonValue>(Val: CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(Val: SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(Val: DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  if (isa<IntegerType>(Val: ValueType)) {
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(Val: TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(Val: TableContents[I])) {
        // This is a poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Val: Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(RHS: PrevVal) : Val.sgt(RHS: PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(Val: TableContents[0]);
      LinearMultiplier = ConstantInt::get(Context&: M.getContext(), V: DistToPrev);
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      // Probe multiplier * max-index for signed overflow to decide whether
      // nsw may be attached later.
      if (isIntN(N: M.getBitWidth(), x: TableSize - 1))
        (void)M.smul_ov(RHS: APInt(M.getBitWidth(), TableSize - 1), Overflow&: MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ElementType: ValueType)) {
    IntegerType *IT = cast<IntegerType>(Val: ValueType);
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    // Pack entries starting from the last so element 0 lands in the low bits.
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(Val: TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(Val: TableContents[I - 1]);
        TableInt |= Val->getValue().zext(width: TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(Context&: M.getContext(), V: TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ElementType: ValueType, NumElements: TableSize);
  Initializer = ConstantArray::get(T: TableTy, V: TableContents);

  Kind = LookupTableKind;
}
6958
// Emit the IR that computes the switch's result from Index, according to the
// representation chosen by the constructor. Returns the replacement value.
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // No computation needed: every case yields the same constant.
    return SingleValue;
  case LinearMapKind: {
    ++NumLinearMaps;
    // Derive the result value from the input value:
    // result = LinearOffset + Index * LinearMultiplier.
    Value *Result = Builder.CreateIntCast(V: Index, DestTy: LinearMultiplier->getType(),
                                          isSigned: false, Name: "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(LHS: Result, RHS: LinearMultiplier, Name: "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(LHS: Result, RHS: LinearOffset, Name: "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    ++NumBitMaps;
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(V: Index, DestTy: MapTy, Name: "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        LHS: ShiftAmt, RHS: ConstantInt::get(Ty: MapTy, V: BitMapElementTy->getBitWidth()),
        Name: "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(LHS: BitMap, RHS: ShiftAmt, Name: "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(V: DownShifted, DestTy: BitMapElementTy, Name: "switch.masked");
  }
  case LookupTableKind: {
    ++NumLookupTables;
    // Materialize the table as a private constant global and load from it.
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array items. We will be only loading one
    // value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(Ty: ValueType));
    Type *IndexTy = DL.getIndexType(PtrTy: Table->getType());
    auto *ArrayTy = cast<ArrayType>(Val: Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(V: Index, DestTy: IndexTy);
      // The zext is non-negative when the table size proves the top bit of
      // the original index was never set.
      if (auto *Zext = dyn_cast<ZExtInst>(Val: Index))
        Zext->setNonNeg(
            isUIntN(N: OldBitWidth - 1, x: ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(Ty: IndexTy, V: 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(Ty: ArrayTy, Ptr: Table, IdxList: GEPIndices, Name: "switch.gep");
    return Builder.CreateLoad(Ty: ArrayTy->getElementType(), Ptr: GEP, Name: "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
7032
7033bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7034 uint64_t TableSize,
7035 Type *ElementType) {
7036 auto *IT = dyn_cast<IntegerType>(Val: ElementType);
7037 if (!IT)
7038 return false;
7039 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7040 // are <= 15, we could try to narrow the type.
7041
7042 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7043 if (TableSize >= UINT_MAX / IT->getBitWidth())
7044 return false;
7045 return DL.fitsInLegalInteger(Width: TableSize * IT->getBitWidth());
7046}
7047
7048static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
7049 const DataLayout &DL) {
7050 // Allow any legal type.
7051 if (TTI.isTypeLegal(Ty))
7052 return true;
7053
7054 auto *IT = dyn_cast<IntegerType>(Val: Ty);
7055 if (!IT)
7056 return false;
7057
7058 // Also allow power of 2 integer types that have at least 8 bits and fit in
7059 // a register. These types are common in frontend languages and targets
7060 // usually support loads of these types.
7061 // TODO: We could relax this to any integer that fits in a register and rely
7062 // on ABI alignment and padding in the table to allow the load to be widened.
7063 // Or we could widen the constants and truncate the load.
7064 unsigned BitWidth = IT->getBitWidth();
7065 return BitWidth >= 8 && isPowerOf2_32(Value: BitWidth) &&
7066 DL.fitsInLegalInteger(Width: IT->getBitWidth());
7067}
7068
// Accessor: the value produced when the index misses every case.
Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }

// Accessor: true when the chosen strategy is an in-memory lookup table.
bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }

// Accessor: true when the chosen strategy is a register-sized bitmap.
bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7074
/// Heuristic: a switch is considered "dense" when the number of cases covers
/// at least 40% of the case-value range.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensity = 40;

  // Guard the percentage arithmetic below against 64-bit overflow.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  const bool Dense = NumCases * 100 >= CaseRange * MinDensity;
  return Dense;
}
7086
7087static bool isSwitchDense(ArrayRef<int64_t> Values) {
7088 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7089 uint64_t Range = Diff + 1;
7090 if (Range < Diff)
7091 return false; // Overflow.
7092
7093 return isSwitchDense(NumCases: Values.size(), CaseRange: Range);
7094}
7095
7096/// Determine whether a lookup table should be built for this switch, based on
7097/// the number of cases, size of the table, and the types of the results.
7098// TODO: We could support larger than legal types by limiting based on the
7099// number of loads required and/or table size. If the constants are small we
7100// could use smaller table entries and extend after the load.
7101static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
7102 const TargetTransformInfo &TTI,
7103 const DataLayout &DL,
7104 const SmallVector<Type *> &ResultTypes) {
7105 if (SI->getNumCases() > TableSize)
7106 return false; // TableSize overflowed.
7107
7108 bool AllTablesFitInRegister = true;
7109 bool HasIllegalType = false;
7110 for (const auto &Ty : ResultTypes) {
7111 // Saturate this flag to true.
7112 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7113
7114 // Saturate this flag to false.
7115 AllTablesFitInRegister =
7116 AllTablesFitInRegister &&
7117 SwitchReplacement::wouldFitInRegister(DL, TableSize, ElementType: Ty);
7118
7119 // If both flags saturate, we're done. NOTE: This *only* works with
7120 // saturating flags, and all flags have to saturate first due to the
7121 // non-deterministic behavior of iterating over a dense map.
7122 if (HasIllegalType && !AllTablesFitInRegister)
7123 break;
7124 }
7125
7126 // If each table would fit in a register, we should build it anyway.
7127 if (AllTablesFitInRegister)
7128 return true;
7129
7130 // Don't build a table that doesn't fit in-register if it has illegal types.
7131 if (HasIllegalType)
7132 return false;
7133
7134 return isSwitchDense(NumCases: SI->getNumCases(), CaseRange: TableSize);
7135}
7136
7137static bool shouldUseSwitchConditionAsTableIndex(
7138 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7139 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7140 const DataLayout &DL, const TargetTransformInfo &TTI) {
7141 if (MinCaseVal.isNullValue())
7142 return true;
7143 if (MinCaseVal.isNegative() ||
7144 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7145 !HasDefaultResults)
7146 return false;
7147 return all_of(Range: ResultTypes, P: [&](const auto &ResultType) {
7148 return SwitchReplacement::wouldFitInRegister(
7149 DL, TableSize: MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ElementType: ResultType);
7150 });
7151}
7152
/// Try to reuse the switch table index compare. Following pattern:
/// \code
/// if (idx < tablesize)
///    r = table[idx]; // table does not contain default_value
/// else
///    r = default_value;
/// if (r != default_value)
///    ...
/// \endcode
/// Is optimized to:
/// \code
/// cond = idx < tablesize;
/// if (cond)
///    r = table[idx];
/// else
///    r = default_value;
/// if (cond)
///    ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
static void reuseTableCompare(
    User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  // Only a compare that uses the phi is eligible for reuse.
  ICmpInst *CmpInst = dyn_cast<ICmpInst>(Val: PhiUser);
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // Only handle compares against a constant RHS; otherwise we cannot fold
  // the per-case results below.
  Constant *CmpOp1 = dyn_cast<Constant>(Val: CmpInst->getOperand(i_nocapture: 1));
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(Ty: RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(Ty: RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  const DataLayout &DL = PhiBlock->getDataLayout();
  Constant *DefaultConst = ConstantFoldCompareInstOperands(
      Predicate: CmpInst->getPredicate(), LHS: DefaultValue, RHS: CmpOp1, DL);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result. Every case must fold to the opposite boolean of the
  // default for the compare to be equivalent to the range check.
  for (auto ValuePair : Values) {
    Constant *CaseConst = ConstantFoldCompareInstOperands(
        Predicate: CmpInst->getPredicate(), LHS: ValuePair.second, RHS: CmpOp1, DL);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(BB: PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(V: RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        V1: RangeCmp, V2: ConstantInt::get(Ty: RangeCmp->getType(), V: 1), Name: "inverted.cmp",
        InsertBefore: RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(V: InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
7234
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
///
/// When \p ConvertSwitchToLookupTable is false (early pipeline), any
/// replacement that would require a lookup table, a bitmap, or a hole-check
/// mask is rejected so later passes can still optimize the switch.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
                                 DomTreeUpdater *DTU, const DataLayout &DL,
                                 const TargetTransformInfo &TTI,
                                 bool ConvertSwitchToLookupTable) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
  // Per-phi list of (case value, result constant) pairs, in case order.
  SmallDenseMap<PHINode *, ResultListTy> ResultLists;

  SmallDenseMap<PHINode *, Constant *> DefaultResults;
  SmallVector<Type *> ResultTypes;
  // PHIs records insertion order so later iteration is deterministic.
  SmallVector<PHINode *, 4> PHIs;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    // Min/max are tracked with signed comparisons.
    if (CaseVal->getValue().slt(RHS: MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(RHS: MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CaseDest: CI->getCaseSuccessor(), CommonDest: &CommonDest,
                        Res&: Results, DL, TTI))
      return false;

    // Append the result and result types from this case to the list for each
    // phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(Key: PHI);
      if (Inserted)
        PHIs.push_back(Elt: PHI);
      It->second.push_back(Elt: std::make_pair(x&: CaseVal, y&: Value));
      ResultTypes.push_back(Elt: PHI->getType());
    }
  }

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest,
                     Res&: DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      MinCaseVal&: *MinCaseVal, MaxCaseVal: *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    // The condition indexes directly; the table spans [0, MaxCaseVal].
    TableSize = MaxCaseVal->getLimitedValue() + 1;
    TableIndexOffset = ConstantInt::get(Ty: MaxCaseVal->getIntegerType(), V: 0);
  } else {
    // The table spans [MinCaseVal, MaxCaseVal]; the index is rebased below.
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

    TableIndexOffset = MinCaseVal;
  }

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  // NOTE(review): assumes PHIs[0]'s result list has one entry per case —
  // getCaseResults appears to guarantee this for the common destination.
  uint64_t NumResults = ResultLists[PHIs[0]].size();
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(Width: TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  // Compute the table index value.
  Value *TableIndex;
  if (UseSwitchConditionAsTableIndex) {
    TableIndex = SI->getCondition();
    if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. It will use the default result to fill in the table hole later,
      // so make sure it exist.
      ConstantRange CR =
          computeConstantRange(V: TableIndex, /* ForSigned */ false);
      // Grow the table shouldn't have any size impact by checking
      // wouldFitInRegister.
      // TODO: Consider growing the table also when it doesn't fit in a register
      // if no optsize is specified.
      const uint64_t UpperBound = CR.getUpper().getLimitedValue();
      if (!CR.isUpperWrapped() &&
          all_of(Range&: ResultTypes, P: [&](const auto &ResultType) {
            return SwitchReplacement::wouldFitInRegister(DL, TableSize: UpperBound,
                                                         ElementType: ResultType);
          })) {
        // There may be some case index larger than the UpperBound (unreachable
        // case), so make sure the table size does not get smaller.
        TableSize = std::max(a: UpperBound, b: TableSize);
        // The default branch is unreachable after we enlarge the lookup table.
        // Adjust DefaultIsReachable to reuse code path.
        DefaultIsReachable = false;
      }
    }
  }

  // Keep track of the switch replacement for each phi. Building the
  // replacements up front lets us query isLookupTable/isBitMap before
  // committing to any IR changes.
  SmallDenseMap<PHINode *, SwitchReplacement> PhiToReplacementMap;
  for (PHINode *PHI : PHIs) {
    const auto &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();
    // Use any value to fill the lookup table holes.
    Constant *DefaultVal =
        AllHolesArePoison ? PoisonValue::get(T: ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
                                  ResultList, DefaultVal, DL, FuncName);
    PhiToReplacementMap.insert(KV: {PHI, Replacement});
  }

  bool AnyLookupTables = any_of(
      Range&: PhiToReplacementMap, P: [](auto &KV) { return KV.second.isLookupTable(); });
  bool AnyBitMaps = any_of(Range&: PhiToReplacementMap,
                           P: [](auto &KV) { return KV.second.isBitMap(); });

  // A few conditions prevent the generation of lookup tables:
  // 1. The target does not support lookup tables.
  // 2. The "no-jump-tables" function attribute is set.
  // However, these objections do not apply to other switch replacements, like
  // the bitmap, so we only stop here if any of these conditions are met and we
  // want to create a LUT. Otherwise, continue with the switch replacement.
  if (AnyLookupTables &&
      (!TTI.shouldBuildLookupTables() ||
       Fn->getFnAttribute(Kind: "no-jump-tables").getValueAsBool()))
    return false;

  // In the early optimization pipeline, disable formation of lookup tables,
  // bit maps and mask checks, as they may inhibit further optimization.
  if (!ConvertSwitchToLookupTable &&
      (AnyLookupTables || AnyBitMaps || NeedMask))
    return false;

  Builder.SetInsertPoint(SI);
  // TableIndex is the switch condition - TableIndexOffset if we don't
  // use the condition directly
  if (!UseSwitchConditionAsTableIndex) {
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res =
          MaxCaseVal->getValue().ssub_ov(RHS: MinCaseVal->getValue(), Overflow&: MayWrap);
      (void)Res;
    }
    TableIndex = Builder.CreateSub(LHS: SI->getCondition(), RHS: TableIndexOffset,
                                   Name: "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Context&: Mod.getContext(), Name: "switch.lookup", Parent: CommonDest->getParent(), InsertBefore: CommonDest);

  CondBrInst *RangeCheckBranch = nullptr;
  CondBrInst *CondBranch = nullptr;

  Builder.SetInsertPoint(SI);
  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // Every possible index hits the table: branch unconditionally.
    Builder.CreateBr(Dest: LookupBB);
    if (DTU)
      Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
    // Note: We call removeProdecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    // Guard the table access with an unsigned range check against TableSize.
    Value *Cmp = Builder.CreateICmpULT(
        LHS: TableIndex, RHS: ConstantInt::get(Ty: MinCaseVal->getType(), V: TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cond: Cmp, True: LookupBB, False: SI->getDefaultDest());
    CondBranch = RangeCheckBranch;
    if (DTU)
      Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Context&: Mod.getContext(), Name: "switch.lookup",
                                  Parent: CommonDest->getParent(), InsertBefore: CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(A: std::max(a: 7ULL, b: TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Context&: Mod.getContext(), V: MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(V: TableIndex, DestTy: MapTy, Name: "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(LHS: TableMask, RHS: MaskIndex, Name: "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        V: Shifted, DestTy: Type::getInt1Ty(C&: Mod.getContext()), Name: "switch.lobit");
    CondBranch = Builder.CreateCondBr(Cond: LoBit, True: LookupBB, False: SI->getDefaultDest());
    if (DTU) {
      Updates.push_back(x: {DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back(x: {DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(Succ: SI->getDefaultDest(), NewPred: MaskBB, ExistPred: BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(Pred: BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back(x: {DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  // Materialize the replacement value for each phi and wire it into the phi.
  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];
    auto Replacement = PhiToReplacementMap.at(Val: PHI);
    auto *Result = Replacement.replaceSwitch(Index: TableIndex, Builder, DL, Func: Fn);
    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(PhiUser: User, PhiBlock, RangeCheckBranch,
                          DefaultValue: Replacement.getDefaultValue(), Values: ResultList);
      }
    }

    PHI->addIncoming(V: Result, BB: LookupBB);
  }

  Builder.CreateBr(Dest: CommonDest);
  if (DTU)
    Updates.push_back(x: {DominatorTree::Insert, LookupBB, CommonDest});

  // Redistribute the switch's branch weights onto the new conditional branch:
  // case weights accumulate into the lookup side, the default keeps its own.
  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(I: *SI, Weights&: BranchWeights);
  uint64_t ToLookupWeight = 0;
  uint64_t ToDefaultWeight = 0;

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
    BasicBlock *Succ = SI->getSuccessor(idx: I);

    if (Succ == SI->getDefaultDest()) {
      if (HasBranchWeights)
        ToDefaultWeight += BranchWeights[I];
      continue;
    }
    Succ->removePredecessor(Pred: BB);
    if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
      Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
    if (HasBranchWeights)
      ToLookupWeight += BranchWeights[I];
  }
  SI->eraseFromParent();
  if (HasBranchWeights)
    setFittedBranchWeights(I&: *CondBranch, Weights: {ToLookupWeight, ToDefaultWeight},
                           /*IsExpected=*/false);
  if (DTU)
    DTU->applyUpdates(Updates);

  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7586
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
///
/// This converts a sparse switch into a dense switch which allows better
/// lowering and could also allow transforming into a lookup table.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(Val: SI->getCondition()->getType());
  // Stick to conditions that fit in 64 bits and a legal integer register so
  // the arithmetic below stays exact and cheap.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values.
  // We can treat the case values as signed or unsigned. We can optimize more
  // common cases such as a sequence crossing zero {-4,0,4,8} if we interpret
  // case values as signed.
  SmallVector<int64_t,4> Values;
  for (const auto &C : SI->cases())
    Values.push_back(Elt: C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(C&: Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(a: Shift, b: (unsigned)llvm::countr_zero(Val: (uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    // Divide every rebased value by the common power-of-two factor.
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(Val: SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  // Rebase the condition by Base, then rotate right by Shift (expressed as
  // fshl with both operands equal, i.e. a rotate left by BitWidth - Shift).
  Value *Sub =
      Builder.CreateSub(LHS: SI->getCondition(), RHS: ConstantInt::getSigned(Ty, V: Base));
  Value *Rot = Builder.CreateIntrinsic(
      RetTy: Ty, ID: Intrinsic::fshl,
      Args: {Sub, Sub, ConstantInt::get(Ty, V: Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(From: SI->getCondition(), To: Rot);

  // Rewrite each case value to its rebased, shifted equivalent.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(Val: ConstantInt::get(Ty, V: Sub.lshr(shiftAmt: Shift))));
  }
  return true;
}
7673
/// Tries to transform the switch when the condition is umin with a constant.
/// In that case, the default branch can be replaced by the constant's branch.
/// This method also removes dead cases when the simplification cannot replace
/// the default branch.
///
/// For example:
/// switch(umin(a, 3)) {
///   case 0:
///   case 1:
///   case 2:
///   case 3:
///   case 4:
///     // ...
///   default:
///     unreachable
/// }
///
/// Transforms into:
///
/// switch(a) {
///   case 0:
///   case 1:
///   case 2:
///   default:
///     // This is case 3
/// }
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU) {
  Value *A;
  ConstantInt *Constant;

  // Only fire when the condition is umin(A, Constant).
  if (!match(V: SI->getCondition(), P: m_UMin(L: m_Value(V&: A), R: m_ConstantInt(CI&: Constant))))
    return false;

  SmallVector<DominatorTree::UpdateType> Updates;
  SwitchInstProfUpdateWrapper SIW(*SI);
  BasicBlock *BB = SIW->getParent();

  // Dead cases are removed even when the simplification fails.
  // A case is dead when its value is higher than the Constant, because
  // umin(A, Constant) can never produce such a value.
  for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
    if (!I->getCaseValue()->getValue().ugt(RHS: Constant->getValue())) {
      ++I;
      continue;
    }
    BasicBlock *DeadCaseBB = I->getCaseSuccessor();
    DeadCaseBB->removePredecessor(Pred: BB);
    Updates.push_back(Elt: {DominatorTree::Delete, BB, DeadCaseBB});
    I = SIW.removeCase(I);
    // removeCase invalidates the end iterator; refresh it.
    E = SIW->case_end();
  }

  auto Case = SI->findCaseValue(C: Constant);
  // If the case value is not found, `findCaseValue` returns the default case.
  // In this scenario, since there is no explicit `case 3:`, the simplification
  // fails. The simplification also fails when the switch's default destination
  // is reachable.
  if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
    if (DTU)
      DTU->applyUpdates(Updates);
    // Report whether the dead-case cleanup alone changed anything.
    return !Updates.empty();
  }

  // Fold `case Constant` into the default: all values >= Constant saturate
  // to it under umin, so the default can take over that destination and the
  // original (unreachable) default goes away.
  BasicBlock *Unreachable = SI->getDefaultDest();
  SIW.replaceDefaultDest(I: Case);
  SIW.removeCase(I: Case);
  SIW->setCondition(A);

  Updates.push_back(Elt: {DominatorTree::Delete, BB, Unreachable});

  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
7748
/// Tries to transform switch of powers of two to reduce switch range.
/// For example, switch like:
/// switch (C) { case 1: case 2: case 64: case 128: }
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
/// This transformation allows better lowering and may transform the switch
/// instruction into a sequence of bit manipulation and a smaller
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU,
                                        const DataLayout &DL,
                                        const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Val: Condition->getType());

  // Restrict to conditions that fit in 64 bits and a legal register.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
    return false;

  // Ensure trailing zeroes count intrinsic emission is not too expensive.
  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
                                {Condition, ConstantInt::getTrue(Context)});
  if (TTI.getIntrinsicInstrCost(ICA: Attrs, CostKind: TTI::TCK_SizeAndLatency) >
      TTI::TCC_Basic * 2)
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will start emitting jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // Check that switch cases are powers of two.
  SmallVector<uint64_t, 4> Values;
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(Value: CaseValue))
      Values.push_back(Elt: CaseValue);
    else
      return false;
  }

  // isSwitchDense requires case values to be sorted. After the cttz rewrite
  // the effective range is the spread of the trailing-zero counts.
  llvm::sort(C&: Values);
  if (!isSwitchDense(NumCases: Values.size(), CaseRange: llvm::countr_zero(Val: Values.back()) -
                                      llvm::countr_zero(Val: Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  if (!SI->defaultDestUnreachable()) {
    // Let non-power-of-two inputs jump to the default case, when the latter is
    // reachable. A popcount == 1 test guards the rewritten switch.
    auto *PopC = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: Condition);
    auto *IsPow2 = Builder.CreateICmpEQ(LHS: PopC, RHS: ConstantInt::get(Ty: CondTy, V: 1));

    auto *OrigBB = SI->getParent();
    auto *DefaultCaseBB = SI->getDefaultDest();
    BasicBlock *SplitBB = SplitBlock(Old: OrigBB, SplitPt: SI, DTU);
    auto It = OrigBB->getTerminator()->getIterator();
    SmallVector<uint32_t> Weights;
    auto HasWeights =
        !ProfcheckDisableMetadataFixes && extractBranchWeights(I: *SI, Weights);
    auto *BI = CondBrInst::Create(Cond: IsPow2, IfTrue: SplitBB, IfFalse: DefaultCaseBB, InsertBefore: It);
    if (HasWeights && any_of(Range&: Weights, P: not_equal_to(Arg: 0))) {
      // IsPow2 covers a subset of the cases in which we'd go to the default
      // label. The other is those powers of 2 that don't appear in the case
      // statement. We don't know the distribution of the values coming in, so
      // the safest is to split 50-50 the original probability to `default`.
      uint64_t OrigDenominator =
          sum_of(Range: map_range(C&: Weights, F: StaticCastTo<uint64_t>));
      SmallVector<uint64_t> NewWeights(2);
      NewWeights[1] = Weights[0] / 2;
      NewWeights[0] = OrigDenominator - NewWeights[1];
      setFittedBranchWeights(I&: *BI, Weights: NewWeights, /*IsExpected=*/false);
      // The probability of executing the default block stays constant. It was
      // p_d = Weights[0] / OrigDenominator
      // we rewrite as W/D
      // We want to find the probability of the default branch of the switch
      // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
      // i.e. the original probability is the probability we go to the default
      // branch from the BI branch, or we take the default branch on the SI.
      // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
      // This matches using W/2 for the default branch probability numerator and
      // D-W/2 as the denominator.
      Weights[0] = NewWeights[1];
      uint64_t CasesDenominator = OrigDenominator - Weights[0];
      // Rescale the per-case weights so the total still sums consistently.
      for (auto &W : drop_begin(RangeOrContainer&: Weights))
        W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;

      setBranchWeights(I&: *SI, Weights, /*IsExpected=*/false);
    }
    // BI is handling the default case for SI, and so should share its DebugLoc.
    BI->setDebugLoc(SI->getDebugLoc());
    // Remove the unconditional branch SplitBlock created; BI replaces it.
    It->eraseFromParent();

    addPredecessorToBlock(Succ: DefaultCaseBB, NewPred: OrigBB, ExistPred: SplitBB);
    if (DTU)
      DTU->applyUpdates(Updates: {{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
  }

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(Ty: OrigValue->getIntegerType(),
                                   V: OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number. The second argument
  // (is_zero_poison = true) is safe: zero inputs were filtered out by the
  // popcount guard above or fall into the unreachable default.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      ID: Intrinsic::cttz, Types: {CondTy}, Args: {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
7868
7869/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7870/// the same destination.
7871static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
7872 DomTreeUpdater *DTU) {
7873 auto *Cmp = dyn_cast<CmpIntrinsic>(Val: SI->getCondition());
7874 if (!Cmp || !Cmp->hasOneUse())
7875 return false;
7876
7877 SmallVector<uint32_t, 4> Weights;
7878 bool HasWeights = extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights);
7879 if (!HasWeights)
7880 Weights.resize(N: 4); // Avoid checking HasWeights everywhere.
7881
7882 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7883 int64_t Res;
7884 BasicBlock *Succ, *OtherSucc;
7885 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7886 BasicBlock *Unreachable = nullptr;
7887
7888 if (SI->getNumCases() == 2) {
7889 // Find which of 1, 0 or -1 is missing (handled by default dest).
7890 SmallSet<int64_t, 3> Missing;
7891 Missing.insert(V: 1);
7892 Missing.insert(V: 0);
7893 Missing.insert(V: -1);
7894
7895 Succ = SI->getDefaultDest();
7896 SuccWeight = Weights[0];
7897 OtherSucc = nullptr;
7898 for (auto &Case : SI->cases()) {
7899 std::optional<int64_t> Val =
7900 Case.getCaseValue()->getValue().trySExtValue();
7901 if (!Val)
7902 return false;
7903 if (!Missing.erase(V: *Val))
7904 return false;
7905 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7906 return false;
7907 OtherSucc = Case.getCaseSuccessor();
7908 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7909 }
7910
7911 assert(Missing.size() == 1 && "Should have one case left");
7912 Res = *Missing.begin();
7913 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7914 // Normalize so that Succ is taken once and OtherSucc twice.
7915 Unreachable = SI->getDefaultDest();
7916 Succ = OtherSucc = nullptr;
7917 for (auto &Case : SI->cases()) {
7918 BasicBlock *NewSucc = Case.getCaseSuccessor();
7919 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7920 if (!OtherSucc || OtherSucc == NewSucc) {
7921 OtherSucc = NewSucc;
7922 OtherSuccWeight += Weight;
7923 } else if (!Succ) {
7924 Succ = NewSucc;
7925 SuccWeight = Weight;
7926 } else if (Succ == NewSucc) {
7927 std::swap(a&: Succ, b&: OtherSucc);
7928 std::swap(a&: SuccWeight, b&: OtherSuccWeight);
7929 } else
7930 return false;
7931 }
7932 for (auto &Case : SI->cases()) {
7933 std::optional<int64_t> Val =
7934 Case.getCaseValue()->getValue().trySExtValue();
7935 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7936 return false;
7937 if (Case.getCaseSuccessor() == Succ) {
7938 Res = *Val;
7939 break;
7940 }
7941 }
7942 } else {
7943 return false;
7944 }
7945
7946 // Determine predicate for the missing case.
7947 ICmpInst::Predicate Pred;
7948 switch (Res) {
7949 case 1:
7950 Pred = ICmpInst::ICMP_UGT;
7951 break;
7952 case 0:
7953 Pred = ICmpInst::ICMP_EQ;
7954 break;
7955 case -1:
7956 Pred = ICmpInst::ICMP_ULT;
7957 break;
7958 }
7959 if (Cmp->isSigned())
7960 Pred = ICmpInst::getSignedPredicate(Pred);
7961
7962 MDNode *NewWeights = nullptr;
7963 if (HasWeights)
7964 NewWeights = MDBuilder(SI->getContext())
7965 .createBranchWeights(TrueWeight: SuccWeight, FalseWeight: OtherSuccWeight);
7966
7967 BasicBlock *BB = SI->getParent();
7968 Builder.SetInsertPoint(SI->getIterator());
7969 Value *ICmp = Builder.CreateICmp(P: Pred, LHS: Cmp->getLHS(), RHS: Cmp->getRHS());
7970 Builder.CreateCondBr(Cond: ICmp, True: Succ, False: OtherSucc, BranchWeights: NewWeights,
7971 Unpredictable: SI->getMetadata(KindID: LLVMContext::MD_unpredictable));
7972 OtherSucc->removePredecessor(Pred: BB);
7973 if (Unreachable)
7974 Unreachable->removePredecessor(Pred: BB);
7975 SI->eraseFromParent();
7976 Cmp->eraseFromParent();
7977 if (DTU && Unreachable)
7978 DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, Unreachable}});
7979 return true;
7980}
7981
7982/// Checking whether two BBs are equal depends on the contents of the
7983/// BasicBlock and the incoming values of their successor PHINodes.
7984/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7985/// calling this function on each BasicBlock every time isEqual is called,
7986/// especially since the same BasicBlock may be passed as an argument multiple
7987/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7988/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7989/// of the incoming values.
7990struct EqualBBWrapper {
7991 BasicBlock *BB;
7992
7993 // One Phi usually has < 8 incoming values.
7994 using BB2ValueMap = SmallDenseMap<BasicBlock *, Value *, 8>;
7995 using Phi2IVsMap = DenseMap<PHINode *, BB2ValueMap>;
7996 Phi2IVsMap *PhiPredIVs;
7997
7998 // We only merge the identical non-entry BBs with
7999 // - terminator unconditional br to Succ (pending relaxation),
8000 // - does not have address taken / weird control.
8001 static bool canBeMerged(const BasicBlock *BB) {
8002 assert(BB && "Expected non-null BB");
8003 // Entry block cannot be eliminated or have predecessors.
8004 if (BB->isEntryBlock())
8005 return false;
8006
8007 // Single successor and must be Succ.
8008 // FIXME: Relax that the terminator is a BranchInst by checking for equality
8009 // on other kinds of terminators. We decide to only support unconditional
8010 // branches for now for compile time reasons.
8011 auto *BI = dyn_cast<UncondBrInst>(Val: BB->getTerminator());
8012 if (!BI)
8013 return false;
8014
8015 // Avoid blocks that are "address-taken" (blockaddress) or have unusual
8016 // uses.
8017 if (BB->hasAddressTaken() || BB->isEHPad())
8018 return false;
8019
8020 // TODO: relax this condition to merge equal blocks with >1 instructions?
8021 // Here, we use a O(1) form of the O(n) comparison of `size() != 1`.
8022 if (&BB->front() != &BB->back())
8023 return false;
8024
8025 // The BB must have at least one predecessor.
8026 if (pred_empty(BB))
8027 return false;
8028
8029 return true;
8030 }
8031};
8032
// DenseMapInfo traits so pointers to EqualBBWrapper can key a DenseSet/
// DenseMap. Hashing and equality are defined over the *wrapped block's
// structure* (its single successor plus the incoming values it contributes to
// that successor's PHIs), so structurally identical blocks hash alike and
// compare equal.
template <> struct llvm::DenseMapInfo<const EqualBBWrapper *> {
  static const EqualBBWrapper *getEmptyKey() {
    return static_cast<EqualBBWrapper *>(DenseMapInfo<void *>::getEmptyKey());
  }
  static const EqualBBWrapper *getTombstoneKey() {
    return static_cast<EqualBBWrapper *>(
        DenseMapInfo<void *>::getTombstoneKey());
  }
  // Hash a wrapper from its block's successor and the incoming values the
  // block feeds into that successor's PHIs (fetched from the precomputed
  // PhiPredIVs map).
  static unsigned getHashValue(const EqualBBWrapper *EBW) {
    BasicBlock *BB = EBW->BB;
    UncondBrInst *BI = cast<UncondBrInst>(Val: BB->getTerminator());
    assert(BB->size() == 1 && "Expected just a single branch in the BB");

    // Since we assume the BB is just a single UncondBrInst with a single
    // successor, we hash as the BB and the incoming Values of its successor
    // PHIs. Initially, we tried to just use the successor BB as the hash, but
    // including the incoming PHI values leads to better performance.
    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
    // time and passing it in EqualBBWrapper, but this slowed down the average
    // compile time without having any impact on the worst case compile time.
    BasicBlock *Succ = BI->getSuccessor();
    auto PhiValsForBB = map_range(C: Succ->phis(), F: [&](PHINode &Phi) {
      return (*EBW->PhiPredIVs)[&Phi][BB];
    });
    return hash_combine(args: Succ, args: hash_combine_range(R&: PhiValsForBB));
  }
  // Two wrappers are equal when their blocks branch to the same successor and
  // contribute identical incoming values to each of its PHIs. The
  // empty/tombstone sentinels must only compare equal to themselves.
  static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
    auto *EKey = DenseMapInfo<EqualBBWrapper *>::getEmptyKey();
    auto *TKey = DenseMapInfo<EqualBBWrapper *>::getTombstoneKey();
    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
      return LHS == RHS;

    BasicBlock *A = LHS->BB;
    BasicBlock *B = RHS->BB;

    // FIXME: we checked that the size of A and B are both 1 in
    // mergeIdenticalUncondBBs to make the Case list smaller to
    // improve performance. If we decide to support BasicBlocks with more
    // than just a single instruction, we need to check that A.size() ==
    // B.size() here, and we need to check more than just the BranchInsts
    // for equality.

    UncondBrInst *ABI = cast<UncondBrInst>(Val: A->getTerminator());
    UncondBrInst *BBI = cast<UncondBrInst>(Val: B->getTerminator());
    if (ABI->getSuccessor() != BBI->getSuccessor())
      return false;

    // Need to check that PHIs in successor have matching values.
    BasicBlock *Succ = ABI->getSuccessor();
    auto IfPhiIVMatch = [&](PHINode &Phi) {
      // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
      // query.
      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
      return PredIVs[A] == PredIVs[B];
    };
    return all_of(Range: Succ->phis(), P: IfPhiIVMatch);
  }
};
8091
// Merge identical BBs into one of them.
//
// Candidates must all satisfy EqualBBWrapper::canBeMerged (blocks containing
// only an unconditional branch). Candidates that branch to the same successor
// and feed identical incoming values into that successor's PHIs are grouped,
// and every edge into a duplicate is redirected to the group's first-seen
// ("kept") block. Returns true if at least one block was merged away.
static bool mergeIdenticalBBs(ArrayRef<BasicBlock *> Candidates,
                              DomTreeUpdater *DTU) {
  if (Candidates.size() < 2)
    return false;

  // Build Cases. Skip BBs that are not candidates for simplification. Mark
  // PHINodes which need to be processed into PhiPredIVs. We decide to process
  // an entire PHI at once after the loop, opposed to calling
  // getIncomingValueForBlock inside this loop, since each call to
  // getIncomingValueForBlock is O(|Preds|).
  EqualBBWrapper::Phi2IVsMap PhiPredIVs;
  SmallVector<EqualBBWrapper> BBs2Merge;
  BBs2Merge.reserve(N: Candidates.size());
  SmallSetVector<PHINode *, 8> Phis;

  for (BasicBlock *BB : Candidates) {
    BasicBlock *Succ = BB->getSingleSuccessor();
    assert(Succ && "Expected unconditional BB");
    BBs2Merge.emplace_back(Args: EqualBBWrapper{.BB: BB, .PhiPredIVs: &PhiPredIVs});
    Phis.insert_range(R: make_pointer_range(Range: Succ->phis()));
  }

  // Precompute a data structure to improve performance of isEqual for
  // EqualBBWrapper.
  PhiPredIVs.reserve(NumEntries: Phis.size());
  for (PHINode *Phi : Phis) {
    auto &IVs =
        PhiPredIVs.try_emplace(Key: Phi, Args: Phi->getNumIncomingValues()).first->second;
    // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
    // O(|Pred|).
    for (auto &IV : Phi->incoming_values())
      IVs.insert(KV: {Phi->getIncomingBlock(U: IV), IV.get()});
  }

  // Group duplicates using DenseSet with custom equality/hashing.
  // Build a set such that if the EqualBBWrapper exists in the set and another
  // EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
  // the set should be replaced with the one in the set. If the EqualBBWrapper
  // is not in the set, then it should be added to the set so other
  // EqualBBWrapper can check against it in the same manner. We use
  // EqualBBWrapper instead of just BasicBlock because we'd like to pass around
  // information to isEquality, getHashValue, and when doing the replacement
  // with better performance.
  DenseSet<const EqualBBWrapper *> Keep;
  Keep.reserve(Size: BBs2Merge.size());

  SmallVector<DominatorTree::UpdateType> Updates;
  Updates.reserve(N: BBs2Merge.size() * 2);

  bool MadeChange = false;

  // Helper: redirect all edges X -> Dead to X -> Live, recording the
  // corresponding DT edge insertions/deletions in Updates (applied once at
  // the end).
  auto RedirectIncomingEdges = [&](BasicBlock *Dead, BasicBlock *Live) {
    SmallSetVector<BasicBlock *, 8> DeadPreds(llvm::from_range,
                                              predecessors(BB: Dead));
    if (DTU) {
      // All predecessors of DeadPred (except the common predecessor) will be
      // moved to LivePred.
      Updates.reserve(N: Updates.size() + DeadPreds.size() * 2);
      SmallPtrSet<BasicBlock *, 16> LivePreds(llvm::from_range,
                                              predecessors(BB: Live));
      for (BasicBlock *PredOfDead : DeadPreds) {
        // Do not modify those common predecessors of DeadPred and LivePred.
        if (!LivePreds.contains(Ptr: PredOfDead))
          Updates.push_back(Elt: {DominatorTree::Insert, PredOfDead, Live});
        Updates.push_back(Elt: {DominatorTree::Delete, PredOfDead, Dead});
      }
    }
    LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
               Dead->printAsOperand(dbgs()); dbgs() << " with pred ";
               Live->printAsOperand(dbgs()); dbgs() << " for ";
               Live->getSingleSuccessor()->printAsOperand(dbgs());
               dbgs() << "\n");
    // Replace successors in all predecessors of DeadPred.
    for (BasicBlock *PredOfDead : DeadPreds) {
      Instruction *T = PredOfDead->getTerminator();
      T->replaceSuccessorWith(OldBB: Dead, NewBB: Live);
    }
  };

  // Try to eliminate duplicate predecessors.
  for (const auto &EBW : BBs2Merge) {
    // EBW is a candidate for simplification. If we find a duplicate BB,
    // replace it.
    const auto &[It, Inserted] = Keep.insert(V: &EBW);
    if (Inserted)
      continue;

    // Found a duplicate: merge EBW.BB into the canonical equivalent block
    // (*It)->BB that was kept when first seen.
    BasicBlock *KeepBB = (*It)->BB;
    BasicBlock *DeadBB = EBW.BB;

    // Avoid merging a BB with itself.
    if (KeepBB == DeadBB)
      continue;

    // Redirect all edges into DeadBB to KeepBB.
    RedirectIncomingEdges(DeadBB, KeepBB);

    // DeadBB now has no predecessors; it is deliberately left in place for
    // later DCE rather than erased here, which keeps this routine simple and
    // DomTree-safe.
    assert(pred_empty(DeadBB) && "DeadBB should be unreachable.");
    MadeChange = true;
  }

  if (DTU && !Updates.empty())
    DTU->applyUpdates(Updates);

  return MadeChange;
}
8204
8205bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8206 DomTreeUpdater *DTU) {
8207 // Collect candidate switch-arms top-down.
8208 SmallSetVector<BasicBlock *, 16> FilteredArms(
8209 llvm::from_range,
8210 make_filter_range(Range: successors(I: SI), Pred: EqualBBWrapper::canBeMerged));
8211 return mergeIdenticalBBs(Candidates: FilteredArms.getArrayRef(), DTU);
8212}
8213
8214bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
8215 DomTreeUpdater *DTU) {
8216 // Need at least 2 predecessors to do anything.
8217 if (!BB || !BB->hasNPredecessorsOrMore(N: 2))
8218 return false;
8219
8220 // Compilation time consideration: retain the canonical loop, otherwise, we
8221 // require more time in the later loop canonicalization.
8222 if (Options.NeedCanonicalLoop && is_contained(Range&: LoopHeaders, Element: BB))
8223 return false;
8224
8225 // Collect candidate predecessors bottom-up.
8226 SmallSetVector<BasicBlock *, 8> FilteredPreds(
8227 llvm::from_range,
8228 make_filter_range(Range: predecessors(BB), Pred: EqualBBWrapper::canBeMerged));
8229 return mergeIdenticalBBs(Candidates: FilteredPreds.getArrayRef(), DTU);
8230}
8231
// Top-level driver for switch simplification. Tries a fixed sequence of
// transformations — the ordering is deliberate (information-preserving folds
// first) — and requests a resimplification round as soon as one fires.
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(TI: SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(TI: SI, Pred: OnlyPred, Builder))
        return requestResimplify();

    // A switch on a select can be rewritten in terms of the select's arms.
    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Val: Cond))
      if (simplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(SkipPseudoOp: false).begin())
      if (foldValueComparisonIntoPredecessors(TI: SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, AC: Options.AC, DL))
    return requestResimplify();

  // Fold a switch over a ucmp/scmp result into a single icmp + branch.
  if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
    return requestResimplify();

  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion of switches to arithmetic or lookup table is disabled in
  // the early optimization pipeline, as it may lose information or make the
  // resulting code harder to analyze.
  if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
    if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
                             ConvertSwitchToLookupTable: Options.ConvertSwitchToLookupTable))
      return requestResimplify();

  if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (reduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(TI: SI, AllInstsEqOnly: !Options.HoistCommonInsts))
    return requestResimplify();

  // We can merge identical switch arms early to enhance more aggressive
  // optimization on switch.
  if (simplifyDuplicateSwitchArms(SI, DTU))
    return requestResimplify();

  if (simplifySwitchWhenUMin(SI, DTU))
    return requestResimplify();

  return false;
}
8301
// Simplify an indirectbr: drop duplicate and non-address-taken destinations
// (folding their branch weights together), degenerate to unreachable / a
// direct branch when 0 / 1 destinations remain, and finally try to fold an
// indirectbr whose address operand is a select.
bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
  BasicBlock *BB = IBI->getParent();
  bool Changed = false;
  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(I: *IBI, Weights&: BranchWeights);

  // Accumulate each destination's total weight up-front, before any removal,
  // so duplicate destinations keep their combined weight afterwards.
  DenseMap<const BasicBlock *, uint64_t> TargetWeight;
  if (HasBranchWeights)
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      TargetWeight[IBI->getDestination(i: I)] += BranchWeights[I];

  // Eliminate redundant destinations.
  SmallPtrSet<Value *, 8> Succs;
  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
  for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
    BasicBlock *Dest = IBI->getDestination(i: I);
    // Remove destinations that are not address-taken (they can never be
    // jumped to) or that already appear earlier in the destination list.
    if (!Dest->hasAddressTaken() || !Succs.insert(Ptr: Dest).second) {
      if (!Dest->hasAddressTaken())
        RemovedSuccs.insert(X: Dest);
      Dest->removePredecessor(Pred: BB);
      IBI->removeDestination(i: I);
      // removeDestination shifts later entries down; re-examine index I.
      --I;
      --E;
      Changed = true;
    }
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve(n: RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back(x: {DominatorTree::Delete, BB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }

  if (IBI->getNumDestinations() == 0) {
    // If the indirectbr has no successors, change it to unreachable.
    new UnreachableInst(IBI->getContext(), IBI->getIterator());
    eraseTerminatorAndDCECond(TI: IBI);
    return true;
  }

  if (IBI->getNumDestinations() == 1) {
    // If the indirectbr has one successor, change it to a direct branch.
    UncondBrInst::Create(IfTrue: IBI->getDestination(i: 0), InsertBefore: IBI->getIterator());
    eraseTerminatorAndDCECond(TI: IBI);
    return true;
  }
  // Re-emit weights for the surviving destinations from the precomputed
  // per-target totals.
  if (HasBranchWeights) {
    SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      NewBranchWeights[I] += TargetWeight.find(Val: IBI->getDestination(i: I))->second;
    setFittedBranchWeights(I&: *IBI, Weights: NewBranchWeights, /*IsExpected=*/false);
  }
  if (SelectInst *SI = dyn_cast<SelectInst>(Val: IBI->getAddress())) {
    if (simplifyIndirectBrOnSelect(IBI, SI))
      return requestResimplify();
  }
  return Changed;
}
8363
/// Given an block with only a single landing pad and a unconditional branch
/// try to find another basic block which this one can be merged with. This
/// handles cases where we have multiple invokes with unique landing pads, but
/// a shared handler.
///
/// We specifically choose to not worry about merging non-empty blocks
/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
/// practice, the optimizer produces empty landing pad blocks quite frequently
/// when dealing with exception dense code. (see: instcombine, gvn, if-else
/// sinking in this file)
///
/// This is primarily a code size optimization. We need to avoid performing
/// any transform which might inhibit optimization (such as our ability to
/// specialize a particular handler via tail commoning). We do this by not
/// merging any blocks which require us to introduce a phi. Since the same
/// values are flowing through both blocks, we don't lose any ability to
/// specialize. If anything, we make such specialization more likely.
///
/// TODO - This transformation could remove entries from a phi in the target
/// block when the inputs in the phi are the same for the two blocks being
/// merged. In some cases, this could result in removal of the PHI entirely.
static bool tryToMergeLandingPad(LandingPadInst *LPad, UncondBrInst *BI,
                                 BasicBlock *BB, DomTreeUpdater *DTU) {
  auto Succ = BB->getUniqueSuccessor();
  assert(Succ);
  // If there's a phi in the successor block, we'd likely have to introduce
  // a phi into the merged landing pad block.
  if (isa<PHINode>(Val: *Succ->begin()))
    return false;

  // Scan the other predecessors of Succ for a block that is an exact
  // duplicate of BB: same landing pad followed by the same branch.
  for (BasicBlock *OtherPred : predecessors(BB: Succ)) {
    if (BB == OtherPred)
      continue;
    BasicBlock::iterator I = OtherPred->begin();
    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(Val&: I);
    if (!LPad2 || !LPad2->isIdenticalTo(I: LPad))
      continue;
    ++I;
    UncondBrInst *BI2 = dyn_cast<UncondBrInst>(Val&: I);
    if (!BI2 || !BI2->isIdenticalTo(I: BI))
      continue;

    std::vector<DominatorTree::UpdateType> Updates;

    // We've found an identical block. Update our predecessors to take that
    // path instead and make ourselves dead.
    // The asserted invariant: every predecessor of a landing-pad block is an
    // invoke unwinding to it.
    SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
    for (BasicBlock *Pred : UniquePreds) {
      InvokeInst *II = cast<InvokeInst>(Val: Pred->getTerminator());
      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
             "unexpected successor");
      II->setUnwindDest(OtherPred);
      if (DTU) {
        Updates.push_back(x: {DominatorTree::Insert, Pred, OtherPred});
        Updates.push_back(x: {DominatorTree::Delete, Pred, BB});
      }
    }

    // Detach BB from its successors' PHIs now that it is dead.
    SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
    for (BasicBlock *Succ : UniqueSuccs) {
      Succ->removePredecessor(Pred: BB);
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
    }

    // BB has no predecessors left; cap it with unreachable and leave actual
    // deletion to later cleanup.
    IRBuilder<> Builder(BI);
    Builder.CreateUnreachable();
    BI->eraseFromParent();
    if (DTU)
      DTU->applyUpdates(Updates);
    return true;
  }
  return false;
}
8438
// Simplify a block ending in an unconditional branch: fold away empty blocks,
// fold blocks reduced to an icmp (optionally plus a select) into the
// comparison logic, and merge duplicate empty landing-pad blocks.
bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
                                          IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *Succ = BI->getSuccessor(i: 0);

  // If the Terminator is the only non-phi instruction, simplify the block.
  // If LoopHeader is provided, check if the block or its successor is a loop
  // header. (This is for early invocations before loop simplify and
  // vectorization to keep canonical loop forms for nested loops. These blocks
  // can be eliminated when the pass is invoked later in the back-end.)
  // Note that if BB has only one predecessor then we do not introduce new
  // backedge, so we can eliminate BB.
  bool NeedCanonicalLoop =
      Options.NeedCanonicalLoop &&
      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(N: 2) &&
       (is_contained(Range&: LoopHeaders, Element: BB) || is_contained(Range&: LoopHeaders, Element: Succ)));
  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
    return true;

  // If the only instruction in the block is a seteq/setne comparison against a
  // constant, try to simplify the block.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val&: I)) {
    if (ICI->isEquality() && isa<ConstantInt>(Val: ICI->getOperand(i_nocapture: 1))) {
      ++I;
      // Block is just `icmp; br` ...
      if (I->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
        return true;
      // ... or `icmp; select; br`.
      if (isa<SelectInst>(Val: I) && I->getNextNode()->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, Select: cast<SelectInst>(Val&: I),
                                                      Builder))
        return true;
    }
  }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(Val&: I)) {
    ++I;
    if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
      return true;
  }

  return false;
}
8485
8486static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
8487 BasicBlock *PredPred = nullptr;
8488 for (auto *P : predecessors(BB)) {
8489 BasicBlock *PPred = P->getSinglePredecessor();
8490 if (!PPred || (PredPred && PredPred != PPred))
8491 return nullptr;
8492 PredPred = PPred;
8493 }
8494 return PredPred;
8495}
8496
/// Fold the following pattern:
/// bb0:
///   br i1 %cond1, label %bb1, label %bb2
/// bb1:
///   br i1 %cond2, label %bb3, label %bb4
/// bb2:
///   br i1 %cond2, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// into
/// bb0:
///   %cond = xor i1 %cond1, %cond2
///   br i1 %cond, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// NOTE: %cond2 always dominates the terminator of bb0.
static bool mergeNestedCondBranch(CondBrInst *BI, DomTreeUpdater *DTU) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *BB1 = BI->getSuccessor(i: 0);
  BasicBlock *BB2 = BI->getSuccessor(i: 1);
  // A "simple" successor holds only a conditional branch, does not loop back
  // into BB or itself, and its targets start with no PHIs (so the edge
  // redirection below needs no PHI rewiring).
  auto IsSimpleSuccessor = [BB](BasicBlock *Succ, CondBrInst *&SuccBI) {
    if (Succ == BB)
      return false;
    if (&Succ->front() != Succ->getTerminator())
      return false;
    SuccBI = dyn_cast<CondBrInst>(Val: Succ->getTerminator());
    if (!SuccBI)
      return false;
    BasicBlock *Succ1 = SuccBI->getSuccessor(i: 0);
    BasicBlock *Succ2 = SuccBI->getSuccessor(i: 1);
    return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
           !isa<PHINode>(Val: Succ1->front()) && !isa<PHINode>(Val: Succ2->front());
  };
  CondBrInst *BB1BI, *BB2BI;
  if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
    return false;

  // Both inner branches must test the same condition with mirrored targets.
  if (BB1BI->getCondition() != BB2BI->getCondition() ||
      BB1BI->getSuccessor(i: 0) != BB2BI->getSuccessor(i: 1) ||
      BB1BI->getSuccessor(i: 1) != BB2BI->getSuccessor(i: 0))
    return false;

  // Rewrite BB's branch as `xor(cond1, cond2) ? bb4 : bb3` and bypass the two
  // intermediate blocks.
  BasicBlock *BB3 = BB1BI->getSuccessor(i: 0);
  BasicBlock *BB4 = BB1BI->getSuccessor(i: 1);
  IRBuilder<> Builder(BI);
  BI->setCondition(
      Builder.CreateXor(LHS: BI->getCondition(), RHS: BB1BI->getCondition()));
  BB1->removePredecessor(Pred: BB);
  BI->setSuccessor(idx: 0, NewSucc: BB4);
  BB2->removePredecessor(Pred: BB);
  BI->setSuccessor(idx: 1, NewSucc: BB3);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 4> Updates;
    Updates.push_back(Elt: {DominatorTree::Delete, BB, BB1});
    Updates.push_back(Elt: {DominatorTree::Insert, BB, BB4});
    Updates.push_back(Elt: {DominatorTree::Delete, BB, BB2});
    Updates.push_back(Elt: {DominatorTree::Insert, BB, BB3});

    DTU->applyUpdates(Updates);
  }
  // Combine branch weights. Any branch lacking profile data defaults to 1:1;
  // HasWeight is set if at least one of the three branches had real weights.
  bool HasWeight = false;
  uint64_t BBTWeight, BBFWeight;
  if (extractBranchWeights(I: *BI, TrueVal&: BBTWeight, FalseVal&: BBFWeight))
    HasWeight = true;
  else
    BBTWeight = BBFWeight = 1;
  uint64_t BB1TWeight, BB1FWeight;
  if (extractBranchWeights(I: *BB1BI, TrueVal&: BB1TWeight, FalseVal&: BB1FWeight))
    HasWeight = true;
  else
    BB1TWeight = BB1FWeight = 1;
  uint64_t BB2TWeight, BB2FWeight;
  if (extractBranchWeights(I: *BB2BI, TrueVal&: BB2TWeight, FalseVal&: BB2FWeight))
    HasWeight = true;
  else
    BB2TWeight = BB2FWeight = 1;
  if (HasWeight) {
    // New true edge (to bb4) is taken on paths bb0->bb1->bb4 and
    // bb0->bb2->bb4, hence weight BBT*BB1F + BBF*BB2T; the false edge (bb3)
    // is the complementary pair of paths.
    uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
                           BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
    setFittedBranchWeights(I&: *BI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
  }
  return true;
}
8585
/// Simplify a block ending in a conditional branch. Tries, in this order:
/// value-equality folding into predecessors, converting icmp-or chains into a
/// switch, constant-folding via a dominating implied condition, folding the
/// branch into a common-destination predecessor, hoisting/speculating common
/// code from the successors, threading over values known in predecessors, and
/// merging with conditional branches in predecessor blocks.
/// Returns true if any simplification was performed (possibly requesting
/// another simplification round via requestResimplify()).
bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
  assert(
      !isa<ConstantInt>(BI->getCondition()) &&
      BI->getSuccessor(0) != BI->getSuccessor(1) &&
      "Tautological conditional branch should have been eliminated already.");

  BasicBlock *BB = BI->getParent();
  if (!Options.SimplifyCondBranch ||
      BI->getFunction()->hasFnAttribute(Kind: Attribute::OptForFuzzing))
    return false;

  // Conditional branch
  if (isValueEqualityComparison(TI: BI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this
    // switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(TI: BI, Pred: OnlyPred, Builder))
        return requestResimplify();

    // This block must be empty, except for the setcond inst, if it exists.
    // Ignore dbg and pseudo intrinsics.
    auto I = BB->instructionsWithoutDebug(SkipPseudoOp: true).begin();
    if (&*I == BI) {
      // Block contains only the branch itself.
      if (foldValueComparisonIntoPredecessors(TI: BI, Builder))
        return requestResimplify();
    } else if (&*I == cast<Instruction>(Val: BI->getCondition())) {
      // Block contains only the condition computation followed by the branch.
      ++I;
      if (&*I == BI && foldValueComparisonIntoPredecessors(TI: BI, Builder))
        return requestResimplify();
    }
  }

  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
  if (simplifyBranchOnICmpChain(BI, Builder, DL))
    return true;

  // If this basic block has dominating predecessor blocks and the dominating
  // blocks' conditions imply BI's condition, we know the direction of BI.
  std::optional<bool> Imp = isImpliedByDomCondition(Cond: BI->getCondition(), ContextI: BI, DL);
  if (Imp) {
    // Turn this into a branch on constant.
    auto *OldCond = BI->getCondition();
    ConstantInt *TorF = *Imp ? ConstantInt::getTrue(Context&: BB->getContext())
                             : ConstantInt::getFalse(Context&: BB->getContext());
    BI->setCondition(TorF);
    // The old condition may now be dead; clean it up eagerly.
    RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
    return requestResimplify();
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and one of our successors, fold the comparison into the
  // predecessor and use logical operations to pick the right destination.
  if (Options.SpeculateBlocks &&
      foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, TTI: &TTI,
                             BonusInstThreshold: Options.BonusInstThreshold))
    return requestResimplify();

  // We have a conditional branch to two blocks that are only reachable
  // from BI.  We know that the condbr dominates the two blocks, so see if
  // there is any identical code in the "then" and "else" blocks.  If so, we
  // can hoist it up to the branching block.
  if (BI->getSuccessor(i: 0)->getSinglePredecessor()) {
    if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
      // Both successors are exclusively reached through BI: try hoisting
      // common code up into BB.
      if (HoistCommon &&
          hoistCommonCodeFromSuccessors(TI: BI, AllInstsEqOnly: !Options.HoistCommonInsts))
        return requestResimplify();

      if (BI && Options.HoistLoadsStoresWithCondFaulting &&
          isProfitableToSpeculate(BI, Invert: std::nullopt, TTI)) {
        SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
        // Both successors must contain only cheap, safely speculatable
        // loads/stores (up to the threshold) plus a single-successor
        // terminator; collect the candidate instructions as we scan.
        auto CanSpeculateConditionalLoadsStores = [&]() {
          for (auto *Succ : successors(BB)) {
            for (Instruction &I : *Succ) {
              if (I.isTerminator()) {
                if (I.getNumSuccessors() > 1)
                  return false;
                continue;
              } else if (!isSafeCheapLoadStore(I: &I, TTI) ||
                         SpeculatedConditionalLoadsStores.size() ==
                             HoistLoadsStoresWithCondFaultingThreshold) {
                return false;
              }
              SpeculatedConditionalLoadsStores.push_back(Elt: &I);
            }
          }
          return !SpeculatedConditionalLoadsStores.empty();
        };

        if (CanSpeculateConditionalLoadsStores()) {
          hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
                                      Invert: std::nullopt, Sel: nullptr);
          return requestResimplify();
        }
      }
    } else {
      // If Successor #1 has multiple preds, we may be able to conditionally
      // execute Successor #0 if it branches to Successor #1.
      Instruction *Succ0TI = BI->getSuccessor(i: 0)->getTerminator();
      if (Succ0TI->getNumSuccessors() == 1 &&
          Succ0TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 1))
        if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 0)))
          return requestResimplify();
    }
  } else if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
    // If Successor #0 has multiple preds, we may be able to conditionally
    // execute Successor #1 if it branches to Successor #0.
    Instruction *Succ1TI = BI->getSuccessor(i: 1)->getTerminator();
    if (Succ1TI->getNumSuccessors() == 1 &&
        Succ1TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 0))
      if (speculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 1)))
        return requestResimplify();
  }

  // If this is a branch on something for which we know the constant value in
  // predecessors (e.g. a phi node in the current block), thread control
  // through this block.
  if (foldCondBranchOnValueKnownInPredecessor(BI))
    return requestResimplify();

  // Scan predecessor blocks for conditional branches.
  for (BasicBlock *Pred : predecessors(BB))
    if (CondBrInst *PBI = dyn_cast<CondBrInst>(Val: Pred->getTerminator()))
      if (PBI != BI)
        if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
          return requestResimplify();

  // Look for diamond patterns.
  if (MergeCondStores)
    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
      if (CondBrInst *PBI = dyn_cast<CondBrInst>(Val: PrevBB->getTerminator()))
        if (PBI != BI)
          if (mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
            return requestResimplify();

  // Look for nested conditional branches.
  if (mergeNestedCondBranch(BI, DTU))
    return requestResimplify();

  return false;
}
8727
/// Check if passing a value to an instruction will cause undefined behavior.
///
/// \param V the candidate incoming value; only null/undef constants are
///        analyzed, anything else conservatively returns false.
/// \param I the instruction that would receive \p V (typically a PHI node).
/// \param PtrValueMayBeModified true if the null pointer may have been
///        displaced (e.g. through a non-zero GEP) before reaching the use,
///        which weakens null-based UB conclusions. (NOTE(review): callers pass
///        only two arguments, so a default for this parameter presumably
///        exists on a declaration outside this view — confirm.)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
  assert(V->getType() == I->getType() && "Mismatched types");
  Constant *C = dyn_cast<Constant>(Val: V);
  if (!C)
    return false;

  // No uses means nothing can observe the value, hence no UB.
  if (I->use_empty())
    return false;

  if (C->isNullValue() || isa<UndefValue>(Val: C)) {
    // Only look at the first use we can handle, avoid hurting compile time with
    // long uselists
    auto FindUse = llvm::find_if(Range: I->uses(), P: [](auto &U) {
      auto *Use = cast<Instruction>(U.getUser());
      // Change this list when we want to add new instructions.
      switch (Use->getOpcode()) {
      default:
        return false;
      case Instruction::GetElementPtr:
      case Instruction::Ret:
      case Instruction::BitCast:
      case Instruction::Load:
      case Instruction::Store:
      case Instruction::Call:
      case Instruction::CallBr:
      case Instruction::Invoke:
      case Instruction::UDiv:
      case Instruction::URem:
        // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
        // implemented to avoid code complexity as it is unclear how useful such
        // logic is.
      case Instruction::SDiv:
      case Instruction::SRem:
        return true;
      }
    });
    if (FindUse == I->use_end())
      return false;
    auto &Use = *FindUse;
    auto *User = cast<Instruction>(Val: Use.getUser());
    // Bail out if User is not in the same BB as I or User == I or User comes
    // before I in the block. The latter two can be the case if User is a
    // PHI node.
    if (User->getParent() != I->getParent() || User == I ||
        User->comesBefore(Other: I))
      return false;

    // Now make sure that there are no instructions in between that can alter
    // control flow (eg. calls)
    auto InstrRange =
        make_range(x: std::next(x: I->getIterator()), y: User->getIterator());
    if (any_of(Range&: InstrRange, P: [](Instruction &I) {
          return !isGuaranteedToTransferExecutionToSuccessor(I: &I);
        }))
      return false;

    // Look through GEPs. A load from a GEP derived from NULL is still undefined
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: User))
      if (GEP->getPointerOperand() == I) {
        // The type of GEP may differ from the type of base pointer.
        // Bail out on vector GEPs, as they are not handled by other checks.
        if (GEP->getType()->isVectorTy())
          return false;
        // The current base address is null, there are four cases to consider:
        // getelementptr (TY, null, 0)                 -> null
        // getelementptr (TY, null, not zero)          -> may be modified
        // getelementptr inbounds (TY, null, 0)        -> null
        // getelementptr inbounds (TY, null, not zero) -> poison iff null is
        // undefined?
        if (!GEP->hasAllZeroIndices() &&
            (!GEP->isInBounds() ||
             NullPointerIsDefined(F: GEP->getFunction(),
                                  AS: GEP->getPointerAddressSpace())))
          PtrValueMayBeModified = true;
        // Recurse with the GEP standing in for the original value.
        return passingValueIsAlwaysUndefined(V, I: GEP, PtrValueMayBeModified);
      }

    // Look through return.
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: User)) {
      bool HasNoUndefAttr =
          Ret->getFunction()->hasRetAttribute(Kind: Attribute::NoUndef);
      // Return undefined to a noundef return value is undefined.
      if (isa<UndefValue>(Val: C) && HasNoUndefAttr)
        return true;
      // Return null to a nonnull+noundef return value is undefined.
      if (C->isNullValue() && HasNoUndefAttr &&
          Ret->getFunction()->hasRetAttribute(Kind: Attribute::NonNull)) {
        // Only UB if the pointer is provably still null at the return.
        return !PtrValueMayBeModified;
      }
    }

    // Load from null is undefined.
    if (LoadInst *LI = dyn_cast<LoadInst>(Val: User))
      if (!LI->isVolatile())
        return !NullPointerIsDefined(F: LI->getFunction(),
                                     AS: LI->getPointerAddressSpace());

    // Store to null is undefined.
    if (StoreInst *SI = dyn_cast<StoreInst>(Val: User))
      if (!SI->isVolatile())
        return (!NullPointerIsDefined(F: SI->getFunction(),
                                      AS: SI->getPointerAddressSpace())) &&
               SI->getPointerOperand() == I;

    // llvm.assume(false/undef) always triggers immediate UB.
    if (auto *Assume = dyn_cast<AssumeInst>(Val: User)) {
      // Ignore assume operand bundles.
      if (I == Assume->getArgOperand(i: 0))
        return true;
    }

    if (auto *CB = dyn_cast<CallBase>(Val: User)) {
      // In address spaces where null is a valid pointer, calls involving
      // null are not necessarily UB.
      if (C->isNullValue() && NullPointerIsDefined(F: CB->getFunction()))
        return false;
      // A call to null is undefined.
      if (CB->getCalledOperand() == I)
        return true;

      if (CB->isArgOperand(U: &Use)) {
        unsigned ArgIdx = CB->getArgOperandNo(U: &Use);
        // Passing null to a nonnull+noundef argument is undefined.
        if (isa<ConstantPointerNull>(Val: C) &&
            CB->paramHasNonNullAttr(ArgNo: ArgIdx, /*AllowUndefOrPoison=*/false))
          return !PtrValueMayBeModified;
        // Passing undef to a noundef argument is undefined.
        if (isa<UndefValue>(Val: C) && CB->isPassingUndefUB(ArgNo: ArgIdx))
          return true;
      }
    }
    // Div/Rem by zero is immediate UB
    if (match(V: User, P: m_BinOp(L: m_Value(), R: m_Specific(V: I))) && User->isIntDivRem())
      return true;
  }
  return false;
}
8864
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
///
/// For each PHI incoming value proven to cause immediate UB, the edge from
/// the offending predecessor is cut: an unconditional branch becomes
/// unreachable, a conditional branch is collapsed to its other successor
/// (with the condition preserved as an assumption), and a switch case is
/// redirected to a fresh unreachable block. At most one edge is removed per
/// call; returns true if a change was made.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
                                              DomTreeUpdater *DTU,
                                              AssumptionCache *AC) {
  for (PHINode &PHI : BB->phis())
    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
      if (passingValueIsAlwaysUndefined(V: PHI.getIncomingValue(i), I: &PHI)) {
        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
        Instruction *T = Predecessor->getTerminator();
        IRBuilder<> Builder(T);
        if (isa<UncondBrInst>(Val: T)) {
          BB->removePredecessor(Pred: Predecessor);
          // Turn unconditional branches into unreachables.
          Builder.CreateUnreachable();
          T->eraseFromParent();
          if (DTU)
            DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
          return true;
        } else if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: T)) {
          BB->removePredecessor(Pred: Predecessor);
          // Preserve guarding condition in assume, because it might not be
          // inferrable from any dominating condition.
          Value *Cond = BI->getCondition();
          CallInst *Assumption;
          // The surviving path implies the condition value that avoids BB;
          // record it (negated if BB was the true successor).
          if (BI->getSuccessor(i: 0) == BB)
            Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
          else
            Assumption = Builder.CreateAssumption(Cond);
          if (AC)
            AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
          Builder.CreateBr(Dest: BI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 1)
                                                       : BI->getSuccessor(i: 0));
          BI->eraseFromParent();
          if (DTU)
            DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
          return true;
        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: T)) {
          // Redirect all branches leading to UB into
          // a newly created unreachable block.
          BasicBlock *Unreachable = BasicBlock::Create(
              Context&: Predecessor->getContext(), Name: "unreachable", Parent: BB->getParent(), InsertBefore: BB);
          Builder.SetInsertPoint(Unreachable);
          // The new block contains only one instruction: Unreachable
          Builder.CreateUnreachable();
          for (const auto &Case : SI->cases())
            if (Case.getCaseSuccessor() == BB) {
              BB->removePredecessor(Pred: Predecessor);
              Case.setSuccessor(Unreachable);
            }
          if (SI->getDefaultDest() == BB) {
            BB->removePredecessor(Pred: Predecessor);
            SI->setDefaultDest(Unreachable);
          }

          if (DTU)
            DTU->applyUpdates(
                Updates: { { DominatorTree::Insert, Predecessor, Unreachable },
                  { DominatorTree::Delete, Predecessor, BB } });
          return true;
        }
      }

  return false;
}
8930
/// Run one round of CFG simplification on \p BB: dead-block removal,
/// terminator constant folding, duplicate-PHI elimination, UB-edge removal,
/// block merging, common-code sinking, two-entry PHI folding, and finally a
/// terminator-specific simplification dispatched on the opcode.
/// Returns true if the block was changed.
bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
  bool Changed = false;

  assert(BB && BB->getParent() && "Block not embedded in function!");
  assert(BB->getTerminator() && "Degenerate basic block encountered!");

  // Remove basic blocks that have no predecessors (except the entry block)...
  // or that just have themselves as a predecessor.  These are unreachable.
  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
      BB->getSinglePredecessor() == BB) {
    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  // Check to see if we can constant propagate this terminator instruction
  // away...
  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
                                    /*TLI=*/nullptr, DTU);

  // Check for and eliminate duplicate PHI nodes in this block.
  Changed |= EliminateDuplicatePHINodes(BB);

  // Check for and remove branches that will always cause undefined behavior.
  if (removeUndefIntroducingPredecessor(BB, DTU, AC: Options.AC))
    return requestResimplify();

  // Merge basic blocks into their predecessor if there is only one distinct
  // pred, and if there is only one distinct successor of the predecessor, and
  // if there are no PHI nodes.
  if (MergeBlockIntoPredecessor(BB, DTU))
    return true;

  if (SinkCommon && Options.SinkCommonInsts) {
    if (sinkCommonCodeFromPredecessors(BB, DTU) ||
        mergeCompatibleInvokes(BB, DTU)) {
      // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
      // so we may now have duplicate PHI's.
      // Let's rerun EliminateDuplicatePHINodes() first,
      // before foldTwoEntryPHINode() potentially converts them into select's,
      // after which we'd need a whole EarlyCSE pass run to cleanup them.
      return true;
    }
    // Merge identical predecessors of this block.
    if (simplifyDuplicatePredecessors(BB, DTU))
      return true;
  }

  if (Options.SpeculateBlocks &&
      !BB->getParent()->hasFnAttribute(Kind: Attribute::OptForFuzzing)) {
    // If there is a trivial two-entry PHI node in this basic block, and we can
    // eliminate it, do so now.
    if (auto *PN = dyn_cast<PHINode>(Val: BB->begin()))
      if (PN->getNumIncomingValues() == 2)
        if (foldTwoEntryPHINode(PN, TTI, DTU, AC: Options.AC, DL,
                                SpeculateUnpredictables: Options.SpeculateUnpredictables))
          return true;
  }

  // Dispatch to the terminator-kind-specific simplification.
  IRBuilder<> Builder(BB);
  Instruction *Terminator = BB->getTerminator();
  Builder.SetInsertPoint(Terminator);
  switch (Terminator->getOpcode()) {
  case Instruction::UncondBr:
    Changed |= simplifyUncondBranch(BI: cast<UncondBrInst>(Val: Terminator), Builder);
    break;
  case Instruction::CondBr:
    Changed |= simplifyCondBranch(BI: cast<CondBrInst>(Val: Terminator), Builder);
    break;
  case Instruction::Resume:
    Changed |= simplifyResume(RI: cast<ResumeInst>(Val: Terminator), Builder);
    break;
  case Instruction::CleanupRet:
    Changed |= simplifyCleanupReturn(RI: cast<CleanupReturnInst>(Val: Terminator));
    break;
  case Instruction::Switch:
    Changed |= simplifySwitch(SI: cast<SwitchInst>(Val: Terminator), Builder);
    break;
  case Instruction::Unreachable:
    Changed |= simplifyUnreachable(UI: cast<UnreachableInst>(Val: Terminator));
    break;
  case Instruction::IndirectBr:
    Changed |= simplifyIndirectBr(IBI: cast<IndirectBrInst>(Val: Terminator));
    break;
  }

  return Changed;
}
9019
9020bool SimplifyCFGOpt::run(BasicBlock *BB) {
9021 bool Changed = false;
9022
9023 // Repeated simplify BB as long as resimplification is requested.
9024 do {
9025 Resimplify = false;
9026
9027 // Perform one round of simplifcation. Resimplify flag will be set if
9028 // another iteration is requested.
9029 Changed |= simplifyOnce(BB);
9030 } while (Resimplify);
9031
9032 return Changed;
9033}
9034
9035bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
9036 DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
9037 ArrayRef<WeakVH> LoopHeaders) {
9038 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
9039 Options)
9040 .run(BB);
9041}
9042