//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Peephole optimize the CFG.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>
#include <set>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "simplifycfg"

namespace llvm {

cl::opt<bool> RequireAndPreserveDomTree(
    "simplifycfg-require-and-preserve-domtree", cl::Hidden,
    cl::desc(
        "Temporary development switch used to gradually uplift SimplifyCFG "
        "into preserving DomTree."));

// Chosen as 2 so as to be cheap, but still to have enough power to fold
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
// To catch this, we need to fold a compare and a select, hence '2' being the
// minimum reasonable default.
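// For illustration (hypothetical IR, not taken from a test): a clamp such as
//   %lo = icmp slt i32 %x, 0
//   %t  = select i1 %lo, i32 0, i32 %x
//   %hi = icmp sgt i32 %t, 255
//   %r  = select i1 %hi, i32 255, i32 %t
// needs one compare plus one select folded per arm, which a threshold of 2
// still admits.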
static cl::opt<unsigned> PHINodeFoldingThreshold(
    "phi-node-folding-threshold", cl::Hidden, cl::init(2),
    cl::desc(
        "Control the amount of phi node folding to perform (default = 2)"));

static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
    "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
    cl::desc("Control the maximal total instruction cost that we are willing "
             "to speculatively execute to fold a 2-entry PHI node into a "
             "select (default = 4)"));

static cl::opt<bool>
    HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
                cl::desc("Hoist common instructions up to the parent block"));

static cl::opt<bool> HoistLoadsWithCondFaulting(
    "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
    cl::desc("Hoist loads if the target supports conditional faulting"));

static cl::opt<bool> HoistStoresWithCondFaulting(
    "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
    cl::desc("Hoist stores if the target supports conditional faulting"));

static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
    "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
    cl::desc("Control the maximal conditional load/store that we are willing "
             "to speculatively execute to eliminate conditional branch "
             "(default = 6)"));

static cl::opt<unsigned>
    HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
                         cl::init(20),
                         cl::desc("Allow reordering across at most this many "
                                  "instructions when hoisting"));

static cl::opt<bool>
    SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
               cl::desc("Sink common instructions down to the end block"));

static cl::opt<bool> HoistCondStores(
    "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
    cl::desc("Hoist conditional stores if an unconditional store precedes"));

static cl::opt<bool> MergeCondStores(
    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
    cl::desc("Hoist conditional stores even if an unconditional store does not "
             "precede - hoist multiple conditional stores into a single "
             "predicated store"));

static cl::opt<bool> MergeCondStoresAggressively(
    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
    cl::desc("When merging conditional stores, do so even if the resultant "
             "basic blocks are unlikely to be if-converted as a result"));

static cl::opt<bool> SpeculateOneExpensiveInst(
    "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
    cl::desc("Allow exactly one expensive instruction to be speculatively "
             "executed"));

static cl::opt<unsigned> MaxSpeculationDepth(
    "max-speculation-depth", cl::Hidden, cl::init(10),
    cl::desc("Limit maximum recursion depth when calculating costs of "
             "speculatively executed instructions"));

static cl::opt<int>
    MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
                      cl::init(10),
                      cl::desc("Max size of a block which is still considered "
                               "small enough to thread through"));

// Two is chosen to allow one negation and a logical combine.
static cl::opt<unsigned>
    BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
                        cl::init(2),
                        cl::desc("Maximum cost of combining conditions when "
                                 "folding branches"));

static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
    "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
    cl::init(2),
    cl::desc("Multiplier to apply to threshold when determining whether or not "
             "to fold branch to common destination when vector operations are "
             "present"));

static cl::opt<bool> EnableMergeCompatibleInvokes(
    "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
    cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));

static cl::opt<unsigned> MaxSwitchCasesPerResult(
    "max-switch-cases-per-result", cl::Hidden, cl::init(16),
    cl::desc("Limit cases to analyze when converting a switch to select"));

static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(
    "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
    cl::desc("Limit the number of blocks that a definition in a threaded "
             "block is allowed to be live in"));

extern cl::opt<bool> ProfcheckDisableMetadataFixes;

} // end namespace llvm

STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
          "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables,
          "Number of switch instructions turned into lookup tables");
STATISTIC(
    NumLookupTablesHoles,
    "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
STATISTIC(NumFoldValueComparisonIntoPredecessors,
          "Number of value comparisons folded into predecessor basic blocks");
STATISTIC(NumFoldBranchToCommonDest,
          "Number of branches folded into predecessor basic block");
STATISTIC(
    NumHoistCommonCode,
    "Number of common instruction 'blocks' hoisted up to the begin block");
STATISTIC(NumHoistCommonInstrs,
          "Number of common instructions hoisted up to the begin block");
STATISTIC(NumSinkCommonCode,
          "Number of common instruction 'blocks' sunk down to the end block");
STATISTIC(NumSinkCommonInstrs,
          "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculatively executed instructions");
STATISTIC(NumInvokes,
          "Number of invokes with empty resume blocks simplified into calls");
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");

namespace {

// The first field contains the value that the switch produces when a certain
// case group is selected, and the second field is a vector containing the
// cases composing the case group.
using SwitchCaseResultVectorTy =
    SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;

// The first field contains the phi node that generates a result of the switch
// and the second field contains the value generated for a certain case in the
// switch for that PHI.
using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;

/// ValueEqualityComparisonCase - Represents a case of a switch.
struct ValueEqualityComparisonCase {
  ConstantInt *Value;
  BasicBlock *Dest;

  ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
      : Value(Value), Dest(Dest) {}

  bool operator<(ValueEqualityComparisonCase RHS) const {
    // Comparing pointers is ok as we only rely on the order for uniquing.
    return Value < RHS.Value;
  }

  bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
};

class SimplifyCFGOpt {
  const TargetTransformInfo &TTI;
  DomTreeUpdater *DTU;
  const DataLayout &DL;
  ArrayRef<WeakVH> LoopHeaders;
  const SimplifyCFGOptions &Options;
  bool Resimplify;

  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *getValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool foldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);
  bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
                                                   SelectInst *Select,
                                                   IRBuilder<> &Builder);
  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs,
      ArrayRef<BasicBlock *> UniqueSuccessors);
  bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
  bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  bool simplifyOnce(BasicBlock *BB);
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};

// We synthesize a || b as select a, true, b.
// We synthesize a && b as select a, b, false.
// This function determines if SI is playing one of those roles.
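// For illustration (hypothetical IR):
//   %or  = select i1 %a, i1 true, i1 %b    ; plays the role of %a || %b
//   %and = select i1 %a, i1 %b, i1 false   ; plays the role of %a && %b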
[[maybe_unused]] bool
isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
  return ((isa<ConstantInt>(SI->getTrueValue()) &&
           (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
          (isa<ConstantInt>(SI->getFalseValue()) &&
           (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
}

} // end anonymous namespace

/// Return true if all the PHI nodes in the basic block \p BB
/// receive compatible (identical) incoming values when coming from
/// all of the predecessor blocks that are specified in \p IncomingBlocks.
///
/// Note that if the values aren't exactly identical, but \p EquivalenceSet
/// is provided, and *both* of the values are present in the set,
/// then they are considered equal.
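/// For illustration (hypothetical IR): with IncomingBlocks = {%bb1, %bb2},
///   %p = phi i32 [ %v, %bb1 ], [ %v, %bb2 ], [ %w, %bb3 ]
/// is compatible, because both listed edges carry the same value %v; what
/// other predecessors (here %bb3) contribute does not matter.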
static bool incomingValuesAreCompatible(
    BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
    SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
  assert(IncomingBlocks.size() == 2 &&
         "Only for a pair of incoming blocks at the time!");

  // FIXME: it is okay if one of the incoming values is an `undef` value,
  // iff the other incoming value is guaranteed to be a non-poison value.
  // FIXME: it is okay if one of the incoming values is a `poison` value.
  return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
    Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
    Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
    if (IV0 == IV1)
      return true;
    if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
        EquivalenceSet->contains(IV1))
      return true;
    return false;
  });
}

/// Return true if it is safe to merge these two
/// terminator instructions together.
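/// For illustration (hypothetical IR): merging is unsafe when both
/// terminators reach a common successor %merge containing
///   %p = phi i32 [ 0, %si1bb ], [ 1, %si2bb ]
/// since after folding, a single edge would have to carry both of the
/// conflicting incoming values.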
static bool
safeToMergeTerminators(Instruction *SI1, Instruction *SI2,
                       SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
  if (SI1 == SI2)
    return false; // Can't merge with self!

  // It is not safe to merge these two switch instructions if they have a
  // common successor, and if that successor has a PHI node, and if *that* PHI
  // node has conflicting incoming values from the two switch blocks.
  BasicBlock *SI1BB = SI1->getParent();
  BasicBlock *SI2BB = SI2->getParent();

  SmallPtrSet<BasicBlock *, 16> SI1Succs(llvm::from_range, successors(SI1BB));
  bool Fail = false;
  for (BasicBlock *Succ : successors(SI2BB)) {
    if (!SI1Succs.count(Succ))
      continue;
    if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
      continue;
    Fail = true;
    if (FailBlocks)
      FailBlocks->insert(Succ);
    else
      break;
  }

  return !Fail;
}

/// Update PHI nodes in Succ to indicate that there will now be entries in it
/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
/// will be the same as those coming in from ExistPred, an existing predecessor
/// of Succ.
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
                                  BasicBlock *ExistPred,
                                  MemorySSAUpdater *MSSAU = nullptr) {
  for (PHINode &PN : Succ->phis())
    PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
  if (MSSAU)
    if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
      MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
}

/// Compute an abstract "cost" of speculating the given instruction,
/// which is assumed to be safe to speculate. TCC_Free means cheap,
/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
/// expensive.
static InstructionCost computeSpeculationCost(const User *I,
                                              const TargetTransformInfo &TTI) {
  return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
}

/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We don't handle
/// the true generality of domination here, just a special case which works
/// well enough for us.
///
/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
/// see if V (which must be an instruction) and its recursive operands
/// that do not dominate BB have a combined cost lower than Budget and
/// are non-trapping. If both are true, the instruction is inserted into the
/// set and true is returned.
///
/// The cost for most non-trapping instructions is defined as 1 except for
/// Select whose cost is 2.
///
/// After this function returns, Cost is increased by the cost of
/// V plus its non-dominating operands. If that cost is greater than
/// Budget, false is returned and Cost is undefined.
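/// For illustration (hypothetical IR):
///   entry:
///     %a = add i32 %x, 1          ; defined above the branch: dominates BB
///     br i1 %c, label %then, label %bb
///   then:                         ; unconditionally branches to %bb
///     %b = add i32 %a, 2          ; speculation candidate, cost 1
///     br label %bb
/// Here %a trivially dominates the merge point, while %b is accepted only if
/// its cost (plus that of its non-dominating operands) fits in Budget and it
/// is safe to speculate.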
static bool dominatesMergePoint(
    Value *V, BasicBlock *BB, Instruction *InsertPt,
    SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
    InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
    SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
  // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
  // so limit the recursion depth.
  // TODO: While this recursion limit does prevent pathological behavior, it
  // would be better to track visited instructions to avoid cycles.
  if (Depth == MaxSpeculationDepth)
    return false;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    // Non-instructions dominate all instructions and can be executed
    // unconditionally.
    return true;
  }
  BasicBlock *PBB = I->getParent();

  // We don't want to allow weird loops that might have the "if condition" in
  // the bottom of this block.
  if (PBB == BB)
    return false;

  // If this instruction is defined in a block that contains an unconditional
  // branch to BB, then it must be in the 'conditional' part of the "if
  // statement". If not, it definitely dominates the region.
  BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
  if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
    return true;

  // If we have seen this instruction before, don't count it again.
  if (AggressiveInsts.count(I))
    return true;

  // Okay, it looks like the instruction IS in the "condition". Check to
  // see if it's a cheap instruction to unconditionally compute, and if it
  // only uses stuff defined outside of the condition. If so, hoist it out.
  if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
    return false;

  // Overflow arithmetic instruction plus extract value are usually generated
  // when a division is being replaced. But, in this case, the zero check may
  // still be kept in the code. In that case it would be worth to hoist these
  // two instructions out of the basic block. Let's treat this pattern as one
  // single cheap instruction here!
  WithOverflowInst *OverflowInst;
  if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
    ZeroCostInstructions.insert(OverflowInst);
    Cost += 1;
  } else if (!ZeroCostInstructions.contains(I))
    Cost += computeSpeculationCost(I, TTI);

  // Allow exactly one instruction to be speculated regardless of its cost
  // (as long as it is safe to do so).
  // This is intended to flatten the CFG even if the instruction is a division
  // or other expensive operation. The speculation of an expensive instruction
  // is expected to be undone in CodeGenPrepare if the speculation has not
  // enabled further IR optimizations.
  if (Cost > Budget &&
      (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
       !Cost.isValid()))
    return false;

  // Okay, we can only really hoist these out if their operands do
  // not take us over the cost threshold.
  for (Use &Op : I->operands())
    if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
                             TTI, AC, ZeroCostInstructions, Depth + 1))
      return false;
  // Okay, it's safe to do this! Remember this instruction.
  AggressiveInsts.insert(I);
  return true;
}

/// Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
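/// For illustration (hypothetical constants):
///   i32 7                     --> i32 7
///   ptr null                  --> iN 0, where N is the pointer width
///   inttoptr (i64 42 to ptr)  --> i64 42, when the cast is lossless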
static ConstantInt *getConstantInt(Value *V, const DataLayout &DL) {
  // Normal constant int.
  ConstantInt *CI = dyn_cast<ConstantInt>(V);
  if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
    return CI;

  // It is not safe to look through inttoptr or ptrtoint when using unstable
  // pointer types.
  if (DL.hasUnstableRepresentation(V->getType()))
    return nullptr;

  // This is some kind of pointer constant. Turn it into a pointer-sized
  // ConstantInt if possible.
  IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));

  // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
  if (isa<ConstantPointerNull>(V))
    return ConstantInt::get(IntPtrTy, 0);

  // IntToPtr const int, we can look through this if the semantics of
  // inttoptr for this address space are a simple (truncating) bitcast.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::IntToPtr)
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
        // The constant is very likely to have the right type already.
        if (CI->getType() == IntPtrTy)
          return CI;
        else
          return cast<ConstantInt>(
              ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
      }
  return nullptr;
}

namespace {

/// Given a chain of or (||) or and (&&) comparisons of a value against a
/// constant, this will try to recover the information required for a switch
/// structure.
/// It will depth-first traverse the chain of comparisons, seeking patterns
/// like %a == 12 or %a < 4, and combine them to produce a set of integers
/// representing the different cases for the switch.
/// Note that if the chain is composed of '||' it will build the set of
/// elements that match the comparisons (i.e. any of these values makes the
/// chain true), while for a chain of '&&' it will build the set of elements
/// that make the test fail.
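/// For illustration (hypothetical IR): gathering the chain
///   %c1 = icmp eq i32 %a, 12
///   %c2 = icmp ult i32 %a, 4
///   %or = or i1 %c1, %c2
/// yields CompValue = %a, IsEq = true, and Vals = {0, 1, 2, 3, 12}.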
struct ConstantComparesGatherer {
  const DataLayout &DL;

  /// Value found for the switch comparison
  Value *CompValue = nullptr;

  /// Extra clause to be checked before the switch
  Value *Extra = nullptr;

  /// Set of integers to match in switch
  SmallVector<ConstantInt *, 8> Vals;

  /// Number of comparisons matched in the and/or chain
  unsigned UsedICmps = 0;

  /// True if the elements in Vals match the comparisons.
  bool IsEq = false;

  // Used to check if the first matched CompValue shall be the Extra check.
  bool IgnoreFirstMatch = false;
  bool MultipleMatches = false;

  /// Construct and compute the result for the comparison instruction Cond
  ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
    gather(Cond);
    if (CompValue || !MultipleMatches)
      return;
    Extra = nullptr;
    Vals.clear();
    UsedICmps = 0;
    IgnoreFirstMatch = true;
    gather(Cond);
  }

  ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
  ConstantComparesGatherer &
  operator=(const ConstantComparesGatherer &) = delete;

private:
  /// Try to set the current value used for the comparison; it succeeds only if
  /// it wasn't set before or if the new value is the same as the old one.
  bool setValueOnce(Value *NewVal) {
    if (IgnoreFirstMatch) {
      IgnoreFirstMatch = false;
      return false;
    }
    if (CompValue && CompValue != NewVal) {
      MultipleMatches = true;
      return false;
    }
    CompValue = NewVal;
    return true;
  }

  /// Try to match Instruction "I" as a comparison against a constant and
  /// populate the array Vals with the set of values that match (or do not
  /// match depending on isEQ).
  /// Return false on failure. On success, the Value the comparison matched
  /// against is placed in CompValue.
  /// If CompValue is already set, the function is expected to fail if a match
  /// is found but the value compared to is different.
  bool matchInstruction(Instruction *I, bool isEQ) {
    if (match(I, m_Not(m_Instruction(I))))
      isEQ = !isEQ;

    Value *Val;
    if (match(I, m_NUWTrunc(m_Value(Val)))) {
      // If we already have a value for the switch, it has to match!
      if (!setValueOnce(Val))
        return false;
      UsedICmps++;
      Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
      return true;
    }
    // If this is an icmp against a constant, handle this as one of the cases.
    ICmpInst *ICI;
    ConstantInt *C;
    if (!((ICI = dyn_cast<ICmpInst>(I)) &&
          (C = getConstantInt(I->getOperand(1), DL)))) {
      return false;
    }

    Value *RHSVal;
    const APInt *RHSC;

    // Pattern match a special case
    // (x & ~2^z) == y --> x == y || x == y|2^z
    // This undoes a transformation done by instcombine to fuse 2 compares.
    if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
      // It's a little bit hard to see why the following transformations are
      // correct. Here is a CVC3 program to verify them for 64-bit values:

      /*
      ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
      x : BITVECTOR(64);
      y : BITVECTOR(64);
      z : BITVECTOR(64);
      mask : BITVECTOR(64) = BVSHL(ONE, z);
      QUERY( (y & ~mask = y) =>
             ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
      );
      QUERY( (y | mask = y) =>
             ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
      );
      */

      // Please note that each pattern must be a dual implication (<--> or
      // iff). One directional implication can create spurious matches. If the
      // implication is only one-way, an unsatisfiable condition on the left
      // side can imply a satisfiable condition on the right side. Dual
      // implication ensures that satisfiable conditions are transformed to
      // other satisfiable conditions and unsatisfiable conditions are
      // transformed to other unsatisfiable conditions.
      // Here is a concrete example of an unsatisfiable condition on the left
      // implying a satisfiable condition on the right:
      //
      // mask = (1 << z)
      // (x & ~mask) == y --> (x == y || x == (y | mask))
      //
      // Substituting y = 3, z = 0 yields:
      // (x & -2) == 3 --> (x == 3 || x == 2)

      // Pattern match a special case:
      /*
      QUERY( (y & ~mask = y) =>
             ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
      );
      */
      if (match(ICI->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
        APInt Mask = ~*RHSC;
        if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
          // If we already have a value for the switch, it has to match!
          if (!setValueOnce(RHSVal))
            return false;

          Vals.push_back(C);
          Vals.push_back(ConstantInt::get(C->getContext(),
                                          C->getValue() | Mask));
          UsedICmps++;
          return true;
        }
      }

      // Pattern match a special case:
      /*
      QUERY( (y | mask = y) =>
             ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
      );
      */
      if (match(ICI->getOperand(0), m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
        APInt Mask = *RHSC;
        if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
          // If we already have a value for the switch, it has to match!
          if (!setValueOnce(RHSVal))
            return false;

          Vals.push_back(C);
          Vals.push_back(ConstantInt::get(C->getContext(),
                                          C->getValue() & ~Mask));
          UsedICmps++;
          return true;
        }
      }

      // If we already have a value for the switch, it has to match!
      if (!setValueOnce(ICI->getOperand(0)))
        return false;

      UsedICmps++;
      Vals.push_back(C);
      return true;
    }

    // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
    ConstantRange Span =
        ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue());

    // Shift the range if the compare is fed by an add. This is the range
    // compare idiom as emitted by instcombine.
    Value *CandidateVal = I->getOperand(0);
    if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
      Span = Span.subtract(*RHSC);
      CandidateVal = RHSVal;
    }

    // If this is an and/!= check, then we are looking to build the set of
    // values that *don't* pass the and chain. I.e. to turn "x ugt 2" into
    // x != 0 && x != 1.
    if (!isEQ)
      Span = Span.inverse();

    // If there are a ton of values, we don't want to make a ginormous switch.
    if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
      return false;
    }

    // If we already have a value for the switch, it has to match!
    if (!setValueOnce(CandidateVal))
      return false;

    // Add all values from the range to the set
    APInt Tmp = Span.getLower();
    do
      Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
    while (++Tmp != Span.getUpper());

    UsedICmps++;
    return true;
  }

  /// Given a potentially 'or'd or 'and'd together collection of icmp
  /// eq/ne/lt/gt instructions that compare a value against a constant, extract
  /// the value being compared, and stick the list constants into the Vals
  /// vector.
  /// One "Extra" case is allowed to differ from the other.
  void gather(Value *V) {
    Value *Op0, *Op1;
    if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
      IsEq = true;
    else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
      IsEq = false;
    else
      return;
    // Keep a stack (SmallVector for efficiency) for depth-first traversal
    SmallVector<Value *, 8> DFT{Op0, Op1};
    SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};

    while (!DFT.empty()) {
      V = DFT.pop_back_val();

      if (Instruction *I = dyn_cast<Instruction>(V)) {
        // If it is a || (or && depending on isEQ), process the operands.
        if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
                 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
          if (Visited.insert(Op1).second)
            DFT.push_back(Op1);
          if (Visited.insert(Op0).second)
            DFT.push_back(Op0);

          continue;
        }

        // Try to match the current instruction
        if (matchInstruction(I, IsEq))
          // Match succeeded; continue the loop.
          continue;
      }

      // One element of the sequence of || (or &&) could not be matched as a
      // comparison against the same value as the others.
      // We allow only one "Extra" case to be checked before the switch.
      if (!Extra) {
        Extra = V;
        continue;
      }
      // Failed to parse a proper sequence; abort now.
      CompValue = nullptr;
      break;
    }
  }
};

} // end anonymous namespace

static void eraseTerminatorAndDCECond(Instruction *TI,
                                      MemorySSAUpdater *MSSAU = nullptr) {
  Instruction *Cond = nullptr;
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    Cond = dyn_cast<Instruction>(SI->getCondition());
  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    if (BI->isConditional())
      Cond = dyn_cast<Instruction>(BI->getCondition());
  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
    Cond = dyn_cast<Instruction>(IBI->getAddress());
  }

  TI->eraseFromParent();
  if (Cond)
    RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
}

/// Return true if the specified terminator checks
/// to see if a value is equal to a constant integer value.
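/// For illustration (hypothetical IR), both of the following make this
/// function return %x:
///   switch i32 %x, label %default [ i32 1, label %a ]
/// and
///   %c = icmp eq i32 %x, 5
///   br i1 %c, label %t, label %f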
Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
  Value *CV = nullptr;
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    // Do not permit merging of large switch instructions into their
    // predecessors unless there is only one predecessor.
    if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
      CV = SI->getCondition();
  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
        if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
          CV = ICI->getOperand(0);
      } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
        if (Trunc->hasNoUnsignedWrap())
          CV = Trunc->getOperand(0);
      }
    }

  // Unwrap any lossless ptrtoint cast (except for unstable pointers).
  if (CV) {
    if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
      Value *Ptr = PTII->getPointerOperand();
      if (DL.hasUnstableRepresentation(Ptr->getType()))
        return CV;
      if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
        CV = Ptr;
    }
  }
  return CV;
}

/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    Cases.reserve(SI->getNumCases());
    for (auto Case : SI->cases())
      Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
                                                  Case.getCaseSuccessor()));
    return SI->getDefaultDest();
  }

  BranchInst *BI = cast<BranchInst>(TI);
  Value *Cond = BI->getCondition();
  ICmpInst::Predicate Pred;
  ConstantInt *C;
  if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
    Pred = ICI->getPredicate();
    C = getConstantInt(ICI->getOperand(1), DL);
  } else {
    Pred = ICmpInst::ICMP_NE;
    auto *Trunc = cast<TruncInst>(Cond);
    C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
  }
  BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
  Cases.push_back(ValueEqualityComparisonCase(C, Succ));
  return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
}

/// Given a vector of bb/value pairs, remove any entries
/// in the list that match the specified block.
static void
eliminateBlockCases(BasicBlock *BB,
                    std::vector<ValueEqualityComparisonCase> &Cases) {
  llvm::erase(Cases, BB);
}

/// Return true if there are any keys in C1 that exist in C2 as well.
static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
                          std::vector<ValueEqualityComparisonCase> &C2) {
  std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;

  // Make V1 be smaller than V2.
  if (V1->size() > V2->size())
    std::swap(V1, V2);

  if (V1->empty())
    return false;
  if (V1->size() == 1) {
    // Just scan V2.
    ConstantInt *TheVal = (*V1)[0].Value;
    for (const ValueEqualityComparisonCase &VECC : *V2)
      if (TheVal == VECC.Value)
        return true;
  }

  // Otherwise, just sort both lists and compare element by element.
  array_pod_sort(V1->begin(), V1->end());
  array_pod_sort(V2->begin(), V2->end());
  unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
  while (i1 != e1 && i2 != e2) {
    if ((*V1)[i1].Value == (*V2)[i2].Value)
      return true;
    if ((*V1)[i1].Value < (*V2)[i2].Value)
      ++i1;
    else
      ++i2;
  }
  return false;
}

/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
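/// For illustration (hypothetical CFG): if the sole predecessor ends in
///   switch i32 %x, label %other [ i32 4, label %tibb ]
/// then on entry to %tibb we know %x == 4, so a terminator in %tibb that
/// compares %x against constants is statically decidable and can be replaced
/// with an unconditional branch to the matching destination.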
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  eliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
  eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to an
      // unconditional branch.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      eraseTerminatorAndDCECond(TI);

      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (const ValueEqualityComparisonCase &Case : PredCases)
      DeadCases.insert(Case.Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    if (DTU) {
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  eraseTerminatorAndDCECond(TI);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}

namespace {

/// This class implements a stable ordering of constant
/// integers that does not depend on their address. This is important for
/// applications that sort ConstantInt's to ensure uniqueness.
struct ConstantIntOrdering {
  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
    return LHS->getValue().ult(RHS->getValue());
  }
};

} // end anonymous namespace

static int constantIntSortPredicate(ConstantInt *const *P1,
                                    ConstantInt *const *P2) {
  const ConstantInt *LHS = *P1;
  const ConstantInt *RHS = *P2;
  if (LHS == RHS)
    return 0;
  // Sort in ascending order; a qsort-style predicate returns a negative
  // value when LHS orders before RHS.
  return LHS->getValue().ult(RHS->getValue()) ? -1 : 1;
}

/// Get Weights of a given terminator, the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
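/// For illustration (hypothetical metadata): given
///   %c = icmp eq i32 %x, 5
///   br i1 %c, label %case, label %default, !prof !0
///   !0 = !{!"branch_weights", i32 10, i32 90}
/// the default (false) successor's weight is moved to the front, so Weights
/// becomes {90, 10}.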
static void getBranchWeights(Instruction *TI,
                             SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
  assert(MD && "Invalid branch-weight metadata");
  extractFromBranchWeightMD64(MD, Weights);

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    assert(Weights.size() == 2);
    auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICI)
      return;

    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(Weights.front(), Weights.back());
  }
}

static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc::getDropped());
    } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
      mapAtomInstance(DL, VMap);
    }

    RemapInstruction(NewBonusInst, VMap,
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // the location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
    RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is a use coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }

  // Key Instructions: We may have propagated atom info into the pred. If the
  // pred's terminator already has atom info do nothing as merging would drop
  // one atom group anyway. If it doesn't, propagate the remapped atom group
  // from BB's terminator.
  if (auto &PredDL = PTI->getDebugLoc()) {
    auto &DL = BB->getTerminator()->getDebugLoc();
    if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
        PredDL.isSameSourceLocation(DL)) {
      PTI->setDebugLoc(DL);
      RemapSourceAtom(PTI, VMap);
    }
  }
}
1245
1246bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1247 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1248 BasicBlock *BB = TI->getParent();
1249 BasicBlock *Pred = PTI->getParent();
1250
1251 SmallVector<DominatorTree::UpdateType, 32> Updates;
1252
1253 // Figure out which 'cases' to copy from SI to PSI.
1254 std::vector<ValueEqualityComparisonCase> BBCases;
1255 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, Cases&: BBCases);
1256
1257 std::vector<ValueEqualityComparisonCase> PredCases;
1258 BasicBlock *PredDefault = getValueEqualityComparisonCases(TI: PTI, Cases&: PredCases);
1259
1260 // Based on whether the default edge from PTI goes to BB or not, fill in
1261 // PredCases and PredDefault with the new switch cases we would like to
1262 // build.
1263 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1264
1265 // Update the branch weight metadata along the way
1266 SmallVector<uint64_t, 8> Weights;
1267 bool PredHasWeights = hasBranchWeightMD(I: *PTI);
1268 bool SuccHasWeights = hasBranchWeightMD(I: *TI);
1269
1270 if (PredHasWeights) {
1271 getBranchWeights(TI: PTI, Weights);
1272 // branch-weight metadata is inconsistent here.
1273 if (Weights.size() != 1 + PredCases.size())
1274 PredHasWeights = SuccHasWeights = false;
1275 } else if (SuccHasWeights)
1276 // If there are no predecessor weights but there are successor weights,
1277 // populate Weights with 1, which will later be scaled to the sum of
1278 // successor's weights
1279 Weights.assign(NumElts: 1 + PredCases.size(), Elt: 1);
1280
1281 SmallVector<uint64_t, 8> SuccWeights;
1282 if (SuccHasWeights) {
1283 getBranchWeights(TI, Weights&: SuccWeights);
1284 // branch-weight metadata is inconsistent here.
1285 if (SuccWeights.size() != 1 + BBCases.size())
1286 PredHasWeights = SuccHasWeights = false;
1287 } else if (PredHasWeights)
1288 SuccWeights.assign(NumElts: 1 + BBCases.size(), Elt: 1);
1289
1290 if (PredDefault == BB) {
1291 // If this is the default destination from PTI, only the edges in TI
1292 // that don't occur in PTI, or that branch to BB will be activated.
1293 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1294 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1295 if (PredCases[i].Dest != BB)
1296 PTIHandled.insert(x: PredCases[i].Value);
1297 else {
1298 // The default destination is BB, we don't need explicit targets.
1299 std::swap(a&: PredCases[i], b&: PredCases.back());
1300
1301 if (PredHasWeights || SuccHasWeights) {
1302 // Increase weight for the default case.
1303 Weights[0] += Weights[i + 1];
1304 std::swap(a&: Weights[i + 1], b&: Weights.back());
1305 Weights.pop_back();
1306 }
1307
1308 PredCases.pop_back();
1309 --i;
1310 --e;
1311 }
1312
1313 // Reconstruct the new switch statement we will be building.
1314 if (PredDefault != BBDefault) {
1315 PredDefault->removePredecessor(Pred);
1316 if (DTU && PredDefault != BB)
1317 Updates.push_back(Elt: {DominatorTree::Delete, Pred, PredDefault});
1318 PredDefault = BBDefault;
1319 ++NewSuccessors[BBDefault];
1320 }
1321
1322 unsigned CasesFromPred = Weights.size();
1323 uint64_t ValidTotalSuccWeight = 0;
1324 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1325 if (!PTIHandled.count(x: BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1326 PredCases.push_back(x: BBCases[i]);
1327 ++NewSuccessors[BBCases[i].Dest];
1328 if (SuccHasWeights || PredHasWeights) {
1329 // The default weight is at index 0, so weight for the ith case
1330 // should be at index i+1. Scale the cases from successor by
1331 // PredDefaultWeight (Weights[0]).
1332 Weights.push_back(Elt: Weights[0] * SuccWeights[i + 1]);
1333 ValidTotalSuccWeight += SuccWeights[i + 1];
1334 }
1335 }
1336
1337 if (SuccHasWeights || PredHasWeights) {
1338 ValidTotalSuccWeight += SuccWeights[0];
1339 // Scale the cases from predecessor by ValidTotalSuccWeight.
1340 for (unsigned i = 1; i < CasesFromPred; ++i)
1341 Weights[i] *= ValidTotalSuccWeight;
1342 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1343 Weights[0] *= SuccWeights[0];
1344 }
1345 } else {
1346 // If this is not the default destination from PSI, only the edges
1347 // in SI that occur in PSI with a destination of BB will be
1348 // activated.
1349 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1350 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1351 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1352 if (PredCases[i].Dest == BB) {
1353 PTIHandled.insert(x: PredCases[i].Value);
1354
1355 if (PredHasWeights || SuccHasWeights) {
1356 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1357 std::swap(a&: Weights[i + 1], b&: Weights.back());
1358 Weights.pop_back();
1359 }
1360
1361 std::swap(a&: PredCases[i], b&: PredCases.back());
1362 PredCases.pop_back();
1363 --i;
1364 --e;
1365 }
1366
1367 // Okay, now we know which constants were sent to BB from the
1368 // predecessor. Figure out where they will all go now.
1369 for (const ValueEqualityComparisonCase &Case : BBCases)
1370 if (PTIHandled.count(x: Case.Value)) {
1371 // If this is one we are capable of getting...
1372 if (PredHasWeights || SuccHasWeights)
1373 Weights.push_back(Elt: WeightsForHandled[Case.Value]);
1374 PredCases.push_back(x: Case);
1375 ++NewSuccessors[Case.Dest];
1376 PTIHandled.erase(x: Case.Value); // This constant is taken care of
1377 }
1378
1379 // If there are any constants vectored to BB that TI doesn't handle,
1380 // they must go to the default destination of TI.
1381 for (ConstantInt *I : PTIHandled) {
1382 if (PredHasWeights || SuccHasWeights)
1383 Weights.push_back(Elt: WeightsForHandled[I]);
1384 PredCases.push_back(x: ValueEqualityComparisonCase(I, BBDefault));
1385 ++NewSuccessors[BBDefault];
1386 }
1387 }
1388
  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {llvm::from_range, successors(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CV->getType()) &&
           "Should not end up here with unstable pointers");
    CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
                                "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights)
    setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);

  eraseTerminatorAndDCECond(PTI);

  // Okay, last check. If BB is still a successor of PTI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either cold
        // code, or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}

/// The specified terminator is a value equality comparison instruction
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value
/// comparisons on the same value. If so, and if safe to do so, fold them
/// together.
bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
                                                         IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  Value *CV = isValueEqualityComparison(TI); // CondVal
  assert(CV && "Not a comparison?");

  bool Changed = false;

  SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  while (!Preds.empty()) {
    BasicBlock *Pred = Preds.pop_back_val();
    Instruction *PTI = Pred->getTerminator();

    // Don't try to fold into itself.
    if (Pred == BB)
      continue;

    // See if the predecessor is a comparison with the same value.
    Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
    if (PCV != CV)
      continue;

    SmallSetVector<BasicBlock *, 4> FailBlocks;
    if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
      for (auto *Succ : FailBlocks) {
        if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
          return false;
      }
    }

    performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
    Changed = true;
  }
  return Changed;
}

// If we would need to insert a select that uses the value of this invoke
// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
// need to do this), we can't hoist the invoke, as there is nowhere to put the
// select in this case.
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
                                Instruction *I1, Instruction *I2) {
  for (BasicBlock *Succ : successors(BB1)) {
    for (const PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
        return false;
      }
    }
  }
  return true;
}

// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across.
enum SkipFlags {
  SkipReadMem = 1,
  SkipSideEffect = 2,
  SkipImplicitControlFlow = 4
};
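
// The flags are ORed together as hoistCommonCodeFromSuccessors skips over
// non-hoisted instructions (see skippedInstrFlags), and are then consulted by
// isSafeToHoistInstr to decide whether a later instruction can be moved
// across everything that was skipped.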

static unsigned skippedInstrFlags(Instruction *I) {
  unsigned Flags = 0;
  if (I->mayReadFromMemory())
    Flags |= SkipReadMem;
  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
  // inalloca) across stacksave/stackrestore boundaries.
  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
    Flags |= SkipSideEffect;
  if (!isGuaranteedToTransferExecutionToSuccessor(I))
    Flags |= SkipImplicitControlFlow;
  return Flags;
}

// Returns true if it is safe to reorder an instruction across preceding
// instructions in a basic block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
  // Don't reorder a store over a load.
  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
    return false;

  // If we have seen an instruction with side effects, it's unsafe to reorder
  // an instruction which reads memory or itself has side effects.
  if ((Flags & SkipSideEffect) &&
      (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
    return false;

  // Reordering across an instruction which does not necessarily transfer
  // control to the next instruction is speculation.
  if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I))
    return false;

  // Hoisting of llvm.deoptimize is only legal together with the next return
  // instruction, which this pass is not always able to do.
  if (auto *CB = dyn_cast<CallBase>(I))
    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
      return false;

  // It's also unsafe/illegal to hoist an instruction above its instruction
  // operands.
  BasicBlock *BB = I->getParent();
  for (Value *Op : I->operands()) {
    if (auto *J = dyn_cast<Instruction>(Op))
      if (J->getParent() == BB)
        return false;
  }

  return true;
}

static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I,
                                          bool PtrValueMayBeModified = false);

/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
/// instructions \p I1 and \p I2 can and should be hoisted.
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
                                          const TargetTransformInfo &TTI) {
  // If we're going to hoist a call, make sure that the two instructions
  // we're commoning/hoisting are both marked with musttail, or neither of
  // them is marked as such. Otherwise, we might end up in a situation where
  // we hoist from a block where the terminator is a `ret` to a block where
  // the terminator is a `br`, and `musttail` calls expect to be followed by
  // a return.
  auto *C1 = dyn_cast<CallInst>(I1);
  auto *C2 = dyn_cast<CallInst>(I2);
  if (C1 && C2)
    if (C1->isMustTailCall() != C2->isMustTailCall())
      return false;

  if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
    return false;

  // If either of the two call sites has a nomerge or convergent attribute,
  // stop hoisting.
  if (const auto *CB1 = dyn_cast<CallBase>(I1))
    if (CB1->cannotMerge() || CB1->isConvergent())
      return false;
  if (const auto *CB2 = dyn_cast<CallBase>(I2))
    if (CB2->cannotMerge() || CB2->isConvergent())
      return false;

  return true;
}

/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisted in
/// hoistCommonCodeFromSuccessors. e.g. The input:
///    I1 DVRs: { x, z },
///    OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
static void hoistLockstepIdenticalDbgVariableRecords(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators.
  SmallVector<CurrentAndEndIt> Itrs;
  Itrs.reserve(OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical.
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
                  [&](DbgRecord::self_iterator I) {
                    return Itrs[0].first->isIdenticalToWhenDefined(*I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
  // the lock-step DbgRecords are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Itrs, atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
      }
    }
  }
}

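/// Return true if \p I1 and \p I2 compute the same value modulo the order of
/// their operands: either they are identical, or they are comparisons of the
/// same operands with swapped predicates (e.g. 'icmp slt %a, %b' vs.
/// 'icmp sgt %b, %a'), or they are the same commutative operation with the
/// first two operands swapped.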
static bool areIdenticalUpToCommutativity(const Instruction *I1,
                                          const Instruction *I2) {
  if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
    return true;

  if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
    if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
      return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
             Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
             Cmp1->getOperand(1) == Cmp2->getOperand(0);

  if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
    return I1->getOperand(0) == I2->getOperand(1) &&
           I1->getOperand(1) == I2->getOperand(0) &&
           equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
  }

  return false;
}

/// If the target supports conditional faulting,
/// we look for the following pattern:
/// \code
///   BB:
///     ...
///     %cond = icmp ult %x, %y
///     br i1 %cond, label %TrueBB, label %FalseBB
///   FalseBB:
///     store i32 1, ptr %q, align 4
///     ...
///   TrueBB:
///     %maskedloadstore = load i32, ptr %b, align 4
///     store i32 %maskedloadstore, ptr %p, align 4
///     ...
/// \endcode
///
/// and transform it into:
///
/// \code
///   BB:
///     ...
///     %cond = icmp ult %x, %y
///     %maskedloadstore = cload i32, ptr %b, %cond
///     cstore i32 %maskedloadstore, ptr %p, %cond
///     cstore i32 1, ptr %q, ~%cond
///     br i1 %cond, label %TrueBB, label %FalseBB
///   FalseBB:
///     ...
///   TrueBB:
///     ...
/// \endcode
///
/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
/// e.g.
///
/// \code
///   %vcond = bitcast i1 %cond to <1 x i1>
///   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
///                         (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
///   %maskedloadstore = bitcast <1 x i32> %v0 to i32
///   call void @llvm.masked.store.v1i32.p0
///                         (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
///   %cond.not = xor i1 %cond, true
///   %vcond.not = bitcast i1 %cond.not to <1 x i1>
///   call void @llvm.masked.store.v1i32.p0
///                         (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
/// \endcode
///
/// So we need to turn hoisted load/store into cload/cstore.
///
/// \param BI The branch instruction.
/// \param SpeculatedConditionalLoadsStores The load/store instructions that
///        will be speculated.
/// \param Invert Whether it is the false arm (FalseBB) that is being
///        speculated. Only used in a triangle CFG.
static void hoistConditionalLoadsStores(
    BranchInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
  auto *Cond = BI->getOperand(0);
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
        VCondTy);
  } else {
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
    MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
  }
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(V))
      V = BitCast->getOperand(0);
    return V;
  };
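  // Rewrite each speculated access as a masked load/store, choosing the mask
  // that corresponds to the arm the instruction originally came from.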
  for (auto *I : SpeculatedConditionalLoadsStores) {
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          if ((PN = dyn_cast<PHINode>(U))) {
            PassThru = Builder.CreateBitCast(
                PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                FixedVectorType::get(Ty, 1));
          } else if (auto *Ins = cast<Instruction>(U);
                     Sel && Ins->getParent() == BB) {
            // This happens when the store and/or a speculative instruction
            // between the load and store were hoisted to BB. Make sure the
            // masked load is inserted before its use.
            // We assume there is only one such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
      if (PN)
        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
      I->replaceAllUsesWith(NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align
    // are kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Only apply to pointer-typed loads, which are not
    //                    supported here, so there is nothing to keep.
    // !range: The load type is changed from scalar to vector, but the
    //         metadata on a vector specifies a per-element range, so the
    //         semantics stay the same. Keep it.
    // !annotation: Does not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    at::deleteAssignmentMarkers(I);
    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(*I);
    I->eraseFromParent();
  }
}

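/// Return true if \p I is a simple (non-volatile, non-atomic) load or store
/// that the target can execute as a conditional-faulting operation, and the
/// corresponding hoisting is enabled.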
static bool isSafeCheapLoadStore(const Instruction *I,
                                 const TargetTransformInfo &TTI) {
  // Do not handle volatile or atomic loads/stores.
  bool IsStore = false;
  if (auto *L = dyn_cast<LoadInst>(I)) {
    if (!L->isSimple() || !HoistLoadsWithCondFaulting)
      return false;
  } else if (auto *S = dyn_cast<StoreInst>(I)) {
    if (!S->isSimple() || !HoistStoresWithCondFaulting)
      return false;
    IsStore = true;
  } else
    return false;

  // llvm.masked.load/store use i32 for alignment while load/store use i64.
  // That's why we have the alignment limitation.
  // FIXME: Update the prototype of the intrinsics?
  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
         getLoadStoreAlignment(I) < Value::MaximumAlignment;
}

/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting if all successor blocks contain only matching
/// instructions. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                   bool AllInstsEqOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors.
  // As such, we currently just scan for obviously identical instructions in
  // an identical order, possibly separated by the same number of
  // non-identical instructions.
  BasicBlock *BB = TI->getParent();
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this
  // fold, because the code we'd hoist would no longer run when we jump into
  // the block by its address.
  SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
  for (auto *Succ : UniqueSuccessors) {
    if (Succ->hasAddressTaken())
      return false;
    // Use getUniquePredecessor instead of getSinglePredecessor to support
    // successors reached via multiple cases of a switch.
    if (Succ->getUniquePredecessor())
      continue;
    // If Succ has >1 predecessors, continue to check whether Succ contains
    // only one `unreachable` inst. Since executing an `unreachable` inst is
    // UB, we can relax the condition based on the assumption that the program
    // would never enter Succ and trigger such UB.
    if (isa<UnreachableInst>(*Succ->begin()))
      continue;
    return false;
  }
  // The second member of the pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : UniqueSuccessors) {
    BasicBlock::iterator SuccItr = Succ->begin();
    if (isa<PHINode>(*SuccItr))
      return false;
    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
  }

  if (AllInstsEqOnly) {
    // Check if all instructions in the successor blocks match. This allows
    // hoisting all instructions and removing the blocks we are hoisting from,
    // so does not add any new instructions.

    // Check if sizes and terminators of all successors match.
    unsigned Size0 = UniqueSuccessors[0]->size();
    Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
    bool AllSame =
        all_of(drop_begin(UniqueSuccessors), [Term0, Size0](BasicBlock *Succ) {
          return Succ->getTerminator()->isIdenticalTo(Term0) &&
                 Succ->size() == Size0;
        });
    if (!AllSame)
      return false;
    LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
    while (LRI.isValid()) {
      Instruction *I0 = (*LRI)[0];
      if (any_of(*LRI, [I0](Instruction *I) {
            return !areIdenticalUpToCommutativity(I0, I);
          })) {
        return false;
      }
      --LRI;
    }
    // Now we know that all instructions in all successors can be hoisted. Let
    // the loop below handle the hoisting.
  }

  // Count how many instructions were not hoisted so far. There's a limit on
  // how many instructions we skip, serving as a compilation time control as
  // well as preventing excessive increase of live ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block,
  // we can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(SuccIterPairs,
             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(&*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (which
    // would make a broken BB); instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is
      // ignored at the beginning of the loop, we can hoist the terminator
      // instruction. If any instructions remain in the block, we cannot hoist
      // terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(
                 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
                   shouldHoistCommonInstructions(I1, I2, TTI);
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      // For a normal instruction, we just move one to right before the
      // branch, then replace all uses of the other with the first. Finally,
      // we remove the now redundant second instruction.
      hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
      // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
      // and leave any that were not hoisted behind (by calling moveBefore
      // rather than moveBeforePreserving).
      I1->moveBefore(TI->getIterator());
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter++;
        assert(I2 != I1);
        if (!I2->use_empty())
          I2->replaceAllUsesWith(I1);
        I1->andIRFlags(I2);
        if (auto *CB = dyn_cast<CallBase>(I1)) {
          bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
          assert(Success && "We should not be trying to hoist callbases "
                            "with non-intersectable attributes");
          // For NDEBUG builds.
          (void)Success;
        }

        combineMetadataForCSE(I1, I2, true);
        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
        I2->eraseFromParent();
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions.
      // Record if any have characteristics that would prevent reordering
      // instructions across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}

bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs,
    ArrayRef<BasicBlock *> UniqueSuccessors) {

  auto *BI = dyn_cast<BranchInst>(TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return false;

  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow than convert it to a select.
        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
            passingValueIsAlwaysUndefined(BB2V, &PN))
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(TIParent, TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    I1->replaceAllUsesWith(NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  SmallVector<DebugLoc, 4> Locs;
  Locs.push_back(I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert a select instruction to compute
  // the final result.
  if (BI) {
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          SI = cast<SelectInst>(Builder.CreateSelectFMF(
              BI->getCondition(), BB1V, BB2V,
              isa<FPMathOperator>(PN) ? &PN : nullptr,
              BB1V->getName() + "." + BB2V->getName(), BI));
        }

        // Make the PHI node use the select for all incoming values for
        // BB1/BB2.
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, SI);
      }
    }
  }

  SmallVector<DominatorTree::UpdateType, 4> Updates;

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    addPredecessorToBlock(Succ, TIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU) {
    // TI might be a switch where several cases share a destination, so take
    // care not to record duplicate successors here.
    for (BasicBlock *Succ : UniqueSuccessors)
      Updates.push_back({DominatorTree::Delete, TIParent, Succ});
  }

  eraseTerminatorAndDCECond(TI);
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}

// TODO: Refine this. This should avoid cases like turning constant memcpy
// sizes into variables.
static bool replacingOperandWithVariableIsCheap(const Instruction *I,
                                                int OpIdx) {
  // Divide/Remainder by constant is typically much cheaper than by variable.
  if (I->isIntDivRem())
    return OpIdx != 1;
  return !isa<IntrinsicInst>(I);
}

// All instructions in Insts belong to different blocks that all
// unconditionally branch to a common successor. Analyze each instruction and
// return true if it would be possible to sink them into their successor,
// creating one common instruction instead. For every value that would be
// required to be provided by a PHI node (because an operand varies in each
// input block), add to PHIOperands.
static bool canSinkInstructions(
    ArrayRef<Instruction *> Insts,
    DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // the same number of uses, and we check later that the uses are consistent.
  std::optional<unsigned> NumUses;
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to loop infinitely.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    if (!NumUses)
      NumUses = I->getNumUses();
    else if (NumUses != I->getNumUses())
      return false;
  }

  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // Uses must be consistent: If I0 is used in a phi node in the sink target,
  // then the other phi operands must match the instructions from Insts. This
  // also has to hold true for any phi nodes that would be created as a result
  // of sinking. Both of these cases are represented by PhiOperands.
  for (const Use &U : I0->uses()) {
    auto It = PHIOperands.find(&U);
    if (It == PHIOperands.end())
      // There may be uses in other blocks when sinking into a loop header.
      return false;
    if (!equal(Insts, It->second))
      return false;
  }

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
          !canReplaceOperandWithVariable(I0, OI))
        // We can't create a PHI for this operand.
        return false;
      auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
      for (auto *I : Insts)
        Ops.push_back(I->getOperand(OI));
    }
  }
  return true;
}

// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
static void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks) {
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  SmallVector<Instruction *, 4> Insts;
  for (auto *BB : Blocks) {
    Instruction *I = BB->getTerminator();
    I = I->getPrevNode();
    Insts.push_back(I);
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value *, 4> NewOperands;
  Instruction *I0 = Insts.front();
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);

  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged
      // locations of all the commoned instructions. We start with the original
      // location of the "common" instruction and iteratively merge each
      // location in the loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, since an N-way merge for a CallInst is rare, we use the
      // simplified API instead of the complex one.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
      if (auto *CB = dyn_cast<CallBase>(I0)) {
        bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
        assert(Success && "We should not be trying to sink callbases "
                          "with non-intersectable attributes");
        // For NDEBUG builds.
        (void)Success;
      }
    }

  for (User *U : make_early_inc_range(I0->users())) {
    // canSinkInstructions checked that all instructions are only used by
    // phi nodes in a way that allows replacing the phi node with the common
    // instruction.
    auto *PN = cast<PHINode>(U);
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }
}

/// Check whether BB's predecessors end with unconditional branches. If so,
/// sink any common code from the predecessors to BB.
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
                                           DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  //   if (a) f(1);
  //   else if (b) f(2);
  //
  // produces:
  //
  //         [if]
  //        /    \
  //   [f(1)]   [if]
  //      |      |  \
  //      |      |   |
  //      |   [f(2)] |
  //       \     |  /
  //        [ end ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //         [if]
  //        /    \
  //   [x(1)]   [if]
  //      |      |  \
  //      |      |   \
  //      |   [x(2)]  |
  //       \    /     |
  //   [sink.split]   |
  //          \      /
  //          [ end ]
  //
  SmallVector<BasicBlock *, 4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
    if (PredBr && PredBr->isUnconditional())
      UnconditionalPreds.push_back(PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end
  // of each block upwards in lockstep. If the n'th instruction from the end
  // of each block can be sunk, those instructions are added to ValuesToSink
  // and we carry on. If we can sink an instruction but need to PHI-merge some
  // operands (because they're not identical in each instruction) we add these
  // to PHIOperands.
  // We prepopulate PHIOperands with the phis that already exist in BB.
  DenseMap<const Use *, SmallVector<Value *, 4>> PHIOperands;
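  // For each phi in BB, key the entry by the incoming use from the first
  // unconditional predecessor and record the incoming values from all
  // unconditional predecessors, in the same order canSinkInstructions
  // compares them in.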
  for (PHINode &PN : BB->phis()) {
    SmallDenseMap<BasicBlock *, const Use *, 4> IncomingVals;
    for (const Use &U : PN.incoming_values())
      IncomingVals.insert({PN.getIncomingBlock(U), &U});
    auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
    for (BasicBlock *Pred : UnconditionalPreds)
      Ops.push_back(*IncomingVals[Pred]);
  }

  int ScanIdx = 0;
  SmallPtrSet<Value *, 4> InstructionsToSink;
  LockstepReverseIterator<true> LRI(UnconditionalPreds);
  while (LRI.isValid() && canSinkInstructions(*LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n");
    InstructionsToSink.insert_range(*LRI);
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  if (!followedByDeoptOrUnreachable) {
    // Check whether this is the pointer operand of a load/store.
    auto IsMemOperand = [](Use &U) {
      auto *I = cast<Instruction>(U.getUser());
      if (isa<LoadInst>(I))
        return U.getOperandNo() == LoadInst::getPointerOperandIndex();
      if (isa<StoreInst>(I))
        return U.getOperandNo() == StoreInst::getPointerOperandIndex();
      return false;
    };

    // Okay, we *could* sink the last ScanIdx instructions. But how many can
    // we actually sink before encountering an instruction that is
    // unprofitable to sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
      unsigned NumPHIInsts = 0;
      for (Use &U : (*LRI)[0]->operands()) {
        auto It = PHIOperands.find(&U);
        if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
              return InstructionsToSink.contains(V);
            })) {
          ++NumPHIInsts;
          // Do not separate a load/store from the gep producing the address.
          // The gep can likely be folded into the load/store as an addressing
          // mode. Additionally, a load of a gep is easier to analyze than a
          // load of a phi.
          if (IsMemOperand(U) &&
              any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
            return false;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      }
      LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
      return NumPHIInsts <= 1;
    };

    // We've determined that we are going to sink the last ScanIdx
    // instructions, and recorded them in InstructionsToSink. Now, some
    // instructions may be unprofitable to sink. But that determination depends
    // on the instructions that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable,
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch the fewest
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
      InstructionsProfitableToSink.insert_range(*LRI);
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan: do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink");
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }

  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that
  // will actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop halfway through
  // and never actually sink it, which means we produce more PHIs than
  // intended. This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    sinkLastInstruction(UnconditionalPreds);
    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}

namespace {

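// Groups `invoke`s that share an unwind destination and may be merged into a
// single invoke, as decided by shouldBelongToSameSet().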
struct CompatibleSets {
  using SetTy = SmallVector<InvokeInst *, 2>;

  SmallVector<SetTy, 1> Sets;

  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);

  SetTy &getCompatibleSet(InvokeInst *II);

  void insert(InvokeInst *II);
};

CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
  // Perform a linear scan over all the existing sets, see if the new `invoke`
  // is compatible with any particular set. Since we know that all the
  // `invokes` within a set are compatible, only check the first `invoke` in
  // each set.
  // WARNING: at worst, this has quadratic complexity.
  for (CompatibleSets::SetTy &Set : Sets) {
    if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
      return Set;
  }

  // Otherwise, we either had no sets yet, or this invoke forms a new set.
  return Sets.emplace_back();
}

void CompatibleSets::insert(InvokeInst *II) {
  getCompatibleSet(II).emplace_back(II);
}

bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");

  // Can we theoretically merge these `invoke`s?
  auto IsIllegalToMerge = [](InvokeInst *II) {
    return II->cannotMerge() || II->isInlineAsm();
  };
  if (any_of(Invokes, IsIllegalToMerge))
    return false;

  // Either both `invoke`s must be direct,
  // or both `invoke`s must be indirect.
  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
  bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
  bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
  if (HaveIndirectCalls) {
    if (!AllCallsAreIndirect)
      return false;
  } else {
    // All callees must be identical.
    Value *Callee = nullptr;
    for (InvokeInst *II : Invokes) {
      Value *CurrCallee = II->getCalledOperand();
      assert(CurrCallee && "There is always a called operand.");
      if (!Callee)
        Callee = CurrCallee;
      else if (Callee != CurrCallee)
        return false;
    }
  }

  // Either both `invoke`s must not have a normal destination,
  // or both `invoke`s must have a normal destination.
  auto HasNormalDest = [](InvokeInst *II) {
    return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
  };
  if (any_of(Invokes, HasNormalDest)) {
    // Do not merge an `invoke` that does not have a normal destination with
    // one that does have a normal destination, even though doing so would be
    // legal.
    if (!all_of(Invokes, HasNormalDest))
      return false;

    // All normal destinations must be identical.
    BasicBlock *NormalBB = nullptr;
    for (InvokeInst *II : Invokes) {
      BasicBlock *CurrNormalBB = II->getNormalDest();
      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
      if (!NormalBB)
        NormalBB = CurrNormalBB;
      else if (NormalBB != CurrNormalBB)
        return false;
    }

    // In the normal destination, the incoming values for these two `invoke`s
    // must be compatible.
    SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
    if (!incomingValuesAreCompatible(
            NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
            &EquivalenceSet))
      return false;
  }

#ifndef NDEBUG
  // All unwind destinations must be identical.
  // We know that because we have started from said unwind destination.
  BasicBlock *UnwindBB = nullptr;
  for (InvokeInst *II : Invokes) {
    BasicBlock *CurrUnwindBB = II->getUnwindDest();
    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
    if (!UnwindBB)
      UnwindBB = CurrUnwindBB;
    else
      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
  }
#endif

  // In the unwind destination, the incoming values for these two `invoke`s
  // must be compatible.
  if (!incomingValuesAreCompatible(
          Invokes.front()->getUnwindDest(),
          {Invokes[0]->getParent(), Invokes[1]->getParent()}))
    return false;

  // Ignoring arguments, these `invoke`s must be identical,
  // including operand bundles.
  const InvokeInst *II0 = Invokes.front();
  for (auto *II : Invokes.drop_front())
    if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
      return false;

  // Can we theoretically form the data operands for the merged `invoke`?
  auto IsIllegalToMergeArguments = [](auto Ops) {
    Use &U0 = std::get<0>(Ops);
    Use &U1 = std::get<1>(Ops);
    if (U0 == U1)
      return false;
    return !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
                                          U0.getOperandNo());
  };
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
  if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
             IsIllegalToMergeArguments))
    return false;

  return true;
}

} // namespace

2793// Merge all invokes in the provided set, all of which are compatible
2794// as per the `CompatibleSets::shouldBelongToSameSet()`.
2795static void mergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
2796 DomTreeUpdater *DTU) {
2797 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2798
2799 SmallVector<DominatorTree::UpdateType, 8> Updates;
2800 if (DTU)
2801 Updates.reserve(N: 2 + 3 * Invokes.size());
2802
2803 bool HasNormalDest =
2804 !isa<UnreachableInst>(Val: Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2805
2806 // Clone one of the invokes into a new basic block.
2807 // Since they are all compatible, it doesn't matter which invoke is cloned.
2808 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2809 InvokeInst *II0 = Invokes.front();
2810 BasicBlock *II0BB = II0->getParent();
2811 BasicBlock *InsertBeforeBlock =
2812 II0->getParent()->getIterator()->getNextNode();
2813 Function *Func = II0BB->getParent();
2814 LLVMContext &Ctx = II0->getContext();
2815
2816 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2817 Context&: Ctx, Name: II0BB->getName() + ".invoke", Parent: Func, InsertBefore: InsertBeforeBlock);
2818
2819 auto *MergedInvoke = cast<InvokeInst>(Val: II0->clone());
2820 // NOTE: all invokes have the same attributes, so no handling needed.
2821 MergedInvoke->insertInto(ParentBB: MergedInvokeBB, It: MergedInvokeBB->end());
2822
2823 if (!HasNormalDest) {
2824 // This set does not have a normal destination,
2825 // so just form a new block with unreachable terminator.
2826 BasicBlock *MergedNormalDest = BasicBlock::Create(
2827 Context&: Ctx, Name: II0BB->getName() + ".cont", Parent: Func, InsertBefore: InsertBeforeBlock);
2828 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2829 UI->setDebugLoc(DebugLoc::getTemporary());
2830 MergedInvoke->setNormalDest(MergedNormalDest);
2831 }
2832
    // The unwind destination, however, remains identical for all invokes
    // here.
2834
2835 return MergedInvoke;
2836 }();
2837
2838 if (DTU) {
2839 // Predecessor blocks that contained these invokes will now branch to
2840 // the new block that contains the merged invoke, ...
2841 for (InvokeInst *II : Invokes)
2842 Updates.push_back(
2843 Elt: {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2844
2845 // ... which has the new `unreachable` block as normal destination,
2846 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2847 for (BasicBlock *SuccBBOfMergedInvoke : successors(I: MergedInvoke))
2848 Updates.push_back(Elt: {DominatorTree::Insert, MergedInvoke->getParent(),
2849 SuccBBOfMergedInvoke});
2850
2851 // Since predecessor blocks now unconditionally branch to a new block,
2852 // they no longer branch to their original successors.
2853 for (InvokeInst *II : Invokes)
2854 for (BasicBlock *SuccOfPredBB : successors(BB: II->getParent()))
2855 Updates.push_back(
2856 Elt: {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2857 }
2858
2859 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2860
2861 // Form the merged operands for the merged invoke.
2862 for (Use &U : MergedInvoke->operands()) {
2863 // Only PHI together the indirect callees and data operands.
2864 if (MergedInvoke->isCallee(U: &U)) {
2865 if (!IsIndirectCall)
2866 continue;
2867 } else if (!MergedInvoke->isDataOperand(U: &U))
2868 continue;
2869
2870 // Don't create trivial PHI's with all-identical incoming values.
2871 bool NeedPHI = any_of(Range&: Invokes, P: [&U](InvokeInst *II) {
2872 return II->getOperand(i_nocapture: U.getOperandNo()) != U.get();
2873 });
2874 if (!NeedPHI)
2875 continue;
2876
2877 // Form a PHI out of all the data ops under this index.
2878 PHINode *PN = PHINode::Create(
2879 Ty: U->getType(), /*NumReservedValues=*/Invokes.size(), NameStr: "", InsertBefore: MergedInvoke->getIterator());
2880 for (InvokeInst *II : Invokes)
2881 PN->addIncoming(V: II->getOperand(i_nocapture: U.getOperandNo()), BB: II->getParent());
2882
2883 U.set(PN);
2884 }
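  // As an illustrative sketch (hypothetical IR, not from the source), if the
  // two invokes differ only in a passed constant:
  //   bb0: invoke void @f(i8 42) to label %cont unwind label %lpad
  //   bb1: invoke void @f(i8 24) to label %cont unwind label %lpad
  // the merged block ends up with a PHI feeding the single invoke:
  //   %arg = phi i8 [ 42, %bb0 ], [ 24, %bb1 ]
  //   invoke void @f(i8 %arg) to label %cont unwind label %lpad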
2885
2886 // We've ensured that each PHI node has compatible (identical) incoming values
2887 // when coming from each of the `invoke`s in the current merge set,
2888 // so update the PHI nodes accordingly.
2889 for (BasicBlock *Succ : successors(I: MergedInvoke))
2890 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2891 /*ExistPred=*/Invokes.front()->getParent());
2892
2893 // And finally, replace the original `invoke`s with an unconditional branch
2894 // to the block with the merged `invoke`. Also, give that merged `invoke`
2895 // the merged debugloc of all the original `invoke`s.
2896 DILocation *MergedDebugLoc = nullptr;
2897 for (InvokeInst *II : Invokes) {
2898 // Compute the debug location common to all the original `invoke`s.
2899 if (!MergedDebugLoc)
2900 MergedDebugLoc = II->getDebugLoc();
2901 else
2902 MergedDebugLoc =
2903 DebugLoc::getMergedLocation(LocA: MergedDebugLoc, LocB: II->getDebugLoc());
2904
    // And replace the old `invoke` with an unconditional branch
    // to the block with the merged `invoke`.
2907 for (BasicBlock *OrigSuccBB : successors(BB: II->getParent()))
2908 OrigSuccBB->removePredecessor(Pred: II->getParent());
2909 auto *BI = BranchInst::Create(IfTrue: MergedInvoke->getParent(), InsertBefore: II->getParent());
2910 // The unconditional branch is part of the replacement for the original
2911 // invoke, so should use its DebugLoc.
2912 BI->setDebugLoc(II->getDebugLoc());
2913 bool Success = MergedInvoke->tryIntersectAttributes(Other: II);
2914 assert(Success && "Merged invokes with incompatible attributes");
    // Silence the unused-variable warning in NDEBUG builds.
2916 (void)Success;
2917 II->replaceAllUsesWith(V: MergedInvoke);
2918 II->eraseFromParent();
2919 ++NumInvokesMerged;
2920 }
2921 MergedInvoke->setDebugLoc(MergedDebugLoc);
2922 ++NumInvokeSetsFormed;
2923
2924 if (DTU)
2925 DTU->applyUpdates(Updates);
2926}
2927
2928/// If this block is a `landingpad` exception handling block, categorize all
2929/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2930/// being "mergeable" together, and then merge invokes in each set together.
2931///
2932/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2933/// [...] [...]
2934/// | |
2935/// [invoke0] [invoke1]
2936/// / \ / \
2937/// [cont0] [landingpad] [cont1]
2938/// to:
2939/// [...] [...]
2940/// \ /
2941/// [invoke]
2942/// / \
2943/// [cont] [landingpad]
2944///
2945/// But of course we can only do that if the invokes share the `landingpad`,
2946/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2947/// and the invoked functions are "compatible".
2948static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
2949 if (!EnableMergeCompatibleInvokes)
2950 return false;
2951
2952 bool Changed = false;
2953
2954 // FIXME: generalize to all exception handling blocks?
2955 if (!BB->isLandingPad())
2956 return Changed;
2957
2958 CompatibleSets Grouper;
2959
2960 // Record all the predecessors of this `landingpad`. As per verifier,
2961 // the only allowed predecessor is the unwind edge of an `invoke`.
2962 // We want to group "compatible" `invokes` into the same set to be merged.
2963 for (BasicBlock *PredBB : predecessors(BB))
2964 Grouper.insert(II: cast<InvokeInst>(Val: PredBB->getTerminator()));
2965
  // And now, merge `invoke`s that were grouped together.
2967 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2968 if (Invokes.size() < 2)
2969 continue;
2970 Changed = true;
2971 mergeCompatibleInvokesImpl(Invokes, DTU);
2972 }
2973
2974 return Changed;
2975}
2976
2977namespace {
2978/// Track ephemeral values, which should be ignored for cost-modelling
2979/// purposes. Requires walking instructions in reverse order.
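///
/// As an illustrative example (hypothetical IR), in
/// \code
///   %c = icmp eq i32 %x, 0
///   call void @llvm.assume(i1 %c)
/// \endcode
/// the assume itself is ephemeral, and %c is too once the assume has been
/// tracked, which is why instructions must be visited in reverse order.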
2980class EphemeralValueTracker {
2981 SmallPtrSet<const Instruction *, 32> EphValues;
2982
2983 bool isEphemeral(const Instruction *I) {
2984 if (isa<AssumeInst>(Val: I))
2985 return true;
2986 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2987 all_of(Range: I->users(), P: [&](const User *U) {
2988 return EphValues.count(Ptr: cast<Instruction>(Val: U));
2989 });
2990 }
2991
2992public:
2993 bool track(const Instruction *I) {
2994 if (isEphemeral(I)) {
2995 EphValues.insert(Ptr: I);
2996 return true;
2997 }
2998 return false;
2999 }
3000
3001 bool contains(const Instruction *I) const { return EphValues.contains(Ptr: I); }
3002};
3003} // namespace
3004
/// Determine if we can hoist a sole store instruction out of a
/// conditional block.
3007///
3008/// We are looking for code like the following:
3009/// BrBB:
3010/// store i32 %add, i32* %arrayidx2
3011/// ... // No other stores or function calls (we could be calling a memory
3012/// ... // function).
3013/// %cmp = icmp ult %x, %y
3014/// br i1 %cmp, label %EndBB, label %ThenBB
3015/// ThenBB:
3016/// store i32 %add5, i32* %arrayidx2
///     br label %EndBB
3018/// EndBB:
3019/// ...
3020/// We are going to transform this into:
3021/// BrBB:
3022/// store i32 %add, i32* %arrayidx2
3023/// ... //
3024/// %cmp = icmp ult %x, %y
///     %add.add5 = select i1 %cmp, i32 %add, i32 %add5
3026/// store i32 %add.add5, i32* %arrayidx2
3027/// ...
3028///
/// \return The value of the previous store, or a previous load of the same
/// location, if the store can be hoisted into the predecessor block; nullptr
/// otherwise.
3031static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
3032 BasicBlock *StoreBB, BasicBlock *EndBB) {
3033 StoreInst *StoreToHoist = dyn_cast<StoreInst>(Val: I);
3034 if (!StoreToHoist)
3035 return nullptr;
3036
3037 // Volatile or atomic.
3038 if (!StoreToHoist->isSimple())
3039 return nullptr;
3040
3041 Value *StorePtr = StoreToHoist->getPointerOperand();
3042 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3043
3044 // Look for a store to the same pointer in BrBB.
3045 unsigned MaxNumInstToLookAt = 9;
  // Skip pseudo probe intrinsic calls, which do not really kill any memory
  // accesses.
3048 for (Instruction &CurI : reverse(C: BrBB->instructionsWithoutDebug(SkipPseudoOp: true))) {
3049 if (!MaxNumInstToLookAt)
3050 break;
3051 --MaxNumInstToLookAt;
3052
    // Could be a call to a function that affects memory, like free().
3054 if (CurI.mayWriteToMemory() && !isa<StoreInst>(Val: CurI))
3055 return nullptr;
3056
3057 if (auto *SI = dyn_cast<StoreInst>(Val: &CurI)) {
3058 // Found the previous store to same location and type. Make sure it is
3059 // simple, to avoid introducing a spurious non-atomic write after an
3060 // atomic write.
3061 if (SI->getPointerOperand() == StorePtr &&
3062 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3063 SI->getAlign() >= StoreToHoist->getAlign())
3064 // Found the previous store, return its value operand.
3065 return SI->getValueOperand();
3066 return nullptr; // Unknown store.
3067 }
3068
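    // A previous plain load of the same pointer can also act as the "previous
    // value". Roughly (illustrative IR): given "%old = load i32, ptr %p" here
    // and a conditional "store i32 %new, ptr %p" in StoreBB, the caller can
    // instead unconditionally store a select of %new and %old, provided the
    // underlying object is writable and its provenance is not captured.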
3069 if (auto *LI = dyn_cast<LoadInst>(Val: &CurI)) {
3070 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3071 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3072 Value *Obj = getUnderlyingObject(V: StorePtr);
3073 bool ExplicitlyDereferenceableOnly;
3074 if (isWritableObject(Object: Obj, ExplicitlyDereferenceableOnly) &&
3075 capturesNothing(
3076 CC: PointerMayBeCaptured(V: Obj, /*ReturnCaptures=*/false,
3077 Mask: CaptureComponents::Provenance)) &&
3078 (!ExplicitlyDereferenceableOnly ||
3079 isDereferenceablePointer(V: StorePtr, Ty: StoreTy,
3080 DL: LI->getDataLayout()))) {
3081 // Found a previous load, return it.
3082 return LI;
3083 }
3084 }
3085 // The load didn't work out, but we may still find a store.
3086 }
3087 }
3088
3089 return nullptr;
3090}
3091
3092/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3093/// converted to selects.
3094static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
3095 BasicBlock *EndBB,
3096 unsigned &SpeculatedInstructions,
3097 InstructionCost &Cost,
3098 const TargetTransformInfo &TTI) {
3099 TargetTransformInfo::TargetCostKind CostKind =
3100 BB->getParent()->hasMinSize()
3101 ? TargetTransformInfo::TCK_CodeSize
3102 : TargetTransformInfo::TCK_SizeAndLatency;
3103
3104 bool HaveRewritablePHIs = false;
3105 for (PHINode &PN : EndBB->phis()) {
3106 Value *OrigV = PN.getIncomingValueForBlock(BB);
3107 Value *ThenV = PN.getIncomingValueForBlock(BB: ThenBB);
3108
    // FIXME: Try to remove some of the duplication with
    // hoistCommonCodeFromSuccessors.
    // Skip PHIs which are trivial.
    if (ThenV == OrigV)
      continue;
3113
3114 Cost += TTI.getCmpSelInstrCost(Opcode: Instruction::Select, ValTy: PN.getType(),
3115 CondTy: CmpInst::makeCmpResultType(opnd_type: PN.getType()),
3116 VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind);
3117
3118 // Don't convert to selects if we could remove undefined behavior instead.
3119 if (passingValueIsAlwaysUndefined(V: OrigV, I: &PN) ||
3120 passingValueIsAlwaysUndefined(V: ThenV, I: &PN))
3121 return false;
3122
3123 HaveRewritablePHIs = true;
3124 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(Val: OrigV);
3125 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(Val: ThenV);
3126 if (!OrigCE && !ThenCE)
3127 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3128
3129 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(I: OrigCE, TTI) : 0;
3130 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(I: ThenCE, TTI) : 0;
3131 InstructionCost MaxCost =
3132 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3133 if (OrigCost + ThenCost > MaxCost)
3134 return false;
3135
3136 // Account for the cost of an unfolded ConstantExpr which could end up
3137 // getting expanded into Instructions.
3138 // FIXME: This doesn't account for how many operations are combined in the
3139 // constant expression.
3140 ++SpeculatedInstructions;
3141 if (SpeculatedInstructions > 1)
3142 return false;
3143 }
3144
3145 return HaveRewritablePHIs;
3146}
3147
3148static bool isProfitableToSpeculate(const BranchInst *BI,
3149 std::optional<bool> Invert,
3150 const TargetTransformInfo &TTI) {
3151 // If the branch is non-unpredictable, and is predicted to *not* branch to
3152 // the `then` block, then avoid speculating it.
3153 if (BI->getMetadata(KindID: LLVMContext::MD_unpredictable))
3154 return true;
3155
3156 uint64_t TWeight, FWeight;
3157 if (!extractBranchWeights(I: *BI, TrueVal&: TWeight, FalseVal&: FWeight) || (TWeight + FWeight) == 0)
3158 return true;
3159
3160 if (!Invert.has_value())
3161 return false;
3162
3163 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3164 BranchProbability BIEndProb =
3165 BranchProbability::getBranchProbability(Numerator: EndWeight, Denominator: TWeight + FWeight);
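  // For instance (illustrative numbers only): with !prof weights 1:7 and
  // Invert == false, EndWeight = 7 and BIEndProb = 7/8, i.e. the `then` block
  // is predicted to be entered just 1 time in 8, so speculation only pays off
  // if 7/8 is still below the target's predictability threshold.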
3166 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3167 return BIEndProb < Likely;
3168}
3169
3170/// Speculate a conditional basic block flattening the CFG.
3171///
3172/// Note that this is a very risky transform currently. Speculating
3173/// instructions like this is most often not desirable. Instead, there is an MI
3174/// pass which can do it with full awareness of the resource constraints.
/// However, some cases are "obvious" and we should do them directly. An
/// example of this is speculating a single, reasonably cheap instruction.
3177///
3178/// There is only one distinct advantage to flattening the CFG at the IR level:
/// it makes very common but simplistic optimizations, such as those in
/// instcombine and the DAG combiner, more powerful by removing CFG edges and
/// modeling their effects with easier-to-reason-about SSA value graphs.
///
3184/// An illustration of this transform is turning this IR:
3185/// \code
3186/// BB:
3187/// %cmp = icmp ult %x, %y
3188/// br i1 %cmp, label %EndBB, label %ThenBB
3189/// ThenBB:
3190/// %sub = sub %x, %y
///   br label %EndBB
3192/// EndBB:
3193/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3194/// ...
3195/// \endcode
3196///
3197/// Into this IR:
3198/// \code
3199/// BB:
3200/// %cmp = icmp ult %x, %y
3201/// %sub = sub %x, %y
3202/// %cond = select i1 %cmp, 0, %sub
3203/// ...
3204/// \endcode
3205///
3206/// \returns true if the conditional block is removed.
3207bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3208 BasicBlock *ThenBB) {
3209 if (!Options.SpeculateBlocks)
3210 return false;
3211
  // Be conservative for now. FP select instructions can often be expensive.
3213 Value *BrCond = BI->getCondition();
3214 if (isa<FCmpInst>(Val: BrCond))
3215 return false;
3216
3217 BasicBlock *BB = BI->getParent();
3218 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(Idx: 0);
3219 InstructionCost Budget =
3220 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3221
3222 // If ThenBB is actually on the false edge of the conditional branch, remember
3223 // to swap the select operands later.
3224 bool Invert = false;
3225 if (ThenBB != BI->getSuccessor(i: 0)) {
3226 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3227 Invert = true;
3228 }
  assert(EndBB == BI->getSuccessor(!Invert) &&
         "No edge from 'if' block to end block");
3230
3231 if (!isProfitableToSpeculate(BI, Invert, TTI))
3232 return false;
3233
3234 // Keep a count of how many times instructions are used within ThenBB when
3235 // they are candidates for sinking into ThenBB. Specifically:
3236 // - They are defined in BB, and
3237 // - They have no side effects, and
3238 // - All of their uses are in ThenBB.
3239 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3240
3241 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3242
3243 unsigned SpeculatedInstructions = 0;
3244 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3245 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3246 Value *SpeculatedStoreValue = nullptr;
3247 StoreInst *SpeculatedStore = nullptr;
3248 EphemeralValueTracker EphTracker;
3249 for (Instruction &I : reverse(C: drop_end(RangeOrContainer&: *ThenBB))) {
3250 // Skip pseudo probes. The consequence is we lose track of the branch
3251 // probability for ThenBB, which is fine since the optimization here takes
3252 // place regardless of the branch probability.
3253 if (isa<PseudoProbeInst>(Val: I)) {
3254 // The probe should be deleted so that it will not be over-counted when
3255 // the samples collected on the non-conditional path are counted towards
3256 // the conditional path. We leave it for the counts inference algorithm to
3257 // figure out a proper count for an unknown probe.
3258 SpeculatedPseudoProbes.push_back(Elt: &I);
3259 continue;
3260 }
3261
3262 // Ignore ephemeral values, they will be dropped by the transform.
3263 if (EphTracker.track(I: &I))
3264 continue;
3265
3266 // Only speculatively execute a single instruction (not counting the
3267 // terminator) for now.
3268 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3269 isSafeCheapLoadStore(I: &I, TTI) &&
3270 SpeculatedConditionalLoadsStores.size() <
3271 HoistLoadsStoresWithCondFaultingThreshold;
    // Don't count the load/store towards the cost if the target supports
    // conditional faulting, because it's cheap to speculate it.
3274 if (IsSafeCheapLoadStore)
3275 SpeculatedConditionalLoadsStores.push_back(Elt: &I);
3276 else
3277 ++SpeculatedInstructions;
3278
3279 if (SpeculatedInstructions > 1)
3280 return false;
3281
3282 // Don't hoist the instruction if it's unsafe or expensive.
3283 if (!IsSafeCheapLoadStore &&
3284 !isSafeToSpeculativelyExecute(I: &I, CtxI: BI, AC: Options.AC) &&
3285 !(HoistCondStores && !SpeculatedStoreValue &&
3286 (SpeculatedStoreValue =
3287 isSafeToSpeculateStore(I: &I, BrBB: BB, StoreBB: ThenBB, EndBB))))
3288 return false;
3289 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3290 computeSpeculationCost(I: &I, TTI) >
3291 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
3292 return false;
3293
3294 // Store the store speculation candidate.
3295 if (!SpeculatedStore && SpeculatedStoreValue)
3296 SpeculatedStore = cast<StoreInst>(Val: &I);
3297
3298 // Do not hoist the instruction if any of its operands are defined but not
3299 // used in BB. The transformation will prevent the operand from
3300 // being sunk into the use block.
3301 for (Use &Op : I.operands()) {
3302 Instruction *OpI = dyn_cast<Instruction>(Val&: Op);
3303 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3304 continue; // Not a candidate for sinking.
3305
3306 ++SinkCandidateUseCounts[OpI];
3307 }
3308 }
3309
3310 // Consider any sink candidates which are only used in ThenBB as costs for
3311 // speculation. Note, while we iterate over a DenseMap here, we are summing
3312 // and so iteration order isn't significant.
3313 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3314 if (Inst->hasNUses(N: Count)) {
3315 ++SpeculatedInstructions;
3316 if (SpeculatedInstructions > 1)
3317 return false;
3318 }
3319
3320 // Check that we can insert the selects and that it's not too expensive to do
3321 // so.
3322 bool Convert =
3323 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3324 InstructionCost Cost = 0;
3325 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3326 SpeculatedInstructions, Cost, TTI);
3327 if (!Convert || Cost > Budget)
3328 return false;
3329
3330 // If we get here, we can hoist the instruction and if-convert.
3331 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3332
3333 Instruction *Sel = nullptr;
3334 // Insert a select of the value of the speculated store.
3335 if (SpeculatedStoreValue) {
3336 IRBuilder<NoFolder> Builder(BI);
3337 Value *OrigV = SpeculatedStore->getValueOperand();
3338 Value *TrueV = SpeculatedStore->getValueOperand();
3339 Value *FalseV = SpeculatedStoreValue;
3340 if (Invert)
3341 std::swap(a&: TrueV, b&: FalseV);
3342 Value *S = Builder.CreateSelect(
3343 C: BrCond, True: TrueV, False: FalseV, Name: "spec.store.select", MDFrom: BI);
3344 Sel = cast<Instruction>(Val: S);
3345 SpeculatedStore->setOperand(i_nocapture: 0, Val_nocapture: S);
3346 SpeculatedStore->applyMergedLocation(LocA: BI->getDebugLoc(),
3347 LocB: SpeculatedStore->getDebugLoc());
3348 // The value stored is still conditional, but the store itself is now
    // unconditionally executed, so we must be sure that any linked dbg.assign
3350 // intrinsics are tracking the new stored value (the result of the
3351 // select). If we don't, and the store were to be removed by another pass
3352 // (e.g. DSE), then we'd eventually end up emitting a location describing
3353 // the conditional value, unconditionally.
3354 //
3355 // === Before this transformation ===
3356 // pred:
3357 // store %one, %x.dest, !DIAssignID !1
3358 // dbg.assign %one, "x", ..., !1, ...
3359 // br %cond if.then
3360 //
3361 // if.then:
3362 // store %two, %x.dest, !DIAssignID !2
3363 // dbg.assign %two, "x", ..., !2, ...
3364 //
3365 // === After this transformation ===
3366 // pred:
3367 // store %one, %x.dest, !DIAssignID !1
3368 // dbg.assign %one, "x", ..., !1
    // ...
3370 // %merge = select %cond, %two, %one
3371 // store %merge, %x.dest, !DIAssignID !2
3372 // dbg.assign %merge, "x", ..., !2
3373 for (DbgVariableRecord *DbgAssign :
3374 at::getDVRAssignmentMarkers(Inst: SpeculatedStore))
3375 if (llvm::is_contained(Range: DbgAssign->location_ops(), Element: OrigV))
3376 DbgAssign->replaceVariableLocationOp(OldValue: OrigV, NewValue: S);
3377 }
3378
3379 // Metadata can be dependent on the condition we are hoisting above.
3380 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3381 // to avoid making it appear as if the condition is a constant, which would
3382 // be misleading while debugging.
3383 // Similarly strip attributes that maybe dependent on condition we are
3384 // hoisting above.
3385 for (auto &I : make_early_inc_range(Range&: *ThenBB)) {
3386 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3387 I.dropLocation();
3388 }
3389 I.dropUBImplyingAttrsAndMetadata();
3390
3391 // Drop ephemeral values.
3392 if (EphTracker.contains(I: &I)) {
3393 I.replaceAllUsesWith(V: PoisonValue::get(T: I.getType()));
3394 I.eraseFromParent();
3395 }
3396 }
3397
3398 // Hoist the instructions.
3399 // Drop DbgVariableRecords attached to these instructions.
3400 for (auto &It : *ThenBB)
3401 for (DbgRecord &DR : make_early_inc_range(Range: It.getDbgRecordRange()))
3402 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3403 // equivalent).
3404 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(Val: &DR);
3405 !DVR || !DVR->isDbgAssign())
3406 It.dropOneDbgRecord(I: &DR);
3407 BB->splice(ToIt: BI->getIterator(), FromBB: ThenBB, FromBeginIt: ThenBB->begin(),
3408 FromEndIt: std::prev(x: ThenBB->end()));
3409
3410 if (!SpeculatedConditionalLoadsStores.empty())
3411 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3412 Sel);
3413
3414 // Insert selects and rewrite the PHI operands.
3415 IRBuilder<NoFolder> Builder(BI);
3416 for (PHINode &PN : EndBB->phis()) {
3417 unsigned OrigI = PN.getBasicBlockIndex(BB);
3418 unsigned ThenI = PN.getBasicBlockIndex(BB: ThenBB);
3419 Value *OrigV = PN.getIncomingValue(i: OrigI);
3420 Value *ThenV = PN.getIncomingValue(i: ThenI);
3421
3422 // Skip PHIs which are trivial.
3423 if (OrigV == ThenV)
3424 continue;
3425
3426 // Create a select whose true value is the speculatively executed value and
3427 // false value is the pre-existing value. Swap them if the branch
3428 // destinations were inverted.
3429 Value *TrueV = ThenV, *FalseV = OrigV;
3430 if (Invert)
3431 std::swap(a&: TrueV, b&: FalseV);
3432 Value *V = Builder.CreateSelect(C: BrCond, True: TrueV, False: FalseV, Name: "spec.select", MDFrom: BI);
3433 PN.setIncomingValue(i: OrigI, V);
3434 PN.setIncomingValue(i: ThenI, V);
3435 }
3436
3437 // Remove speculated pseudo probes.
3438 for (Instruction *I : SpeculatedPseudoProbes)
3439 I->eraseFromParent();
3440
3441 ++NumSpeculations;
3442 return true;
3443}
3444
3445using BlocksSet = SmallPtrSet<BasicBlock *, 8>;
3446
// Return false if the number of blocks searched is too large.
3448static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3449 BlocksSet &ReachesNonLocalUses) {
3450 if (BB == DefBB)
3451 return true;
3452 if (!ReachesNonLocalUses.insert(Ptr: BB).second)
3453 return true;
3454
3455 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3456 return false;
3457 for (BasicBlock *Pred : predecessors(BB))
3458 if (!findReaching(BB: Pred, DefBB, ReachesNonLocalUses))
3459 return false;
3460 return true;
3461}
3462
3463/// Return true if we can thread a branch across this block.
3464static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
3465 BlocksSet &NonLocalUseBlocks) {
3466 int Size = 0;
3467 EphemeralValueTracker EphTracker;
3468
  // Walk the block in reverse so that we can identify ephemeral values
  // properly (values only feeding assumes).
3471 for (Instruction &I : reverse(C: BB->instructionsWithoutDebug(SkipPseudoOp: false))) {
3472 // Can't fold blocks that contain noduplicate or convergent calls.
3473 if (CallInst *CI = dyn_cast<CallInst>(Val: &I))
3474 if (CI->cannotDuplicate() || CI->isConvergent())
3475 return false;
3476
    // Ignore ephemeral values, which are deleted during codegen.
    // We will delete PHIs while threading, so PHIs should not count towards
    // the block's size.
3480 if (!EphTracker.track(I: &I) && !isa<PHINode>(Val: I)) {
3481 if (Size++ > MaxSmallBlockSize)
3482 return false; // Don't clone large BB's.
3483 }
3484
3485 // Record blocks with non-local uses of values defined in the current basic
3486 // block.
3487 for (User *U : I.users()) {
3488 Instruction *UI = cast<Instruction>(Val: U);
3489 BasicBlock *UsedInBB = UI->getParent();
3490 if (UsedInBB == BB) {
3491 if (isa<PHINode>(Val: UI))
3492 return false;
3493 } else
3494 NonLocalUseBlocks.insert(Ptr: UsedInBB);
3495 }
3496
3497 // Looks ok, continue checking.
3498 }
3499
3500 return true;
3501}
3502
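/// Return the constant i1 value of \p V known on the edge From -> To, if any.
/// For example (illustrative IR):
/// \code
///   from:
///     br i1 %v, label %to, label %other
/// \endcode
/// Arriving at %to from %from implies %v was true, provided %to and %other
/// are distinct and %v is not defined in %to.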
3503static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
3504 BasicBlock *To) {
  // Don't look past the block defining the value; we might get the value from
  // a previous loop iteration.
3507 auto *I = dyn_cast<Instruction>(Val: V);
3508 if (I && I->getParent() == To)
3509 return nullptr;
3510
3511 // We know the value if the From block branches on it.
3512 auto *BI = dyn_cast<BranchInst>(Val: From->getTerminator());
3513 if (BI && BI->isConditional() && BI->getCondition() == V &&
3514 BI->getSuccessor(i: 0) != BI->getSuccessor(i: 1))
3515 return BI->getSuccessor(i: 0) == To ? ConstantInt::getTrue(Context&: BI->getContext())
3516 : ConstantInt::getFalse(Context&: BI->getContext());
3517
3518 return nullptr;
3519}
3520
3521/// If we have a conditional branch on something for which we know the constant
3522/// value in predecessors (e.g. a phi node in the current block), thread edges
3523/// from the predecessor to their ultimate destination.
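///
/// For example (illustrative IR, names hypothetical):
/// \code
///   bb:
///     %cond = phi i1 [ true, %pred0 ], [ %c, %pred1 ]
///     br i1 %cond, label %t, label %f
/// \endcode
/// Here the edge pred0 -> bb can be redirected straight to %t, after cloning
/// any intervening instructions of bb into the new edge block.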
3524static std::optional<bool>
3525foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
3526 const DataLayout &DL,
3527 AssumptionCache *AC) {
3528 SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
3529 BasicBlock *BB = BI->getParent();
3530 Value *Cond = BI->getCondition();
3531 PHINode *PN = dyn_cast<PHINode>(Val: Cond);
3532 if (PN && PN->getParent() == BB) {
3533 // Degenerate case of a single entry PHI.
3534 if (PN->getNumIncomingValues() == 1) {
3535 FoldSingleEntryPHINodes(BB: PN->getParent());
3536 return true;
3537 }
3538
3539 for (Use &U : PN->incoming_values())
3540 if (auto *CB = dyn_cast<ConstantInt>(Val&: U))
3541 KnownValues[CB].insert(X: PN->getIncomingBlock(U));
3542 } else {
3543 for (BasicBlock *Pred : predecessors(BB)) {
3544 if (ConstantInt *CB = getKnownValueOnEdge(V: Cond, From: Pred, To: BB))
3545 KnownValues[CB].insert(X: Pred);
3546 }
3547 }
3548
3549 if (KnownValues.empty())
3550 return false;
3551
3552 // Now we know that this block has multiple preds and two succs.
3553 // Check that the block is small enough and record which non-local blocks use
3554 // values defined in the block.
3555
3556 BlocksSet NonLocalUseBlocks;
3557 BlocksSet ReachesNonLocalUseBlocks;
3558 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3559 return false;
3560
3561 // Jump-threading can only be done to destinations where no values defined
3562 // in BB are live.
3563
3564 // Quickly check if both destinations have uses. If so, jump-threading cannot
3565 // be done.
3566 if (NonLocalUseBlocks.contains(Ptr: BI->getSuccessor(i: 0)) &&
3567 NonLocalUseBlocks.contains(Ptr: BI->getSuccessor(i: 1)))
3568 return false;
3569
3570 // Search backward from NonLocalUseBlocks to find which blocks
3571 // reach non-local uses.
3572 for (BasicBlock *UseBB : NonLocalUseBlocks)
3573 // Give up if too many blocks are searched.
3574 if (!findReaching(BB: UseBB, DefBB: BB, ReachesNonLocalUses&: ReachesNonLocalUseBlocks))
3575 return false;
3576
3577 for (const auto &Pair : KnownValues) {
3578 ConstantInt *CB = Pair.first;
3579 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3580 BasicBlock *RealDest = BI->getSuccessor(i: !CB->getZExtValue());
3581
3582 // Okay, we now know that all edges from PredBB should be revectored to
3583 // branch to RealDest.
3584 if (RealDest == BB)
3585 continue; // Skip self loops.
3586
3587 // Skip if the predecessor's terminator is an indirect branch.
3588 if (any_of(Range&: PredBBs, P: [](BasicBlock *PredBB) {
3589 return isa<IndirectBrInst>(Val: PredBB->getTerminator());
3590 }))
3591 continue;
3592
3593 // Only revector to RealDest if no values defined in BB are live.
3594 if (ReachesNonLocalUseBlocks.contains(Ptr: RealDest))
3595 continue;
3596
3597 LLVM_DEBUG({
3598 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3599 << " has value " << *Pair.first << " in predecessors:\n";
3600 for (const BasicBlock *PredBB : Pair.second)
3601 dbgs() << " " << PredBB->getName() << "\n";
3602 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3603 });
3604
3605 // Split the predecessors we are threading into a new edge block. We'll
3606 // clone the instructions into this block, and then redirect it to RealDest.
3607 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, Preds: PredBBs, Suffix: ".critedge", DTU);
3608 if (!EdgeBB)
3609 continue;
3610
    // TODO: These just exist to reduce test diff; we can drop them if we like.
3612 EdgeBB->setName(RealDest->getName() + ".critedge");
3613 EdgeBB->moveBefore(MovePos: RealDest);
3614
3615 // Update PHI nodes.
3616 addPredecessorToBlock(Succ: RealDest, NewPred: EdgeBB, ExistPred: BB);
3617
3618 // BB may have instructions that are being threaded over. Clone these
3619 // instructions into EdgeBB. We know that there will be no uses of the
3620 // cloned instructions outside of EdgeBB.
3621 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3622 ValueToValueMapTy TranslateMap; // Track translated values.
3623 TranslateMap[Cond] = CB;
3624
3625 // RemoveDIs: track instructions that we optimise away while folding, so
3626 // that we can copy DbgVariableRecords from them later.
3627 BasicBlock::iterator SrcDbgCursor = BB->begin();
3628 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3629 if (PHINode *PN = dyn_cast<PHINode>(Val&: BBI)) {
3630 TranslateMap[PN] = PN->getIncomingValueForBlock(BB: EdgeBB);
3631 continue;
3632 }
3633 // Clone the instruction.
3634 Instruction *N = BBI->clone();
3635 // Insert the new instruction into its new home.
3636 N->insertInto(ParentBB: EdgeBB, It: InsertPt);
3637
3638 if (BBI->hasName())
3639 N->setName(BBI->getName() + ".c");
3640
3641 // Update operands due to translation.
3642 // Key Instructions: Remap all the atom groups.
3643 if (const DebugLoc &DL = BBI->getDebugLoc())
3644 mapAtomInstance(DL, VMap&: TranslateMap);
3645 RemapInstruction(I: N, VM&: TranslateMap,
3646 Flags: RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
3647
3648 // Check for trivial simplification.
3649 if (Value *V = simplifyInstruction(I: N, Q: {DL, nullptr, nullptr, AC})) {
3650 if (!BBI->use_empty())
3651 TranslateMap[&*BBI] = V;
3652 if (!N->mayHaveSideEffects()) {
          N->eraseFromParent(); // Instruction folded away; no actual
                                // instruction needed.
3655 N = nullptr;
3656 }
3657 } else {
3658 if (!BBI->use_empty())
3659 TranslateMap[&*BBI] = N;
3660 }
3661 if (N) {
        // Copy all debug-info attached to instructions from the last one we
        // successfully cloned, up to this instruction (they might have been
3664 // folded away).
3665 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3666 N->cloneDebugInfoFrom(From: &*SrcDbgCursor);
3667 SrcDbgCursor = std::next(x: BBI);
3668 // Clone debug-info on this instruction too.
3669 N->cloneDebugInfoFrom(From: &*BBI);
3670
3671 // Register the new instruction with the assumption cache if necessary.
3672 if (auto *Assume = dyn_cast<AssumeInst>(Val: N))
3673 if (AC)
3674 AC->registerAssumption(CI: Assume);
3675 }
3676 }
3677
3678 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3679 InsertPt->cloneDebugInfoFrom(From: &*SrcDbgCursor);
3680 InsertPt->cloneDebugInfoFrom(From: BI);
3681
3682 BB->removePredecessor(Pred: EdgeBB);
3683 BranchInst *EdgeBI = cast<BranchInst>(Val: EdgeBB->getTerminator());
3684 EdgeBI->setSuccessor(idx: 0, NewSucc: RealDest);
3685 EdgeBI->setDebugLoc(BI->getDebugLoc());
3686
3687 if (DTU) {
3688 SmallVector<DominatorTree::UpdateType, 2> Updates;
3689 Updates.push_back(Elt: {DominatorTree::Delete, EdgeBB, BB});
3690 Updates.push_back(Elt: {DominatorTree::Insert, EdgeBB, RealDest});
3691 DTU->applyUpdates(Updates);
3692 }
3693
3694 // For simplicity, we created a separate basic block for the edge. Merge
3695 // it back into the predecessor if possible. This not only avoids
3696 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3697 // bypass the check for trivial cycles above.
3698 MergeBlockIntoPredecessor(BB: EdgeBB, DTU);
3699
3700 // Signal repeat, simplifying any other constants.
3701 return std::nullopt;
3702 }
3703
3704 return false;
3705}
3706
3707bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
3708 // Note: If BB is a loop header then there is a risk that threading introduces
3709 // a non-canonical loop by moving a back edge. So we avoid this optimization
3710 // for loop headers if NeedCanonicalLoop is set.
3711 if (Options.NeedCanonicalLoop && is_contained(Range&: LoopHeaders, Element: BI->getParent()))
3712 return false;
3713
3714 std::optional<bool> Result;
3715 bool EverChanged = false;
3716 do {
    // Note that std::nullopt means "we changed things, but recurse further."
3718 Result =
3719 foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC: Options.AC);
3720 EverChanged |= Result == std::nullopt || *Result;
3721 } while (Result == std::nullopt);
3722 return EverChanged;
3723}
3724
3725/// Given a BB that starts with the specified two-entry PHI node,
3726/// see if we can eliminate it.
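///
/// Roughly (illustrative IR), this turns the diamond
/// \code
///   entry:
///     br i1 %cond, label %then, label %else
///   then:
///     br label %merge
///   else:
///     br label %merge
///   merge:
///     %phi = phi i32 [ %a, %then ], [ %b, %else ]
/// \endcode
/// into a single block where the PHI becomes
/// "%phi = select i1 %cond, i32 %a, i32 %b".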
3727static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
3728 DomTreeUpdater *DTU, AssumptionCache *AC,
3729 const DataLayout &DL,
3730 bool SpeculateUnpredictables) {
  // Ok, this is a two-entry PHI node. Check to see if this is a simple "if
3732 // statement", which has a very simple dominance structure. Basically, we
3733 // are trying to find the condition that is being branched on, which
3734 // subsequently causes this merge to happen. We really want control
3735 // dependence information for this check, but simplifycfg can't keep it up
3736 // to date, and this catches most of the cases we care about anyway.
3737 BasicBlock *BB = PN->getParent();
3738
3739 BasicBlock *IfTrue, *IfFalse;
3740 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3741 if (!DomBI)
3742 return false;
3743 Value *IfCond = DomBI->getCondition();
3744 // Don't bother if the branch will be constant folded trivially.
3745 if (isa<ConstantInt>(Val: IfCond))
3746 return false;
3747
3748 BasicBlock *DomBlock = DomBI->getParent();
3749 SmallVector<BasicBlock *, 2> IfBlocks;
3750 llvm::copy_if(
3751 Range: PN->blocks(), Out: std::back_inserter(x&: IfBlocks), P: [](BasicBlock *IfBlock) {
3752 return cast<BranchInst>(Val: IfBlock->getTerminator())->isUnconditional();
3753 });
3754 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3755 "Will have either one or two blocks to speculate.");
3756
3757 // If the branch is non-unpredictable, see if we either predictably jump to
3758 // the merge bb (if we have only a single 'then' block), or if we predictably
3759 // jump to one specific 'then' block (if we have two of them).
3760 // It isn't beneficial to speculatively execute the code
3761 // from the block that we know is predictably not entered.
3762 bool IsUnpredictable = DomBI->getMetadata(KindID: LLVMContext::MD_unpredictable);
3763 if (!IsUnpredictable) {
3764 uint64_t TWeight, FWeight;
3765 if (extractBranchWeights(I: *DomBI, TrueVal&: TWeight, FalseVal&: FWeight) &&
3766 (TWeight + FWeight) != 0) {
3767 BranchProbability BITrueProb =
3768 BranchProbability::getBranchProbability(Numerator: TWeight, Denominator: TWeight + FWeight);
3769 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3770 BranchProbability BIFalseProb = BITrueProb.getCompl();
3771 if (IfBlocks.size() == 1) {
3772 BranchProbability BIBBProb =
3773 DomBI->getSuccessor(i: 0) == BB ? BITrueProb : BIFalseProb;
3774 if (BIBBProb >= Likely)
3775 return false;
3776 } else {
3777 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3778 return false;
3779 }
3780 }
3781 }
3782
3783 // Don't try to fold an unreachable block. For example, the phi node itself
3784 // can't be the candidate if-condition for a select that we want to form.
3785 if (auto *IfCondPhiInst = dyn_cast<PHINode>(Val: IfCond))
3786 if (IfCondPhiInst->getParent() == BB)
3787 return false;
3788
3789 // Okay, we found that we can merge this two-entry phi node into a select.
  // Doing so would require us to fold *all* two-entry phi nodes in this block.
3791 // At some point this becomes non-profitable (particularly if the target
3792 // doesn't support cmov's). Only do this transformation if there are two or
3793 // fewer PHI nodes in this block.
3794 unsigned NumPhis = 0;
3795 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(Val: I); ++NumPhis, ++I)
3796 if (NumPhis > 2)
3797 return false;
3798
3799 // Loop over the PHI's seeing if we can promote them all to select
3800 // instructions. While we are at it, keep track of the instructions
3801 // that need to be moved to the dominating block.
3802 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3803 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3804 InstructionCost Cost = 0;
3805 InstructionCost Budget =
3806 TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3807 if (SpeculateUnpredictables && IsUnpredictable)
3808 Budget += TTI.getBranchMispredictPenalty();
3809
3810 bool Changed = false;
3811 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(Val: II);) {
3812 PHINode *PN = cast<PHINode>(Val: II++);
3813 if (Value *V = simplifyInstruction(I: PN, Q: {DL, PN})) {
3814 PN->replaceAllUsesWith(V);
3815 PN->eraseFromParent();
3816 Changed = true;
3817 continue;
3818 }
3819
3820 if (!dominatesMergePoint(V: PN->getIncomingValue(i: 0), BB, InsertPt: DomBI,
3821 AggressiveInsts, Cost, Budget, TTI, AC,
3822 ZeroCostInstructions) ||
3823 !dominatesMergePoint(V: PN->getIncomingValue(i: 1), BB, InsertPt: DomBI,
3824 AggressiveInsts, Cost, Budget, TTI, AC,
3825 ZeroCostInstructions))
3826 return Changed;
3827 }
3828
3829 // If we folded the first phi, PN dangles at this point. Refresh it. If
3830 // we ran out of PHIs then we simplified them all.
3831 PN = dyn_cast<PHINode>(Val: BB->begin());
3832 if (!PN)
3833 return true;
3834
3835 // Return true if at least one of these is a 'not', and another is either
3836 // a 'not' too, or a constant.
3837 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3838 if (!match(V: V0, P: m_Not(V: m_Value())))
3839 std::swap(a&: V0, b&: V1);
3840 auto Invertible = m_CombineOr(L: m_Not(V: m_Value()), R: m_AnyIntegralConstant());
3841 return match(V: V0, P: m_Not(V: m_Value())) && match(V: V1, P: Invertible);
3842 };
3843
3844 // Don't fold i1 branches on PHIs which contain binary operators or
3845 // (possibly inverted) select form of or/ands, unless one of
  // the incoming values is a 'not' and another one is freely invertible.
3847 // These can often be turned into switches and other things.
3848 auto IsBinOpOrAnd = [](Value *V) {
3849 return match(
3850 V, P: m_CombineOr(L: m_BinOp(), R: m_c_Select(L: m_ImmConstant(), R: m_Value())));
3851 };
3852 if (PN->getType()->isIntegerTy(Bitwidth: 1) &&
3853 (IsBinOpOrAnd(PN->getIncomingValue(i: 0)) ||
3854 IsBinOpOrAnd(PN->getIncomingValue(i: 1)) || IsBinOpOrAnd(IfCond)) &&
3855 !CanHoistNotFromBothValues(PN->getIncomingValue(i: 0),
3856 PN->getIncomingValue(i: 1)))
3857 return Changed;
3858
3859 // If all PHI nodes are promotable, check to make sure that all instructions
3860 // in the predecessor blocks can be promoted as well. If not, we won't be able
3861 // to get rid of the control flow, so it's not worth promoting to select
3862 // instructions.
3863 for (BasicBlock *IfBlock : IfBlocks)
3864 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3865 if (!AggressiveInsts.count(Ptr: &*I) && !I->isDebugOrPseudoInst()) {
3866 // This is not an aggressive instruction that we can promote.
3867 // Because of this, we won't be able to get rid of the control flow, so
3868 // the xform is not worth it.
3869 return Changed;
3870 }
3871
  // If either of the blocks has its address taken, we can't do this fold.
3873 if (any_of(Range&: IfBlocks,
3874 P: [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3875 return Changed;
3876
3877 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3878 if (IsUnpredictable) dbgs() << " (unpredictable)";
3879 dbgs() << " T: " << IfTrue->getName()
3880 << " F: " << IfFalse->getName() << "\n");
3881
3882 // If we can still promote the PHI nodes after this gauntlet of tests,
3883 // do all of the PHI's now.
3884
3885 // Move all 'aggressive' instructions, which are defined in the
3886 // conditional parts of the if's up to the dominating block.
3887 for (BasicBlock *IfBlock : IfBlocks)
3888 hoistAllInstructionsInto(DomBlock, InsertPt: DomBI, BB: IfBlock);
3889
3890 IRBuilder<NoFolder> Builder(DomBI);
3891 // Propagate fast-math-flags from phi nodes to replacement selects.
3892 while (PHINode *PN = dyn_cast<PHINode>(Val: BB->begin())) {
3893 // Change the PHI node into a select instruction.
3894 Value *TrueVal = PN->getIncomingValueForBlock(BB: IfTrue);
3895 Value *FalseVal = PN->getIncomingValueForBlock(BB: IfFalse);
3896
3897 Value *Sel = Builder.CreateSelectFMF(C: IfCond, True: TrueVal, False: FalseVal,
3898 FMFSource: isa<FPMathOperator>(Val: PN) ? PN : nullptr,
3899 Name: "", MDFrom: DomBI);
3900 PN->replaceAllUsesWith(V: Sel);
3901 Sel->takeName(V: PN);
3902 PN->eraseFromParent();
3903 }
3904
  // At this point, all IfBlocks are empty, so our if statement
  // has been flattened. Change DomBlock to jump directly to our new block to
  // avoid other SimplifyCFG iterations kicking in on the diamond.
3908 Builder.CreateBr(Dest: BB);
3909
3910 SmallVector<DominatorTree::UpdateType, 3> Updates;
3911 if (DTU) {
3912 Updates.push_back(Elt: {DominatorTree::Insert, DomBlock, BB});
3913 for (auto *Successor : successors(BB: DomBlock))
3914 Updates.push_back(Elt: {DominatorTree::Delete, DomBlock, Successor});
3915 }
3916
3917 DomBI->eraseFromParent();
3918 if (DTU)
3919 DTU->applyUpdates(Updates);
3920
3921 return true;
3922}
3923
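/// Create "LHS Opc RHS", preferring a plain binary operator when that cannot
/// introduce poison, and otherwise falling back to the poison-safe "logical"
/// select form. As a sketch (illustrative IR), a logical `and` is
/// \code
///   %r = select i1 %lhs, i1 %rhs, i1 false
/// \endcode
/// which, unlike "and i1 %lhs, %rhs", does not propagate poison from %rhs
/// when %lhs is false.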
3924static Value *createLogicalOp(IRBuilderBase &Builder,
3925 Instruction::BinaryOps Opc, Value *LHS,
3926 Value *RHS, const Twine &Name = "") {
3927 // Try to relax logical op to binary op.
3928 if (impliesPoison(ValAssumedPoison: RHS, V: LHS))
3929 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3930 if (Opc == Instruction::And)
3931 return Builder.CreateLogicalAnd(Cond1: LHS, Cond2: RHS, Name);
3932 if (Opc == Instruction::Or)
3933 return Builder.CreateLogicalOr(Cond1: LHS, Cond2: RHS, Name);
3934 llvm_unreachable("Invalid logical opcode");
3935}
3936
3937/// Return true if either PBI or BI has branch weight available, and store
3938/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3939/// not have branch weight, use 1:1 as its weight.
3940static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
3941 uint64_t &PredTrueWeight,
3942 uint64_t &PredFalseWeight,
3943 uint64_t &SuccTrueWeight,
3944 uint64_t &SuccFalseWeight) {
3945 bool PredHasWeights =
3946 extractBranchWeights(I: *PBI, TrueVal&: PredTrueWeight, FalseVal&: PredFalseWeight);
3947 bool SuccHasWeights =
3948 extractBranchWeights(I: *BI, TrueVal&: SuccTrueWeight, FalseVal&: SuccFalseWeight);
3949 if (PredHasWeights || SuccHasWeights) {
3950 if (!PredHasWeights)
3951 PredTrueWeight = PredFalseWeight = 1;
3952 if (!SuccHasWeights)
3953 SuccTrueWeight = SuccFalseWeight = 1;
3954 return true;
3955 } else {
3956 return false;
3957 }
3958}
3959
/// Determine if the two branches share a common destination, and deduce the
/// logical operation ("glue") that joins the branches' conditions to arrive
/// at the common destination, if that would be profitable.
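///
/// For example (illustrative IR), given
/// \code
///   pred:
///     br i1 %a, label %common, label %bb
///   bb:
///     br i1 %b, label %common, label %other
/// \endcode
/// %common is reached exactly when (%a | %b) holds, so the deduced glue is
/// Instruction::Or with no inversion of the predecessor's condition.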
3963static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3964shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
3965 const TargetTransformInfo *TTI) {
3966 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
         "Both blocks must end with a conditional branch.");
3968 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3969 "PredBB must be a predecessor of BB.");
3970
3971 // We have the potential to fold the conditions together, but if the
3972 // predecessor branch is predictable, we may not want to merge them.
3973 uint64_t PTWeight, PFWeight;
3974 BranchProbability PBITrueProb, Likely;
3975 if (TTI && !PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
3976 extractBranchWeights(I: *PBI, TrueVal&: PTWeight, FalseVal&: PFWeight) &&
3977 (PTWeight + PFWeight) != 0) {
3978 PBITrueProb =
3979 BranchProbability::getBranchProbability(Numerator: PTWeight, Denominator: PTWeight + PFWeight);
3980 Likely = TTI->getPredictableBranchThreshold();
3981 }
3982
3983 if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
3984 // Speculate the 2nd condition unless the 1st is probably true.
3985 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3986 return {{BI->getSuccessor(i: 0), Instruction::Or, false}};
3987 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
3988 // Speculate the 2nd condition unless the 1st is probably false.
3989 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3990 return {{BI->getSuccessor(i: 1), Instruction::And, false}};
3991 } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
3992 // Speculate the 2nd condition unless the 1st is probably true.
3993 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3994 return {{BI->getSuccessor(i: 1), Instruction::And, true}};
3995 } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
3996 // Speculate the 2nd condition unless the 1st is probably false.
3997 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3998 return {{BI->getSuccessor(i: 0), Instruction::Or, true}};
3999 }
4000 return std::nullopt;
4001}
4002
4003static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
4004 DomTreeUpdater *DTU,
4005 MemorySSAUpdater *MSSAU,
4006 const TargetTransformInfo *TTI) {
4007 BasicBlock *BB = BI->getParent();
4008 BasicBlock *PredBlock = PBI->getParent();
4009
4010 // Determine if the two branches share a common destination.
4011 BasicBlock *CommonSucc;
4012 Instruction::BinaryOps Opc;
4013 bool InvertPredCond;
4014 std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) =
4015 *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
4016
4017 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4018
4019 IRBuilder<> Builder(PBI);
4020 // The builder is used to create instructions to eliminate the branch in BB.
4021 // If BB's terminator has !annotation metadata, add it to the new
4022 // instructions.
4023 Builder.CollectMetadataToCopy(Src: BB->getTerminator(),
4024 MetadataKinds: {LLVMContext::MD_annotation});
4025
4026 // If we need to invert the condition in the pred block to match, do so now.
  if (InvertPredCond)
    InvertBranch(PBI, Builder);
4030
4031 BasicBlock *UniqueSucc =
4032 PBI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 0) : BI->getSuccessor(i: 1);
4033
4034 // Before cloning instructions, notify the successor basic block that it
4035 // is about to have a new predecessor. This will update PHI nodes,
4036 // which will allow us to update live-out uses of bonus instructions.
4037 addPredecessorToBlock(Succ: UniqueSucc, NewPred: PredBlock, ExistPred: BB, MSSAU);
4038
4039 // Try to update branch weights.
4040 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4041 SmallVector<uint64_t, 2> MDWeights;
4042 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4043 SuccTrueWeight, SuccFalseWeight)) {
4044
4045 if (PBI->getSuccessor(i: 0) == BB) {
4046 // PBI: br i1 %x, BB, FalseDest
4047 // BI: br i1 %y, UniqueSucc, FalseDest
4048 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4049 MDWeights.push_back(Elt: PredTrueWeight * SuccTrueWeight);
4050 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4051 // TrueWeight for PBI * FalseWeight for BI.
4052 // We assume that total weights of a BranchInst can fit into 32 bits.
4053 // Therefore, we will not have overflow using 64-bit arithmetic.
4054 MDWeights.push_back(Elt: PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4055 PredTrueWeight * SuccFalseWeight);
4056 } else {
4057 // PBI: br i1 %x, TrueDest, BB
4058 // BI: br i1 %y, TrueDest, UniqueSucc
4059 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4060 // FalseWeight for PBI * TrueWeight for BI.
4061 MDWeights.push_back(Elt: PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4062 PredFalseWeight * SuccTrueWeight);
4063 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4064 MDWeights.push_back(Elt: PredFalseWeight * SuccFalseWeight);
4065 }
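    // As a worked example (illustrative numbers): if PBI's weights are 3:1
    // and BI's are 1:1, with PBI's true edge entering BB, the merged weights
    // become {3*1, 1*(1+1) + 3*1} = {3, 5}.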
4066
4067 setFittedBranchWeights(I&: *PBI, Weights: MDWeights, /*IsExpected=*/false,
4068 /*ElideAllZero=*/true);
4069
4070 // TODO: If BB is reachable from all paths through PredBlock, then we
4071 // could replace PBI's branch probabilities with BI's.
4072 } else
4073 PBI->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);
4074
4075 // Now, update the CFG.
4076 PBI->setSuccessor(idx: PBI->getSuccessor(i: 0) != BB, NewSucc: UniqueSucc);
4077
4078 if (DTU)
4079 DTU->applyUpdates(Updates: {{DominatorTree::Insert, PredBlock, UniqueSucc},
4080 {DominatorTree::Delete, PredBlock, BB}});
4081
4082 // If BI was a loop latch, it may have had associated loop metadata.
4083 // We need to copy it to the new latch, that is, PBI.
4084 if (MDNode *LoopMD = BI->getMetadata(KindID: LLVMContext::MD_loop))
4085 PBI->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD);
4086
4087 ValueToValueMapTy VMap; // maps original values to cloned values
4088 cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
4089
4090 Module *M = BB->getModule();
4091
4092 PredBlock->getTerminator()->cloneDebugInfoFrom(From: BB->getTerminator());
4093 for (DbgVariableRecord &DVR :
4094 filterDbgVars(R: PredBlock->getTerminator()->getDbgRecordRange())) {
4095 RemapDbgRecord(M, DR: &DVR, VM&: VMap,
4096 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
4097 }
4098
  // Now that Cond has been cloned into the predecessor basic block, or/and
  // the two conditions together.
4101 Value *BICond = VMap[BI->getCondition()];
4102 PBI->setCondition(
4103 createLogicalOp(Builder, Opc, LHS: PBI->getCondition(), RHS: BICond, Name: "or.cond"));
4104 if (!ProfcheckDisableMetadataFixes)
4105 if (auto *SI = dyn_cast<SelectInst>(Val: PBI->getCondition()))
4106 if (!MDWeights.empty()) {
4107 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4108 setFittedBranchWeights(I&: *SI, Weights: {MDWeights[0], MDWeights[1]},
4109 /*IsExpected=*/false, /*ElideAllZero=*/true);
4110 }
4111
4112 ++NumFoldBranchToCommonDest;
4113 return true;
4114}
4115
/// Return true if an instruction's type or any of its operands' types is a
/// vector type.
4118static bool isVectorOp(Instruction &I) {
4119 return I.getType()->isVectorTy() || any_of(Range: I.operands(), P: [](Use &U) {
4120 return U->getType()->isVectorTy();
4121 });
4122}
4123
4124/// If this basic block is simple enough, and if a predecessor branches to us
4125/// and one of our successors, fold the block into the predecessor and use
4126/// logical operations to pick the right destination.
4127bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
4128 MemorySSAUpdater *MSSAU,
4129 const TargetTransformInfo *TTI,
4130 unsigned BonusInstThreshold) {
4131 // If this block ends with an unconditional branch,
4132 // let speculativelyExecuteBB() deal with it.
4133 if (!BI->isConditional())
4134 return false;
4135
4136 BasicBlock *BB = BI->getParent();
4137 TargetTransformInfo::TargetCostKind CostKind =
4138 BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
4139 : TargetTransformInfo::TCK_SizeAndLatency;
4140
4141 Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());
4142
4143 if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Val: Cond) ||
4144 Cond->getParent() != BB || !Cond->hasOneUse())
4145 return false;
4146
4147 // Finally, don't infinitely unroll conditional loops.
4148 if (is_contained(Range: successors(BB), Element: BB))
4149 return false;
4150
  // Which predecessors do we want to deal with?
4152 SmallVector<BasicBlock *, 8> Preds;
4153 for (BasicBlock *PredBlock : predecessors(BB)) {
4154 BranchInst *PBI = dyn_cast<BranchInst>(Val: PredBlock->getTerminator());
4155
4156 // Check that we have two conditional branches. If there is a PHI node in
4157 // the common successor, verify that the same value flows in from both
4158 // blocks.
4159 if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(SI1: BI, SI2: PBI))
4160 continue;
4161
4162 // Determine if the two branches share a common destination.
4163 BasicBlock *CommonSucc;
4164 Instruction::BinaryOps Opc;
4165 bool InvertPredCond;
4166 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4167 std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) = *Recipe;
4168 else
4169 continue;
4170
4171 // Check the cost of inserting the necessary logic before performing the
4172 // transformation.
4173 if (TTI) {
4174 Type *Ty = BI->getCondition()->getType();
4175 InstructionCost Cost = TTI->getArithmeticInstrCost(Opcode: Opc, Ty, CostKind);
4176 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4177 !isa<CmpInst>(Val: PBI->getCondition())))
4178 Cost += TTI->getArithmeticInstrCost(Opcode: Instruction::Xor, Ty, CostKind);
4179
4180 if (Cost > BranchFoldThreshold)
4181 continue;
4182 }
4183
4184 // Ok, we do want to deal with this predecessor. Record it.
4185 Preds.emplace_back(Args&: PredBlock);
4186 }
4187
4188 // If there aren't any predecessors into which we can fold,
4189 // don't bother checking the cost.
4190 if (Preds.empty())
4191 return false;
4192
4193 // Only allow this transformation if computing the condition doesn't involve
4194 // too many instructions and these involved instructions can be executed
4195 // unconditionally. We denote all involved instructions except the condition
4196 // as "bonus instructions", and only allow this transformation when the
4197 // number of the bonus instructions we'll need to create when cloning into
4198 // each predecessor does not exceed a certain threshold.
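  // For example (illustrative): with two foldable predecessors, every
  // non-free bonus instruction must be cloned twice, so NumBonusInsts grows
  // by PredCount for each such instruction.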
  unsigned NumBonusInsts = 0;
  bool SawVectorOp = false;
  const unsigned PredCount = Preds.size();
  for (Instruction &I : *BB) {
    // Don't check the branch condition comparison itself.
    if (&I == Cond)
      continue;
    // Ignore the terminator.
    if (isa<BranchInst>(I))
      continue;
    // I must be safe to execute unconditionally.
    if (!isSafeToSpeculativelyExecute(&I))
      return false;
    SawVectorOp |= isVectorOp(I);

    // Account for the cost of duplicating this instruction into each
    // predecessor. Ignore free instructions.
    if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
                    TargetTransformInfo::TCC_Free) {
      NumBonusInsts += PredCount;

      // Exit early once we reach the limit.
      if (NumBonusInsts >
          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
        return false;
    }

    auto IsBCSSAUse = [BB, &I](Use &U) {
      auto *UI = cast<Instruction>(U.getUser());
      if (auto *PN = dyn_cast<PHINode>(UI))
        return PN->getIncomingBlock(U) == BB;
      return UI->getParent() == BB && I.comesBefore(UI);
    };

    // Does this instruction require rewriting of uses?
    if (!all_of(I.uses(), IsBCSSAUse))
      return false;
  }
  if (NumBonusInsts >
      BonusInstThreshold *
          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
    return false;

  // Ok, we have the budget. Perform the transformation.
  for (BasicBlock *PredBlock : Preds) {
    auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
  }
  return false;
}

// If there is only one store in BB1 and BB2, return it, otherwise return
// nullptr.
static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
  StoreInst *S = nullptr;
  for (auto *BB : {BB1, BB2}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (auto *SI = dyn_cast<StoreInst>(&I)) {
        if (S)
          // Multiple stores seen.
          return nullptr;
        else
          S = SI;
      }
  }
  return S;
}

static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
                                              Value *AlternativeV = nullptr) {
  // PHI is going to be a PHI node that allows the value V that is defined in
  // BB to be referenced in BB's only successor.
  //
  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
  // doesn't matter to us what the other operand is (it'll never get used). We
  // could just create a new PHI with an undef incoming value, but that could
  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
  // other PHI. So here we directly look for some PHI in BB's successor with V
  // as an incoming operand. If we find one, we use it, else we create a new
  // one.
  //
  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
  // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ],
  // where OtherBB is the single other predecessor of BB's only successor.
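  //
  // Sketch of the two modes (hypothetical names): with a null AlternativeV,
  // any "phi <ty> [ %V, %BB ], ..." in the successor can be reused; with a
  // non-null AlternativeV, only the exact two-entry form above is acceptable.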
  PHINode *PHI = nullptr;
  BasicBlock *Succ = BB->getSingleSuccessor();

  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
      PHI = cast<PHINode>(I);
      if (!AlternativeV)
        break;

      assert(Succ->hasNPredecessors(2));
      auto PredI = pred_begin(Succ);
      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
        break;
      PHI = nullptr;
    }
  if (PHI)
    return PHI;

  // If V is not an instruction defined in BB, just return it.
  if (!AlternativeV &&
      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
    return V;

  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
  PHI->insertBefore(Succ->begin());
  PHI->addIncoming(V, BB);
  for (BasicBlock *PredBB : predecessors(Succ))
    if (PredBB != BB)
      PHI->addIncoming(
          AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
  return PHI;
}

static bool mergeConditionalStoreToAddress(
    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
  // For every pointer, there must be exactly two stores, one coming from
  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
  // store (to any address) in PTB,PFB or QTB,QFB.
  // FIXME: We could relax this restriction with a bit more work and performance
  // testing.
  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
  if (!PStore || !QStore)
    return false;

  // Now check the stores are compatible.
  if (!QStore->isUnordered() || !PStore->isUnordered() ||
      PStore->getValueOperand()->getType() !=
          QStore->getValueOperand()->getType())
    return false;

  // Check that sinking the store won't cause program behavior changes. Sinking
  // the store out of the Q blocks won't change any behavior as we're sinking
  // from a block to its unconditional successor. But we're moving a store from
  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
  // So we need to check that there are no aliasing loads or stores in
  // QBI, QTB and QFB. We also need to check there are no conflicting memory
  // operations between PStore and the end of its parent block.
  //
  // The ideal way to do this is to query AliasAnalysis, but we don't
  // preserve AA currently so that is dangerous. Be super safe and just
  // check there are no other memory operations at all.
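  //
  // (Illustrative hazard, in made-up C: for "if (p) *a = 1; *b = 2;
  // if (q) *a = 3;", sinking the first store past "*b = 2" would only be
  // sound if b cannot alias a; without AA we conservatively refuse once any
  // other memory operation is present.)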
  for (auto &I : *QFB->getSinglePredecessor())
    if (I.mayReadOrWriteMemory())
      return false;
  for (auto &I : *QFB)
    if (&I != QStore && I.mayReadOrWriteMemory())
      return false;
  if (QTB)
    for (auto &I : *QTB)
      if (&I != QStore && I.mayReadOrWriteMemory())
        return false;
  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
       I != E; ++I)
    if (&*I != PStore && I->mayReadOrWriteMemory())
      return false;

  // If we're not in aggressive mode, we only optimize if we have some
  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
    if (!BB)
      return true;
    // Heuristic: if the block can be if-converted/phi-folded and the
    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
    // thread this store.
    InstructionCost Cost = 0;
    InstructionCost Budget =
        PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
    for (auto &I : BB->instructionsWithoutDebug(false)) {
      // Consider terminator instruction to be free.
      if (I.isTerminator())
        continue;
      // If this is one of the stores that we want to speculate out of this BB,
      // then don't count its cost, consider it to be free.
      if (auto *S = dyn_cast<StoreInst>(&I))
        if (llvm::is_contained(FreeStores, S))
          continue;
      // Else, we have a white-list of instructions that we are ok speculating.
      if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
        return false; // Not in white-list - not worthwhile folding.
      // And finally, if this is a non-free instruction that we are okay
      // speculating, ensure that we consider the speculation budget.
      Cost +=
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
      if (Cost > Budget)
        return false; // Eagerly refuse to fold as soon as we're out of budget.
    }
    assert(Cost <= Budget &&
           "When we run out of budget we will eagerly return from within the "
           "per-instruction loop.");
    return true;
  };

  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
  if (!MergeCondStoresAggressively &&
      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
    return false;

  // If PostBB has more than two predecessors, we need to split it so we can
  // sink the store.
  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
    // We know that QFB's only successor is PostBB. And QFB has a single
    // predecessor. If QTB exists, then its only successor is also PostBB.
    // If QTB does not exist, then QFB's only predecessor has a conditional
    // branch to QFB and PostBB.
    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
    BasicBlock *NewBB =
        SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
    if (!NewBB)
      return false;
    PostBB = NewBB;
  }

  // OK, we're going to sink the stores to PostBB. The store has to be
  // conditional though, so first create the predicate.
  BranchInst *PBranch =
      cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator());
  BranchInst *QBranch =
      cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator());
  Value *PCond = PBranch->getCondition();
  Value *QCond = QBranch->getCondition();

  Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
                                                PStore->getParent());
  Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
                                                QStore->getParent(), PPHI);

  BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
  IRBuilder<> QB(PostBB, PostBBFirst);
  QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());

  InvertPCond ^= (PStore->getParent() != PTB);
  InvertQCond ^= (QStore->getParent() != QTB);
  Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
  Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;

  Value *CombinedPred = QB.CreateOr(PPred, QPred);

  BasicBlock::iterator InsertPt = QB.GetInsertPoint();
  auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
                                      /*Unreachable=*/false,
                                      /*BranchWeights=*/nullptr, DTU);
  if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
      !ProfcheckDisableMetadataFixes) {
    SmallVector<uint32_t, 2> PWeights, QWeights;
    extractBranchWeights(*PBranch, PWeights);
    extractBranchWeights(*QBranch, QWeights);
    if (InvertPCond)
      std::swap(PWeights[0], PWeights[1]);
    if (InvertQCond)
      std::swap(QWeights[0], QWeights[1]);
    auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
    setFittedBranchWeights(*PostBB->getTerminator(),
                           {CombinedWeights[0], CombinedWeights[1]},
                           /*IsExpected=*/false, /*ElideAllZero=*/true);
  }

  QB.SetInsertPoint(T);
  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
  SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
  // Choose the minimum alignment. If we could prove both stores execute, we
  // could use the biggest one. In this case, though, we only know that one of
  // the stores executes. And we don't know it's safe to take the alignment
  // from a store that doesn't execute.
  SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));

  QStore->eraseFromParent();
  PStore->eraseFromParent();

  return true;
}

static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
                                   DomTreeUpdater *DTU, const DataLayout &DL,
                                   const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //     PBI       or   PBI        or a combination of the two
  //    /   \           | \
  //   PTB  PFB         |  PFB
  //    \   /           | /
  //     QBI            QBI
  //    /   \           | \
  //   QTB  QFB         |  QFB
  //    \   /           | /
  //    PostBB          PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
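  //
  // As an illustrative sketch (made-up source, not from a test case), the
  // transformation takes
  //   if (p) *a = x;   ...   if (q) *a = y;
  // and emits a single predicated store at the join point:
  //   if (p | q) *a = select(q, y, x);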
  BasicBlock *PTB = PBI->getSuccessor(0);
  BasicBlock *PFB = PBI->getSuccessor(1);
  BasicBlock *QTB = QBI->getSuccessor(0);
  BasicBlock *QFB = QBI->getSuccessor(1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(PFB, PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(QFB, QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  if (!QBI->getParent()->hasNUses(2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
        PStoreAddresses.insert(SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
        QStoreAddresses.insert(SI->getPointerOperand());
  }

  set_intersect(PStoreAddresses, QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}

/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hoisting safety.
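///
/// Illustrative sketch (hypothetical IR; names invented for exposition):
///   pred:
///     %wc = call i1 @llvm.experimental.widenable.condition()
///     br i1 %wc, label %bb, label %deopt      ; PBI, widenable
///   bb:
///     br i1 %c, label %cont, label %deopt2    ; BI, %deopt2 deoptimizes
/// If %bb has no side effects, BI's deoptimizing successor can simply be
/// redirected to %deopt, effectively widening PBI.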
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
                                           DomTreeUpdater *DTU) {
  // TODO: This can be generalized in two important ways:
  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
  //    values from the PBI edge.
  // 2) We can sink side-effecting instructions into BI's fallthrough
  //    successor provided they don't contribute to the computation of
  //    BI's condition.
  BasicBlock *IfTrueBB = PBI->getSuccessor(0);
  BasicBlock *IfFalseBB = PBI->getSuccessor(1);
  if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
      !BI->getParent()->getSinglePredecessor())
    return false;
  if (!IfFalseBB->phis().empty())
    return false; // TODO
  // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
  // may undo the transform done here.
  // TODO: There might be a more fine-grained solution to this.
  if (!llvm::succ_empty(IfFalseBB))
    return false;
  // Use lambda to lazily compute expensive condition after cheap ones.
  auto NoSideEffects = [](BasicBlock &BB) {
    return llvm::none_of(BB, [](const Instruction &I) {
      return I.mayWriteToMemory() || I.mayHaveSideEffects();
    });
  };
  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(1);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(1, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(0);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(0, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  return false;
}

/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  assert(PBI->isConditional() && BI->isConditional());
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // foldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(0) == BB;
      BI->setCondition(
          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hoisting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.

  // Ignore dbg intrinsics.
  if (&*BB->instructionsWithoutDebug(false).begin() != BI)
    return false;

  int PBIOp, BIOp;
  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(PBIOp) == BB)
    return false;

  // If predecessor's branch probability to BB is too low don't merge branches.
  SmallVector<uint32_t, 2> PredWeights;
  if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
      extractBranchWeights(*PBI, PredWeights) &&
      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {

    BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
        PredWeights[PBIOp],
        static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);

    BranchProbability Likely = TTI.getPredictableBranchThreshold();
    if (CommonDestProb >= Likely)
      return false;
  }

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.

  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
  unsigned NumPhis = 0;
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());

  SmallVector<DominatorTree::UpdateType, 5> Updates;

  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the other
  // exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end up
  // recursively unpeeling the loop. Since we know that (after the xform is
  // done) the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either dead code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
    BranchInst::Create(InfLoopBlock, InfLoopBlock);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(0, CommonDest);
  PBI->setSuccessor(1, OtherDest);

  if (DTU) {
    Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
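    //
    // For example (made-up numbers): with PredCommon = 3, PredOther = 1,
    // SuccCommon = 2, SuccOther = 2, the weights computed below are
    // {3 * (2 + 2) + 1 * 2, 1 * 2} = {14, 2}.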
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};

    setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
    // Cond may be a select instruction with the first operand set to "true",
    // or the second to "false" (see how createLogicalOp works for `and` and
    // `or`).
    if (!ProfcheckDisableMetadataFixes)
      if (auto *SI = dyn_cast<SelectInst>(Cond)) {
        assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
        // The select is predicated on PBICond.
        assert(SI->getCondition() == PBICond);
        // The corresponding probabilities are what was referred to above as
        // PredCommon and PredOther.
        setFittedBranchWeights(*SI, {PredCommon, PredOther},
                               /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
  }

  // OtherDest may have phi nodes. If so, add entries from PBI's
  // block that are identical to the entries for BI's block.
  addPredecessorToBlock(OtherDest, PBI->getParent(), BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
    Value *PBIV = PN.getIncomingValue(PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
      SelectInst *NV = cast<SelectInst>(
          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
      PN.setIncomingValue(PBBIdx, NV);
      // The select has the same condition as PBI, in the same BB. The
      // probabilities don't change.
      if (HasWeights) {
        uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
        setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
                               /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}

// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
// true or to FalseBB if Cond is false.
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest.
  for (BasicBlock *Succ : successors(OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(BB,
                              /*KeepOneInputPHIs=*/true);

      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
      setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
                       /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks was a successor, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(FalseBB);
    }
  }

  eraseTerminatorAndDCECond(OldTerm);

  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}

// Replaces
//   (switch (select cond, X, Y)) on constant X, Y
// with a branch - conditional if X and Y lead to distinct BBs,
// unconditional otherwise.
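//
// Sketch of the fold (hypothetical IR, invented names):
//   %v = select i1 %cond, i32 1, i32 2
//   switch i32 %v, label %default [ i32 1, label %a
//                                   i32 2, label %b ]
// becomes
//   br i1 %cond, label %a, label %b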
bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
                                            SelectInst *Select) {
  // Check for constant integer values in the select.
  ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
  ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
  if (!TrueVal || !FalseVal)
    return false;

  // Find the relevant condition and destinations.
  Value *Condition = Select->getCondition();
  BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
  BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();

  // Get weight for TrueBB and FalseBB.
  uint32_t TrueWeight = 0, FalseWeight = 0;
  SmallVector<uint64_t, 8> Weights;
  bool HasWeights = hasBranchWeightMD(*SI);
  if (HasWeights) {
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      TrueWeight =
          (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
      FalseWeight =
          (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
    }
  }

  // Perform the actual simplification.
  return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
                                    FalseWeight);
}

// Replaces
//   (indirectbr (select cond, blockaddress(@fn, BlockA),
//                             blockaddress(@fn, BlockB)))
// with
//   (br cond, BlockA, BlockB).
bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
                                                SelectInst *SI) {
  // Check that both operands of the select are block addresses.
  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
  if (!TBA || !FBA)
    return false;

  // Extract the actual blocks.
  BasicBlock *TrueBB = TBA->getBasicBlock();
  BasicBlock *FalseBB = FBA->getBasicBlock();

  // The select's profile becomes the profile of the conditional branch that
  // replaces the indirect branch.
  SmallVector<uint32_t> SelectBranchWeights(2);
  if (!ProfcheckDisableMetadataFixes)
    extractBranchWeights(*SI, SelectBranchWeights);
  // Perform the actual simplification.
  return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
                                    SelectBranchWeights[0],
                                    SelectBranchWeights[1]);
}

/// This is called when we find an icmp instruction
/// (a seteq/setne with a constant) as the only instruction in a
/// block that ends with an uncond branch. We are looking for a very specific
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
/// this case, we merge the first two "or's of icmp" into a switch, but when
/// the default value goes to an uncond block with a seteq in it, we get
/// something like:
///
///   switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 92
///   br label %end
/// end:
///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
    ICmpInst *ICI, IRBuilder<> &Builder) {
  // Select == nullptr means we assume that there is a hidden no-op select
  // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
  return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
}

/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handles a more
/// generic case. This is called when we find an icmp instruction (a
/// seteq/setne with a constant) and its following select instruction as the
/// only TWO instructions in a block that ends with an uncond branch. We are
/// looking for a very specific pattern that occurs when "
///   if (A == 1) return C1;
///   if (A == 2) return C2;
///   if (A < 3) return C3;
///   return C4;
/// " gets simplified. In this case, we merge the first two "branches of icmp"
/// into a switch, but then the default value goes to an uncond block with a lt
/// icmp and select in it, as InstCombine cannot simplify "A < 3" to "A == 2".
/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
/// get something like:
///
/// case1:
///   switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
/// case2:
///   br label %end
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 2
///   %val = select i1 %tmp, i8 C3, i8 C4
///   br label %end
/// end:
///   _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
///
/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
/// to the PHI, merging the icmp & select into the switch, as follows:
///
/// case1:
///   switch i8 %A, label %DEFAULT [
///     i8 0, label %end
///     i8 1, label %case2
///     i8 2, label %case3
///   ]
/// case2:
///   br label %end
/// case3:
///   br label %end
/// DEFAULT:
///   br label %end
/// end:
///   _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case3 ], [ C4, %DEFAULT]
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
    ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp/select has multiple uses, it
  // is too complex.
  /// TODO: support multi-phis in succ BB of select's BB.
  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
      (Select && !Select->hasOneUse()))
    return false;

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
    return false;

  Value *IcmpCond;
  ConstantInt *NewCaseVal;
  CmpPredicate Predicate;

  // Match icmp X, C
  if (!match(ICI,
             m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
    return false;

  Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
  Instruction *User;
  if (!Select) {
    // If Select == nullptr, we can assume that there is a hidden no-op select
    // just after the icmp.
    SelectCond = ICI;
    SelectTrueVal = Builder.getTrue();
    SelectFalseVal = Builder.getFalse();
    User = ICI->user_back();
  } else {
    SelectCond = Select->getCondition();
    // Check if the select condition is the same as the icmp condition.
    if (SelectCond != ICI)
      return false;
    SelectTrueVal = Select->getTrueValue();
    SelectFalseVal = Select->getFalseValue();
    User = Select->user_back();
  }

  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
  if (SI->getCondition() != IcmpCond)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(0, VVal);

    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
    Value *V;
    if (Predicate == ICmpInst::ICMP_EQ)
      V = ConstantInt::getFalse(BB->getContext());
    else
      V = ConstantInt::getTrue(BB->getContext());

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the select has to be in the 'end' block, by the only PHI node
  // in the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
  PHINode *PHIUse = dyn_cast<PHINode>(User);
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
      isa<PHINode>(++BasicBlock::iterator(PHIUse)))
    return false;

  // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
  // edge gets SelectTrueVal in the PHI.
  Value *DefaultCst = SelectFalseVal;
  Value *NewCst = SelectTrueVal;

  if (ICI->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(DefaultCst, NewCst);

  // Replace Select (which is used by the PHI for the default value) with
  // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
  if (Select) {
    Select->replaceAllUsesWith(DefaultCst);
    Select->eraseFromParent();
  } else {
    ICI->replaceAllUsesWith(DefaultCst);
  }
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  {
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(0);
    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(0, *NewW);
    }
    SIW.addCase(NewCaseVal, NewBB, NewW);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(SuccBlock);
  PHIUse->addIncoming(NewCst, NewBB);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}

/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions,
/// and fold it into a switch instruction if so.
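///
/// E.g. (illustrative only): "br (X == 0 | X == 1 | X == 7), T, F" becomes a
/// switch on X with cases 0, 1 and 7 all branching to T and a default of F.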
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch.
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result.
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle; remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If ExtraCase was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  SmallVector<uint32_t> BranchWeights;
  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(*BI, BranchWeights);

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual) {
    std::swap(DefaultBB, EdgeBB);
    if (HasProfile)
      std::swap(BranchWeights[0], BranchWeights[1]);
  }

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be unintended UB if the extra values are poison. Before the
    // transformation, the extra values might not be evaluated, depending on
    // the condition, so no UB would be raised. But after the transformation,
    // we evaluate the extra values before checking the condition, which would
    // raise UB. This is solved by adding a freeze instruction to the extra
    // values.
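    //
    // Sketch (hypothetical values): for "X == 1 | Y == 2" with "Y == 2" as
    // the extra case, we branch on "freeze i1 (Y == 2)" first, so a poison Y
    // can no longer turn into a branch on poison.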
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    // We don't have any info about this condition.
    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
                             : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
    setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Check if we can represent the values as a contiguous range. If so, we use
  // a range check + conditional branch instead of a switch.
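  // For example (made-up values): {3, 2, 1} covers the range [1, 4), which is
  // checked as "(X - 1) u< 3" - one add and one compare instead of a switch.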
  if (Values.front()->getValue() - Values.back()->getValue() ==
      Values.size() - 1) {
    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
        Values.back()->getValue(), Values.front()->getValue() + 1);
    APInt Offset, RHS;
    ICmpInst::Predicate Pred;
    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
    Value *X = CompVal;
    if (!Offset.isZero())
      X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
    Value *Cond =
        Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
    BranchInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
    if (HasProfile)
      setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
    // We don't need to update PHI nodes since we don't add any new edges.
  } else {
    // Create the new switch instruction now.
    SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
    if (HasProfile) {
      // We know the weight of the default case. We don't know the weight of
      // the other cases, but rather than completely lose profiling info, we
      // split the remaining probability equally over them.
      SmallVector<uint32_t> NewWeights(Values.size() + 1);
      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
                                        // if TrueWhenEqual.
      for (auto &V : drop_begin(NewWeights))
        V = BranchWeights[0] / Values.size();
      setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
    }

    // Add all of the 'cases' to the switch instruction.
    for (ConstantInt *Val : Values)
      New->addCase(Val, EdgeBB);

    // We added edges from BB to EdgeBB. As such, if there were any
    // PHI nodes in EdgeBB, they need entries to be added corresponding to
    // the number of edges added.
    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
      PHINode *PN = cast<PHINode>(BBI);
      Value *InVal = PN->getIncomingValueForBlock(BB);
      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
        PN->addIncoming(InVal, BB);
    }
  }

  // Erase the old branch instruction.
  eraseTerminatorAndDCECond(BI);
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}

bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
  if (isa<PHINode>(RI->getValue()))
    return simplifyCommonResume(RI);
  else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
           RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
    // The resume must unwind the exception that caused control to branch here.
    return simplifySingleResume(RI);

  return false;
}

// Check if cleanup block is empty.
static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
  for (Instruction &I : R) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      return false;

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      break;
    default:
      return false;
    }
  }
  return true;
}

// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we cannot delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

    if (isCleanupBlockEmpty(
            make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, /*KeepOneInputPHIs=*/true);

    for (BasicBlock *Pred :
         llvm::make_early_inc_range(predecessors(TrivialBB))) {
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  return !TrivialUnwindBlocks.empty();
}

// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to branch here");

  // Check that there are no other instructions except for debug intrinsics.
  if (!isCleanupBlockEmpty(
          make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}

static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
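  //
  // Sketch (hypothetical IR): a pad of the form
  //   ehpad:
  //     %cp = cleanuppad within none []
  //     cleanupret from %cp unwind label %next
  // can be bypassed by retargeting each of ehpad's predecessors directly to
  // %next (or by removing the unwind edge entirely when unwinding to caller).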
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  if (!isCleanupBlockEmpty(
          make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing.
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) {
    if (UnwindDest == nullptr) {
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}

// Try to merge two cleanuppads together.
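// Illustrative sketch: if %bb's only predecessor is the cleanupret, as in
//   %outer = cleanuppad within none []
//   cleanupret from %outer unwind label %bb
// bb:
//   %inner = cleanuppad within none []
//   ...
// then %inner's uses can be rewired to %outer and the cleanupret replaced by
// an unconditional branch into %bb.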
static bool mergeCleanupPad(CleanupReturnInst *RI) {
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // If this cleanupret isn't the only predecessor of this cleanuppad, it
  // wouldn't be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanuppad with the predecessor pad.
  // The only cleanuppad uses should be this cleanupret, its cleanupret, and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}

bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
  // It is possible to transiently have an undef cleanuppad operand because we
  // have deleted some, but not all, dead blocks.
  // Eventually, this block will be deleted.
  if (isa<UndefValue>(RI->getOperand(0)))
    return false;

  if (mergeCleanupPad(RI))
    return true;

  if (removeEmptyCleanup(RI, DTU))
    return true;

  return false;
}

// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the
  // Unreachable are moved to in front of it -- otherwise they'll "dangle" at
  // the end of the block.
  BB->flushTerminatorDbgRecords();

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    BasicBlock::iterator BBI = UI->getIterator();
    --BBI;

    if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
      break; // Cannot drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.
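    // E.g. a store directly before the unreachable can be dropped: once the
    // store executes, control is guaranteed to reach the unreachable anyway.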

    // Note that deleting EH pads here is in fact okay, although it involves
    // a bit of subtle reasoning. If this inst is an EH pad, all the
    // predecessors of this block will be the unwind edges of
    // Invoke/CatchSwitch/CleanupReturn, and we can therefore guarantee this
    // block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead).
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
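      // E.g. (illustrative) both `br label %bb` and
      // `br i1 %c, label %bb, label %bb` simply become `unreachable`, while
      // a genuine conditional branch to %bb becomes an assume of the
      // condition that avoids %bb plus a branch to the other successor.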
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value *Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        eraseTerminatorAndDCECond(BI);
        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}

struct ContiguousCasesResult {
  ConstantInt *Min;
  ConstantInt *Max;
  BasicBlock *Dest;
  BasicBlock *OtherDest;
  SmallVectorImpl<ConstantInt *> *Cases;
  SmallVectorImpl<ConstantInt *> *OtherCases;
};

static std::optional<ContiguousCasesResult>
findContiguousCases(Value *Condition, SmallVectorImpl<ConstantInt *> &Cases,
                    SmallVectorImpl<ConstantInt *> &OtherCases,
                    BasicBlock *Dest, BasicBlock *OtherDest) {
  assert(Cases.size() >= 1);

  array_pod_sort(Cases.begin(), Cases.end(), constantIntSortPredicate);
  const APInt &Min = Cases.back()->getValue();
  const APInt &Max = Cases.front()->getValue();
  APInt Offset = Max - Min;
  size_t ContiguousOffset = Cases.size() - 1;
  if (Offset == ContiguousOffset) {
    return ContiguousCasesResult{
        /*Min=*/Cases.back(),
        /*Max=*/Cases.front(),
        /*Dest=*/Dest,
        /*OtherDest=*/OtherDest,
        /*Cases=*/&Cases,
        /*OtherCases=*/&OtherCases,
    };
  }
  ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false);
  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
  // [OtherMax, Max] (also [OtherMax, OtherMin]), then [OtherMin+1, OtherMax-1]
  // is a contiguous range for the other destination. N.B. if CR is not a full
  // range, Max+1 is not equal to Min, i.e. the range is not contiguous in
  // modular arithmetic.
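  // Illustrative example: with an i8 condition known to be in [0, 5], cases
  // {0, 1, 4, 5} -> Dest wrap around the boundary of that range, so the
  // complement {2, 3} forms a contiguous range for OtherDest.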
  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
    assert(Cases.size() >= 2);
    auto *It =
        std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
          return L->getValue() != R->getValue() + 1;
        });
    if (It == Cases.end())
      return std::nullopt;
    auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
        Cases.size() - 2) {
      return ContiguousCasesResult{
          /*Min=*/cast<ConstantInt>(
              ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
          /*Max=*/
          cast<ConstantInt>(
              ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
          /*Dest=*/OtherDest,
          /*OtherDest=*/Dest,
          /*Cases=*/&OtherCases,
          /*OtherCases=*/&Cases,
      };
    }
  }
  return std::nullopt;
}

static void createUnreachableSwitchDefault(SwitchInst *Switch,
                                           DomTreeUpdater *DTU,
                                           bool RemoveOrigDefaultBlock = true) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  UI->setDebugLoc(DebugLoc::getTemporary());
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}

/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
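/// For example (illustrative):
///   switch i32 %x, label %other [ i32 1, label %dest
///                                 i32 2, label %dest
///                                 i32 3, label %dest ]
/// becomes
///   %off = add i32 %x, -1
///   %cmp = icmp ult i32 %off, 3
///   br i1 %cmp, label %dest, label %other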
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();
  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  SmallVector<ConstantInt *, 16> CasesA;
  SmallVector<ConstantInt *, 16> CasesB;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same, and the default is
                  // unreachable.

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases forms a contiguous range.
  std::optional<ContiguousCasesResult> ContiguousCases;

  // Only one icmp is needed when there is only one case.
  if (!HasDefault && CasesA.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesA[0],
        /*Max=*/CasesA[0],
        /*Dest=*/DestA,
        /*OtherDest=*/DestB,
        /*Cases=*/&CasesA,
        /*OtherCases=*/&CasesB,
    };
  else if (CasesB.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesB[0],
        /*Max=*/CasesB[0],
        /*Dest=*/DestB,
        /*OtherDest=*/DestA,
        /*Cases=*/&CasesB,
        /*OtherCases=*/&CasesA,
    };
  // Correctness: Cases to the default destination cannot be contiguous cases.
  else if (!HasDefault)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);

  if (!ContiguousCases)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);

  if (!ContiguousCases)
    return false;

  auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;

  // Start building the compare and branch.

  Constant *Offset = ConstantExpr::getNeg(Min);
  Constant *NumCases = ConstantInt::get(Offset->getType(),
                                        Max->getValue() - Min->getValue() + 1);
  BranchInst *NewBI;
  if (NumCases->isOneValue()) {
    assert(Max->getValue() == Min->getValue());
    Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }
  // If NumCases overflowed, then all possible values jump to the successor.
  else if (NumCases->isNullValue() && !Cases->empty()) {
    NewBI = Builder.CreateBr(Dest);
  } else {
    Value *Sub = SI->getCondition();
    if (!Offset->isNullValue())
      Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
    Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI) && NewBI->isConditional()) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == Dest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  for (auto &PHI : make_early_inc_range(Dest->phis())) {
    unsigned PreviousEdges = Cases->size();
    if (Dest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }
  for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
    unsigned PreviousEdges = OtherCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    unsigned E = PreviousEdges - 1;
    // Remove all incoming values from OtherDest if OtherDest is unreachable.
    if (NewBI->isUnconditional())
      ++E;
    for (unsigned I = 0; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    createUnreachableSwitchDefault(SI, DTU);

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}

/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
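/// E.g. (illustrative) if the condition is known to be `%x & 3`, any case
/// value >= 4 is provably dead and is removed; and if all four remaining
/// values have cases, the default destination becomes unreachable as well.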
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
  SmallPtrSet<const Constant *, 4> KnownValues;
  bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);

  // We can also eliminate cases by determining that their values are outside
  // of the limited range of the condition based on how many significant
  // (non-sign) bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Cond, DL, AC, SI);

  // Gather dead cases.
  SmallVector<ConstantInt *, 8> DeadCases;
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
      if (Inserted)
        UniqueSuccessors.push_back(Successor);
      ++It->second;
    }
    ConstantInt *CaseC = Case.getCaseValue();
    const APInt &CaseVal = CaseC->getValue();
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
        (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
      DeadCases.push_back(CaseC);
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    } else if (IsKnownValuesValid)
      KnownValues.erase(CaseC);
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault = !SI->defaultDestUnreachable();
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty()) {
    if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
      createUnreachableSwitchDefault(SI, DTU);
      return true;
    }

    if (NumUnknownBits < 64 /* avoid overflow */) {
      uint64_t AllNumCases = 1ULL << NumUnknownBits;
      if (SI->getNumCases() == AllNumCases) {
        createUnreachableSwitchDefault(SI, DTU);
        return true;
      }
      // When only one case value is missing, replace default with that case.
      // Eliminating the default branch will provide more opportunities for
      // optimization, such as lookup tables.
      if (SI->getNumCases() == AllNumCases - 1) {
        assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
        IntegerType *CondTy = cast<IntegerType>(Cond->getType());
        if (CondTy->getIntegerBitWidth() > 64 ||
            !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
          return false;

        uint64_t MissingCaseVal = 0;
        for (const auto &Case : SI->cases())
          MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
        auto *MissingCase = cast<ConstantInt>(
            ConstantInt::get(Cond->getType(), MissingCaseVal));
        SwitchInstProfUpdateWrapper SIW(*SI);
        SIW.addCase(MissingCase, SI->getDefaultDest(),
                    SIW.getSuccessorWeight(0));
        createUnreachableSwitchDefault(SI, DTU,
                                       /*RemoveOrigDefaultBlock*/ false);
        SIW.setSuccessorWeight(0, 0);
        return true;
      }
    }
  }

  if (DeadCases.empty())
    return false;

  SwitchInstProfUpdateWrapper SIW(*SI);
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}

/// If BB would be eligible for simplification by
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
/// by an unconditional branch), look at the phi node for BB in the successor
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
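/// Illustrative shape that this matches:
///   case.bb:                               ; empty, sole pred is the switch
///     br label %succ
///   succ:
///     %p = phi i32 [ 17, %case.bb ], ...   ; where 17 == CaseValue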
static PHINode *findPHIForConditionForwarding(ConstantInt *CaseValue,
                                              BasicBlock *BB, int *PhiIndex) {
  if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
    return nullptr; // BB must be empty to be a candidate for simplification.
  if (!BB->getSinglePredecessor())
    return nullptr; // BB must be dominated by the switch.

  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
  if (!Branch || !Branch->isUnconditional())
    return nullptr; // Terminator must be unconditional branch.

  BasicBlock *Succ = Branch->getSuccessor(0);

  for (PHINode &PHI : Succ->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB);
    assert(Idx >= 0 && "PHI has no entry for predecessor?");

    Value *InValue = PHI.getIncomingValue(Idx);
    if (InValue != CaseValue)
      continue;

    *PhiIndex = Idx;
    return &PHI;
  }

  return nullptr;
}

/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
static bool forwardSwitchConditionToPHI(SwitchInst *SI) {
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant
    // case value rather than the switch condition variable:
    // switchbb:
    //   switch i32 %x, label %default [
    //     i32 17, label %succ
    //   ...
    // succ:
    //   %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    //   %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch
      // to a phi. If there is >1, that means multiple cases of the switch
      // map to 1 value in the phi, and that phi value is not the switch
      // condition. Thus, this transform would not make sense (the phi would
      // be invalid because a phi can't have different incoming values from
      // the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
          count(Phi.blocks(), SwitchBlock) == 1) {
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case
    // constants.
    int PhiIdx;
    if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
      ForwardingNodes[Phi].push_back(PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Check if it helps to fold PHI.
    if (Indexes.size() < 2 &&
        !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(Index, SI->getCondition());
    Changed = true;
  }

  return Changed;
}

/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
static bool validLookupTableConstant(Constant *C,
                                     const TargetTransformInfo &TTI) {
  if (C->isThreadDependent())
    return false;
  if (C->isDLLImportDependent())
    return false;

  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
    return false;

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
    // Pointer casts and in-bounds GEPs will not prohibit the backend from
    // materializing the array of constants.
    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
      return false;
  }

  if (!TTI.shouldBuildLookupTablesForConstant(C))
    return false;

  return true;
}

/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning 0 if it's not there.
static Constant *
lookupConstant(Value *V,
               const SmallDenseMap<Value *, Constant *> &ConstantPool) {
  if (Constant *C = dyn_cast<Constant>(V))
    return C;
  return ConstantPool.lookup(V);
}

/// Try to fold instruction I into a constant. This works for
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns
/// the resulting constant on success, 0 otherwise.
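/// E.g. (illustrative) with ConstantPool = { %x -> i32 5 }, the instruction
/// `%y = add i32 %x, 3` folds to the constant i32 8.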
static Constant *
constantFold(Instruction *I, const DataLayout &DL,
             const SmallDenseMap<Value *, Constant *> &ConstantPool) {
  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
    Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
    if (!A)
      return nullptr;
    if (A->isAllOnesValue())
      return lookupConstant(Select->getTrueValue(), ConstantPool);
    if (A->isNullValue())
      return lookupConstant(Select->getFalseValue(), ConstantPool);
    return nullptr;
  }

  SmallVector<Constant *, 4> COps;
  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
    if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
      COps.push_back(A);
    else
      return nullptr;
  }

  return ConstantFoldInstOperands(I, COps, DL);
}

/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (nullptr for the
/// default case), of a switch instruction SI.
static bool
getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  SmallDenseMap<Value *, Constant *> ConstantPool;
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(0);
    } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would
      // then no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(User))
          if (Phi->getIncomingBlock(Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(std::make_pair(&I, C));
    } else {
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!validLookupTableConstant(ConstVal, TTI))
      return false;

    Res.push_back(std::make_pair(&PHI, ConstVal));
  }

  return Res.size() > 0;
}

// Helper function used to add CaseVal to the list of cases that generate
// Result. Returns the updated number of cases that generate this result.
static size_t mapCaseToResult(ConstantInt *CaseVal,
                              SwitchCaseResultVectorTy &UniqueResults,
                              Constant *Result) {
  for (auto &I : UniqueResults) {
    if (I.first == Result) {
      I.second.push_back(CaseVal);
      return I.second.size();
    }
  }
  UniqueResults.push_back(
      std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
  return 1;
}

// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
                 DL, TTI);
  // If the default value is not found, abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;

  return DefaultResult || SI->defaultDestUnreachable();
}

// Helper function that checks if it is possible to transform a switch with
// only two cases (or two cases + default) that produces a result into a
// select.
// TODO: Handle switches with more than 2 cases that map to the same result.
// The branch weights correspond to the provided Condition (i.e. if Condition
// is modified from the original SwitchInst, the caller must adjust the
// weights).
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL,
                                 ArrayRef<uint32_t> BranchWeights) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }

  const bool HasBranchWeights =
      !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;

  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
      if (auto *SI = dyn_cast<SelectInst>(SelectValue);
          SI && HasBranchWeights) {
        // We start with 3 probabilities, where the numerator is the
        // corresponding BranchWeights[i], and the denominator is the sum over
        // BranchWeights. We want the probability and negative probability of
        // Condition == SecondCase.
        assert(BranchWeights.size() == 3);
        setBranchWeights(
            *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
            /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                      SelectValue, "switch.select");
    if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
      // We may have had a DefaultResult. Base the positions of the first and
      // second case's branch weights on that. The probability that Condition
      // != FirstCase also needs to take it into account.
      assert(BranchWeights.size() >= 2);
      size_t FirstCasePos = (Condition != nullptr);
      size_t SecondCasePos = FirstCasePos + 1;
      uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
      setBranchWeights(*SI,
                       {BranchWeights[FirstCasePos],
                        DefaultCase + BranchWeights[SecondCasePos]},
                       /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
    return Ret;
  }

  // Handle the degenerate case where two cases have the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // 2^n cases that differ only in n free bits map to the same result:
    // case 0,4       -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6   -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10  -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identifies CaseValues.
      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(CaseCount)) {
          Value *And = Builder.CreateAnd(Condition, AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              And, Constant::getIntegerValue(And->getType(), AndMask));
          Value *Ret =
              Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
          if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
            // We know there's a Default case. We base the resulting branch
            // weights off its probability.
            assert(BranchWeights.size() >= 2);
            setBranchWeights(*SI,
                             {accumulate(drop_begin(BranchWeights), 0U),
                              BranchWeights[0]},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
          }
          return Ret;
        }
      }

      // Mark the bits touched by the case values.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if the cases with the same result cover all values of the
      // touched bits.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        Value *Ret =
            Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
        if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
          assert(BranchWeights.size() >= 2);
          setBranchWeights(*SI,
                           {accumulate(drop_begin(BranchWeights), 0U),
                            BranchWeights[0]},
                           /*IsExpected=*/false, /*ElideAllZero=*/true);
        }
        return Ret;
      }
    }

    // Handle the degenerate case where two cases have the same value.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      Value *Ret =
          Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
        assert(BranchWeights.size() >= 2);
        setBranchWeights(*SI,
                         {accumulate(drop_begin(BranchWeights), 0U),
                          BranchWeights[0]},
                         /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
      return Ret;
    }
  }

  return nullptr;
}

// Helper function to clean up a switch instruction that has been converted
// into a select, fixing up PHI nodes and basic blocks.
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(DestBB);

  // Remove the switch.

  PHI->removeIncomingValueIf(
      [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
  PHI->addIncoming(SelectValue, SelectBB);

  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(SelectBB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}

/// If a switch is only used to initialize one or more phi nodes in a common
/// successor block with only two different constant values, try to replace the
/// switch with a select. Returns true if the fold was made.
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
                              DomTreeUpdater *DTU, const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  Value *const Cond = SI->getCondition();
  PHINode *PHI = nullptr;
  BasicBlock *CommonDest = nullptr;
  Constant *DefaultResult;
  SwitchCaseResultVectorTy UniqueResults;
  // Collect all the cases that will deliver the same value from the switch.
  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
                             DL, TTI, /*MaxUniqueResults*/ 2))
    return false;

  assert(PHI != nullptr && "PHI for value select not found");
  Builder.SetInsertPoint(SI);
  SmallVector<uint32_t, 4> BranchWeights;
  if (!ProfcheckDisableMetadataFixes) {
    [[maybe_unused]] auto HasWeights =
        extractBranchWeights(getBranchWeightMDNode(*SI), BranchWeights);
    assert(!HasWeights == (BranchWeights.empty()));
  }
  assert(BranchWeights.empty() ||
         (BranchWeights.size() >=
          UniqueResults.size() + (DefaultResult != nullptr)));

  Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
                                          Builder, DL, BranchWeights);
  if (!SelectValue)
    return false;

  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
  return true;
}

namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

  /// Return true if the replacement is a bit map.
  bool isBitMap();

private:
  // Depending on the switch, there are different alternatives.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values, we calculate the result with a simple multiplication
    // and addition instead of a table lookup.
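    // E.g. (illustrative) results {3, 5, 7, 9} for indices 0..3 become
    // Result = 3 + 2 * Index.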
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace

SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to
  // store that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
  if (isa<IntegerType>(ValueType)) {
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When the linear map is monotonic and signed overflow doesn't happen on
    // the maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
        // This is a poison value, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison
        // as the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poison in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      if (isIntN(M.getBitWidth(), TableSize - 1))
        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    IntegerType *IT = cast<IntegerType>(ValueType);
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ValueType, TableSize);
  Initializer = ConstantArray::get(TableTy, TableContents);

  Kind = LookupTableKind;
}
6964
6965Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
6966 const DataLayout &DL, Function *Func) {
6967 switch (Kind) {
6968 case SingleValueKind:
6969 return SingleValue;
6970 case LinearMapKind: {
6971 ++NumLinearMaps;
6972 // Derive the result value from the input value.
6973 Value *Result = Builder.CreateIntCast(V: Index, DestTy: LinearMultiplier->getType(),
6974 isSigned: false, Name: "switch.idx.cast");
6975 if (!LinearMultiplier->isOne())
6976 Result = Builder.CreateMul(LHS: Result, RHS: LinearMultiplier, Name: "switch.idx.mult",
6977 /*HasNUW = */ false,
6978 /*HasNSW = */ !LinearMapValWrapped);
6979
6980 if (!LinearOffset->isZero())
6981 Result = Builder.CreateAdd(LHS: Result, RHS: LinearOffset, Name: "switch.offset",
6982 /*HasNUW = */ false,
6983 /*HasNSW = */ !LinearMapValWrapped);
6984 return Result;
6985 }
6986 case BitMapKind: {
6987 ++NumBitMaps;
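    // The table contents were packed into a single integer, element 0 in the
    // least significant bits. For example, an i8 table {1, 2, 3} becomes the
    // i24 constant 0x030201, and entry Index is recovered roughly as
    //
    //   result = (uint8_t)(bitmap >> (8 * index));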
6988 // Type of the bitmap (e.g. i59).
6989 IntegerType *MapTy = BitMap->getIntegerType();
6990
6991 // Cast Index to the same type as the bitmap.
6992 // Note: The Index is <= the number of elements in the table, so
6993 // truncating it to the width of the bitmask is safe.
6994 Value *ShiftAmt = Builder.CreateZExtOrTrunc(V: Index, DestTy: MapTy, Name: "switch.cast");
6995
6996 // Multiply the shift amount by the element width. NUW/NSW can always be
6997 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6998 // BitMap's bit width.
6999 ShiftAmt = Builder.CreateMul(
7000 LHS: ShiftAmt, RHS: ConstantInt::get(Ty: MapTy, V: BitMapElementTy->getBitWidth()),
7001 Name: "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
7002
7003 // Shift down.
7004 Value *DownShifted =
7005 Builder.CreateLShr(LHS: BitMap, RHS: ShiftAmt, Name: "switch.downshift");
7006 // Mask off.
7007 return Builder.CreateTrunc(V: DownShifted, DestTy: BitMapElementTy, Name: "switch.masked");
7008 }
7009 case LookupTableKind: {
7010 ++NumLookupTables;
7011 auto *Table =
7012 new GlobalVariable(*Func->getParent(), Initializer->getType(),
7013 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
7014 Initializer, "switch.table." + Func->getName());
7015 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array element. We will only be loading
    // one value out of it.
7018 Table->setAlignment(DL.getPrefTypeAlign(Ty: ValueType));
7019 Type *IndexTy = DL.getIndexType(PtrTy: Table->getType());
7020 auto *ArrayTy = cast<ArrayType>(Val: Table->getValueType());
7021
7022 if (Index->getType() != IndexTy) {
7023 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7024 Index = Builder.CreateZExtOrTrunc(V: Index, DestTy: IndexTy);
7025 if (auto *Zext = dyn_cast<ZExtInst>(Val: Index))
7026 Zext->setNonNeg(
7027 isUIntN(N: OldBitWidth - 1, x: ArrayTy->getNumElements() - 1));
7028 }
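    // The nneg flag above is justified because the lookup is only reached for
    // in-range indices: e.g. an i4 index into a 7-entry table can only be
    // 0..6 at this point, which fits in 3 bits, so its sign bit is clear.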
7029
7030 Value *GEPIndices[] = {ConstantInt::get(Ty: IndexTy, V: 0), Index};
7031 Value *GEP =
7032 Builder.CreateInBoundsGEP(Ty: ArrayTy, Ptr: Table, IdxList: GEPIndices, Name: "switch.gep");
7033 return Builder.CreateLoad(Ty: ArrayTy->getElementType(), Ptr: GEP, Name: "switch.load");
7034 }
7035 }
7036 llvm_unreachable("Unknown helper kind!");
7037}
7038
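// Returns true if a table of TableSize elements of type ElementType can be
// packed into a single legal integer register; e.g. with native 64-bit
// integers, an i8 table of up to 8 entries (8 * 8 == 64 bits) qualifies.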
7039bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7040 uint64_t TableSize,
7041 Type *ElementType) {
7042 auto *IT = dyn_cast<IntegerType>(Val: ElementType);
7043 if (!IT)
7044 return false;
7045 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7046 // are <= 15, we could try to narrow the type.
7047
7048 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7049 if (TableSize >= UINT_MAX / IT->getBitWidth())
7050 return false;
7051 return DL.fitsInLegalInteger(Width: TableSize * IT->getBitWidth());
7052}
7053
7054static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
7055 const DataLayout &DL) {
7056 // Allow any legal type.
7057 if (TTI.isTypeLegal(Ty))
7058 return true;
7059
7060 auto *IT = dyn_cast<IntegerType>(Val: Ty);
7061 if (!IT)
7062 return false;
7063
7064 // Also allow power of 2 integer types that have at least 8 bits and fit in
7065 // a register. These types are common in frontend languages and targets
7066 // usually support loads of these types.
7067 // TODO: We could relax this to any integer that fits in a register and rely
7068 // on ABI alignment and padding in the table to allow the load to be widened.
7069 // Or we could widen the constants and truncate the load.
7070 unsigned BitWidth = IT->getBitWidth();
7071 return BitWidth >= 8 && isPowerOf2_32(Value: BitWidth) &&
7072 DL.fitsInLegalInteger(Width: IT->getBitWidth());
7073}
7074
7075Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7076
7077bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7078
7079bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7080
7081static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
7082 // 40% is the default density for building a jump table in optsize/minsize
7083 // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
7084 // function was based on.
7085 const uint64_t MinDensity = 40;
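  // For example, 5 cases spread over the range [0, 9] (CaseRange == 10) have
  // a density of 50% and count as dense, while 5 cases spread over [0, 19]
  // (25%) do not.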
7086
7087 if (CaseRange >= UINT64_MAX / 100)
7088 return false; // Avoid multiplication overflows below.
7089
7090 return NumCases * 100 >= CaseRange * MinDensity;
7091}
7092
7093static bool isSwitchDense(ArrayRef<int64_t> Values) {
7094 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7095 uint64_t Range = Diff + 1;
7096 if (Range < Diff)
7097 return false; // Overflow.
7098
7099 return isSwitchDense(NumCases: Values.size(), CaseRange: Range);
7100}
7101
7102/// Determine whether a lookup table should be built for this switch, based on
7103/// the number of cases, size of the table, and the types of the results.
7104// TODO: We could support larger than legal types by limiting based on the
7105// number of loads required and/or table size. If the constants are small we
7106// could use smaller table entries and extend after the load.
7107static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
7108 const TargetTransformInfo &TTI,
7109 const DataLayout &DL,
7110 const SmallVector<Type *> &ResultTypes) {
7111 if (SI->getNumCases() > TableSize)
7112 return false; // TableSize overflowed.
7113
7114 bool AllTablesFitInRegister = true;
7115 bool HasIllegalType = false;
7116 for (const auto &Ty : ResultTypes) {
7117 // Saturate this flag to true.
7118 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7119
7120 // Saturate this flag to false.
7121 AllTablesFitInRegister =
7122 AllTablesFitInRegister &&
7123 SwitchReplacement::wouldFitInRegister(DL, TableSize, ElementType: Ty);
7124
7125 // If both flags saturate, we're done. NOTE: This *only* works with
7126 // saturating flags, and all flags have to saturate first due to the
7127 // non-deterministic behavior of iterating over a dense map.
7128 if (HasIllegalType && !AllTablesFitInRegister)
7129 break;
7130 }
7131
7132 // If each table would fit in a register, we should build it anyway.
7133 if (AllTablesFitInRegister)
7134 return true;
7135
7136 // Don't build a table that doesn't fit in-register if it has illegal types.
7137 if (HasIllegalType)
7138 return false;
7139
7140 return isSwitchDense(NumCases: SI->getNumCases(), CaseRange: TableSize);
7141}
7142
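/// Decide whether to use the switch condition directly as the lookup table
/// index, instead of first subtracting the minimum case value. For example,
/// for cases {2, 3, 4} with a constant default result D, we can build the
/// five-entry table {D, D, v2, v3, v4} and skip the subtraction, at the cost
/// of a couple of extra table slots (which must still fit in a register).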
7143static bool shouldUseSwitchConditionAsTableIndex(
7144 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7145 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7146 const DataLayout &DL, const TargetTransformInfo &TTI) {
7147 if (MinCaseVal.isNullValue())
7148 return true;
7149 if (MinCaseVal.isNegative() ||
7150 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7151 !HasDefaultResults)
7152 return false;
7153 return all_of(Range: ResultTypes, P: [&](const auto &ResultType) {
7154 return SwitchReplacement::wouldFitInRegister(
7155 DL, TableSize: MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ElementType: ResultType);
7156 });
7157}
7158
/// Try to reuse the switch table index compare. The following pattern:
7160/// \code
7161/// if (idx < tablesize)
7162/// r = table[idx]; // table does not contain default_value
7163/// else
7164/// r = default_value;
7165/// if (r != default_value)
7166/// ...
7167/// \endcode
7168/// Is optimized to:
7169/// \code
7170/// cond = idx < tablesize;
7171/// if (cond)
7172/// r = table[idx];
7173/// else
7174/// r = default_value;
7175/// if (cond)
7176/// ...
7177/// \endcode
7178/// Jump threading will then eliminate the second if(cond).
7179static void reuseTableCompare(
7180 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
7181 Constant *DefaultValue,
7182 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
7183 ICmpInst *CmpInst = dyn_cast<ICmpInst>(Val: PhiUser);
7184 if (!CmpInst)
7185 return;
7186
7187 // We require that the compare is in the same block as the phi so that jump
7188 // threading can do its work afterwards.
7189 if (CmpInst->getParent() != PhiBlock)
7190 return;
7191
7192 Constant *CmpOp1 = dyn_cast<Constant>(Val: CmpInst->getOperand(i_nocapture: 1));
7193 if (!CmpOp1)
7194 return;
7195
7196 Value *RangeCmp = RangeCheckBranch->getCondition();
7197 Constant *TrueConst = ConstantInt::getTrue(Ty: RangeCmp->getType());
7198 Constant *FalseConst = ConstantInt::getFalse(Ty: RangeCmp->getType());
7199
7200 // Check if the compare with the default value is constant true or false.
7201 const DataLayout &DL = PhiBlock->getDataLayout();
7202 Constant *DefaultConst = ConstantFoldCompareInstOperands(
7203 Predicate: CmpInst->getPredicate(), LHS: DefaultValue, RHS: CmpOp1, DL);
7204 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7205 return;
7206
7207 // Check if the compare with the case values is distinct from the default
7208 // compare result.
7209 for (auto ValuePair : Values) {
7210 Constant *CaseConst = ConstantFoldCompareInstOperands(
7211 Predicate: CmpInst->getPredicate(), LHS: ValuePair.second, RHS: CmpOp1, DL);
7212 if (!CaseConst || CaseConst == DefaultConst ||
7213 (CaseConst != TrueConst && CaseConst != FalseConst))
7214 return;
7215 }
7216
7217 // Check if the branch instruction dominates the phi node. It's a simple
7218 // dominance check, but sufficient for our needs.
7219 // Although this check is invariant in the calling loops, it's better to do it
7220 // at this late stage. Practically we do it at most once for a switch.
7221 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7222 for (BasicBlock *Pred : predecessors(BB: PhiBlock)) {
7223 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7224 return;
7225 }
7226
7227 if (DefaultConst == FalseConst) {
7228 // The compare yields the same result. We can replace it.
7229 CmpInst->replaceAllUsesWith(V: RangeCmp);
7230 ++NumTableCmpReuses;
7231 } else {
7232 // The compare yields the same result, just inverted. We can replace it.
7233 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7234 V1: RangeCmp, V2: ConstantInt::get(Ty: RangeCmp->getType(), V: 1), Name: "inverted.cmp",
7235 InsertBefore: RangeCheckBranch->getIterator());
7236 CmpInst->replaceAllUsesWith(V: InvertedTableCmp);
7237 ++NumTableCmpReuses;
7238 }
7239}
7240
7241/// If the switch is only used to initialize one or more phi nodes in a common
7242/// successor block with different constant values, replace the switch with
7243/// lookup tables.
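///
/// For example (shown as C for brevity):
/// \code
/// switch (x) {
/// case 0: r = 10; break;
/// case 1: r = 11; break;
/// case 2: r = 12; break;
/// default: r = 0; break;
/// }
/// \endcode
/// becomes, roughly,
/// \code
/// static const int table[3] = {10, 11, 12};
/// r = (unsigned)x < 3 ? table[x] : 0;
/// \endcode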
7244static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
7245 DomTreeUpdater *DTU, const DataLayout &DL,
7246 const TargetTransformInfo &TTI,
7247 bool ConvertSwitchToLookupTable) {
7248 assert(SI->getNumCases() > 1 && "Degenerate switch?");
7249
7250 BasicBlock *BB = SI->getParent();
7251 Function *Fn = BB->getParent();
7252
7253 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
7254 // split off a dense part and build a lookup table for that.
7255
7256 // FIXME: This creates arrays of GEPs to constant strings, which means each
7257 // GEP needs a runtime relocation in PIC code. We should just build one big
7258 // string and lookup indices into that.
7259
  // Ignore switches with fewer than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
7262 if (SI->getNumCases() < 3)
7263 return false;
7264
7265 // Figure out the corresponding result for each case value and phi node in the
7266 // common destination, as well as the min and max case values.
7267 assert(!SI->cases().empty());
7268 SwitchInst::CaseIt CI = SI->case_begin();
7269 ConstantInt *MinCaseVal = CI->getCaseValue();
7270 ConstantInt *MaxCaseVal = CI->getCaseValue();
7271
7272 BasicBlock *CommonDest = nullptr;
7273
7274 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7275 SmallDenseMap<PHINode *, ResultListTy> ResultLists;
7276
7277 SmallDenseMap<PHINode *, Constant *> DefaultResults;
7278 SmallVector<Type *> ResultTypes;
7279 SmallVector<PHINode *, 4> PHIs;
7280
7281 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7282 ConstantInt *CaseVal = CI->getCaseValue();
7283 if (CaseVal->getValue().slt(RHS: MinCaseVal->getValue()))
7284 MinCaseVal = CaseVal;
7285 if (CaseVal->getValue().sgt(RHS: MaxCaseVal->getValue()))
7286 MaxCaseVal = CaseVal;
7287
7288 // Resulting value at phi nodes for this case value.
7289 using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
7290 ResultsTy Results;
7291 if (!getCaseResults(SI, CaseVal, CaseDest: CI->getCaseSuccessor(), CommonDest: &CommonDest,
7292 Res&: Results, DL, TTI))
7293 return false;
7294
7295 // Append the result and result types from this case to the list for each
7296 // phi.
7297 for (const auto &I : Results) {
7298 PHINode *PHI = I.first;
7299 Constant *Value = I.second;
7300 auto [It, Inserted] = ResultLists.try_emplace(Key: PHI);
7301 if (Inserted)
7302 PHIs.push_back(Elt: PHI);
7303 It->second.push_back(Elt: std::make_pair(x&: CaseVal, y&: Value));
7304 ResultTypes.push_back(Elt: PHI->getType());
7305 }
7306 }
7307
7308 // If the table has holes, we need a constant result for the default case
7309 // or a bitmask that fits in a register.
7310 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7311 bool HasDefaultResults =
7312 getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest,
7313 Res&: DefaultResultsList, DL, TTI);
7314 for (const auto &I : DefaultResultsList) {
7315 PHINode *PHI = I.first;
7316 Constant *Result = I.second;
7317 DefaultResults[PHI] = Result;
7318 }
7319
7320 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7321 MinCaseVal&: *MinCaseVal, MaxCaseVal: *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7322 uint64_t TableSize;
7323 ConstantInt *TableIndexOffset;
7324 if (UseSwitchConditionAsTableIndex) {
7325 TableSize = MaxCaseVal->getLimitedValue() + 1;
7326 TableIndexOffset = ConstantInt::get(Ty: MaxCaseVal->getIntegerType(), V: 0);
7327 } else {
7328 TableSize =
7329 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7330
7331 TableIndexOffset = MinCaseVal;
7332 }
7333
7334 // If the default destination is unreachable, or if the lookup table covers
7335 // all values of the conditional variable, branch directly to the lookup table
7336 // BB. Otherwise, check that the condition is within the case range.
7337 uint64_t NumResults = ResultLists[PHIs[0]].size();
7338 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7339
7340 bool TableHasHoles = (NumResults < TableSize);
7341
7342 // If the table has holes but the default destination doesn't produce any
7343 // constant results, the lookup table entries corresponding to the holes will
7344 // contain poison.
7345 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7346
7347 // If the default destination doesn't produce a constant result but is still
7348 // reachable, and the lookup table has holes, we need to use a mask to
7349 // determine if the current index should load from the lookup table or jump
7350 // to the default case.
7351 // The mask is unnecessary if the table has holes but the default destination
7352 // is unreachable, as in that case the holes must also be unreachable.
7353 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7354 if (NeedMask) {
7355 // As an extra penalty for the validity test we require more cases.
7356 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7357 return false;
7358 if (!DL.fitsInLegalInteger(Width: TableSize))
7359 return false;
7360 }
7361
7362 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7363 return false;
7364
7365 // Compute the table index value.
7366 Value *TableIndex;
7367 if (UseSwitchConditionAsTableIndex) {
7368 TableIndex = SI->getCondition();
7369 if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. The default result will be used to fill in the table holes
      // later, so make sure it exists.
7373 ConstantRange CR =
7374 computeConstantRange(V: TableIndex, /* ForSigned */ false);
      // Growing the table shouldn't have any size impact, since below we
      // check that the results would still fit in a register
      // (wouldFitInRegister).
7377 // TODO: Consider growing the table also when it doesn't fit in a register
7378 // if no optsize is specified.
7379 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7380 if (!CR.isUpperWrapped() &&
7381 all_of(Range&: ResultTypes, P: [&](const auto &ResultType) {
7382 return SwitchReplacement::wouldFitInRegister(DL, TableSize: UpperBound,
7383 ElementType: ResultType);
7384 })) {
7385 // There may be some case index larger than the UpperBound (unreachable
7386 // case), so make sure the table size does not get smaller.
7387 TableSize = std::max(a: UpperBound, b: TableSize);
7388 // The default branch is unreachable after we enlarge the lookup table.
7389 // Adjust DefaultIsReachable to reuse code path.
7390 DefaultIsReachable = false;
7391 }
7392 }
7393 }
7394
  // Keep track of the switch replacement for each phi.
7396 SmallDenseMap<PHINode *, SwitchReplacement> PhiToReplacementMap;
7397 for (PHINode *PHI : PHIs) {
7398 const auto &ResultList = ResultLists[PHI];
7399
7400 Type *ResultType = ResultList.begin()->second->getType();
7401 // Use any value to fill the lookup table holes.
7402 Constant *DefaultVal =
7403 AllHolesArePoison ? PoisonValue::get(T: ResultType) : DefaultResults[PHI];
7404 StringRef FuncName = Fn->getName();
7405 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7406 ResultList, DefaultVal, DL, FuncName);
7407 PhiToReplacementMap.insert(KV: {PHI, Replacement});
7408 }
7409
7410 bool AnyLookupTables = any_of(
7411 Range&: PhiToReplacementMap, P: [](auto &KV) { return KV.second.isLookupTable(); });
7412 bool AnyBitMaps = any_of(Range&: PhiToReplacementMap,
7413 P: [](auto &KV) { return KV.second.isBitMap(); });
7414
7415 // A few conditions prevent the generation of lookup tables:
7416 // 1. The target does not support lookup tables.
7417 // 2. The "no-jump-tables" function attribute is set.
7418 // However, these objections do not apply to other switch replacements, like
7419 // the bitmap, so we only stop here if any of these conditions are met and we
7420 // want to create a LUT. Otherwise, continue with the switch replacement.
7421 if (AnyLookupTables &&
7422 (!TTI.shouldBuildLookupTables() ||
7423 Fn->getFnAttribute(Kind: "no-jump-tables").getValueAsBool()))
7424 return false;
7425
7426 // In the early optimization pipeline, disable formation of lookup tables,
7427 // bit maps and mask checks, as they may inhibit further optimization.
7428 if (!ConvertSwitchToLookupTable &&
7429 (AnyLookupTables || AnyBitMaps || NeedMask))
7430 return false;
7431
7432 Builder.SetInsertPoint(SI);
  // If we don't use the switch condition directly as the table index, compute
  // TableIndex as the switch condition minus TableIndexOffset.
7435 if (!UseSwitchConditionAsTableIndex) {
7436 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7437 // we can try to attach nsw.
7438 bool MayWrap = true;
7439 if (!DefaultIsReachable) {
7440 APInt Res =
7441 MaxCaseVal->getValue().ssub_ov(RHS: MinCaseVal->getValue(), Overflow&: MayWrap);
7442 (void)Res;
7443 }
7444 TableIndex = Builder.CreateSub(LHS: SI->getCondition(), RHS: TableIndexOffset,
7445 Name: "switch.tableidx", /*HasNUW =*/false,
7446 /*HasNSW =*/!MayWrap);
7447 }
7448
7449 std::vector<DominatorTree::UpdateType> Updates;
7450
7451 // Compute the maximum table size representable by the integer type we are
7452 // switching upon.
7453 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7454 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7455 assert(MaxTableSize >= TableSize &&
7456 "It is impossible for a switch to have more entries than the max "
7457 "representable value of its input integer type's size.");
7458
7459 // Create the BB that does the lookups.
7460 Module &Mod = *CommonDest->getParent()->getParent();
7461 BasicBlock *LookupBB = BasicBlock::Create(
7462 Context&: Mod.getContext(), Name: "switch.lookup", Parent: CommonDest->getParent(), InsertBefore: CommonDest);
7463
7464 BranchInst *RangeCheckBranch = nullptr;
7465 BranchInst *CondBranch = nullptr;
7466
7467 Builder.SetInsertPoint(SI);
7468 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7469 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7470 Builder.CreateBr(Dest: LookupBB);
7471 if (DTU)
7472 Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
    // Note: We call removePredecessor later since we need to be able to get
    // the PHI value for the default case in case we're using a bit mask.
7475 } else {
7476 Value *Cmp = Builder.CreateICmpULT(
7477 LHS: TableIndex, RHS: ConstantInt::get(Ty: MinCaseVal->getType(), V: TableSize));
7478 RangeCheckBranch =
7479 Builder.CreateCondBr(Cond: Cmp, True: LookupBB, False: SI->getDefaultDest());
7480 CondBranch = RangeCheckBranch;
7481 if (DTU)
7482 Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
7483 }
7484
7485 // Populate the BB that does the lookups.
7486 Builder.SetInsertPoint(LookupBB);
7487
7488 if (NeedMask) {
7489 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7490 // re-purposed to do the hole check, and we create a new LookupBB.
7491 BasicBlock *MaskBB = LookupBB;
7492 MaskBB->setName("switch.hole_check");
7493 LookupBB = BasicBlock::Create(Context&: Mod.getContext(), Name: "switch.lookup",
7494 Parent: CommonDest->getParent(), InsertBefore: CommonDest);
7495
7496 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7497 // unnecessary illegal types.
7498 uint64_t TableSizePowOf2 = NextPowerOf2(A: std::max(a: 7ULL, b: TableSize - 1ULL));
7499 APInt MaskInt(TableSizePowOf2, 0);
7500 APInt One(TableSizePowOf2, 1);
7501 // Build bitmask; fill in a 1 bit for every case.
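    // For example, with TableSize == 6 and cases {0, 1, 3, 5}, the mask is
    // 0b101011: bit I is set iff table index I corresponds to a real case
    // rather than a hole.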
7502 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7503 for (const auto &Result : ResultList) {
7504 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7505 .getLimitedValue();
7506 MaskInt |= One << Idx;
7507 }
7508 ConstantInt *TableMask = ConstantInt::get(Context&: Mod.getContext(), V: MaskInt);
7509
7510 // Get the TableIndex'th bit of the bitmask.
7511 // If this bit is 0 (meaning hole) jump to the default destination,
7512 // else continue with table lookup.
7513 IntegerType *MapTy = TableMask->getIntegerType();
7514 Value *MaskIndex =
7515 Builder.CreateZExtOrTrunc(V: TableIndex, DestTy: MapTy, Name: "switch.maskindex");
7516 Value *Shifted = Builder.CreateLShr(LHS: TableMask, RHS: MaskIndex, Name: "switch.shifted");
7517 Value *LoBit = Builder.CreateTrunc(
7518 V: Shifted, DestTy: Type::getInt1Ty(C&: Mod.getContext()), Name: "switch.lobit");
7519 CondBranch = Builder.CreateCondBr(Cond: LoBit, True: LookupBB, False: SI->getDefaultDest());
7520 if (DTU) {
7521 Updates.push_back(x: {DominatorTree::Insert, MaskBB, LookupBB});
7522 Updates.push_back(x: {DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7523 }
7524 Builder.SetInsertPoint(LookupBB);
7525 addPredecessorToBlock(Succ: SI->getDefaultDest(), NewPred: MaskBB, ExistPred: BB);
7526 }
7527
7528 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7529 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7530 // do not delete PHINodes here.
7531 SI->getDefaultDest()->removePredecessor(Pred: BB,
7532 /*KeepOneInputPHIs=*/true);
7533 if (DTU)
7534 Updates.push_back(x: {DominatorTree::Delete, BB, SI->getDefaultDest()});
7535 }
7536
7537 for (PHINode *PHI : PHIs) {
7538 const ResultListTy &ResultList = ResultLists[PHI];
7539 auto Replacement = PhiToReplacementMap.at(Val: PHI);
7540 auto *Result = Replacement.replaceSwitch(Index: TableIndex, Builder, DL, Func: Fn);
7541 // Do a small peephole optimization: re-use the switch table compare if
7542 // possible.
7543 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7544 BasicBlock *PhiBlock = PHI->getParent();
7545 // Search for compare instructions which use the phi.
7546 for (auto *User : PHI->users()) {
7547 reuseTableCompare(PhiUser: User, PhiBlock, RangeCheckBranch,
7548 DefaultValue: Replacement.getDefaultValue(), Values: ResultList);
7549 }
7550 }
7551
7552 PHI->addIncoming(V: Result, BB: LookupBB);
7553 }
7554
7555 Builder.CreateBr(Dest: CommonDest);
7556 if (DTU)
7557 Updates.push_back(x: {DominatorTree::Insert, LookupBB, CommonDest});
7558
7559 SmallVector<uint32_t> BranchWeights;
7560 const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
7561 extractBranchWeights(I: *SI, Weights&: BranchWeights);
7562 uint64_t ToLookupWeight = 0;
7563 uint64_t ToDefaultWeight = 0;
7564
7565 // Remove the switch.
7566 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7567 for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
7568 BasicBlock *Succ = SI->getSuccessor(idx: I);
7569
7570 if (Succ == SI->getDefaultDest()) {
7571 if (HasBranchWeights)
7572 ToDefaultWeight += BranchWeights[I];
7573 continue;
7574 }
7575 Succ->removePredecessor(Pred: BB);
7576 if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
7577 Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
7578 if (HasBranchWeights)
7579 ToLookupWeight += BranchWeights[I];
7580 }
7581 SI->eraseFromParent();
7582 if (HasBranchWeights)
7583 setFittedBranchWeights(I&: *CondBranch, Weights: {ToLookupWeight, ToDefaultWeight},
7584 /*IsExpected=*/false);
7585 if (DTU)
7586 DTU->applyUpdates(Updates);
7587
7588 if (NeedMask)
7589 ++NumLookupTablesHoles;
7590 return true;
7591}
7592
7593/// Try to transform a switch that has "holes" in it to a contiguous sequence
7594/// of cases.
7595///
7596/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7597/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7598///
7599/// This converts a sparse switch into a dense switch which allows better
7600/// lowering and could also allow transforming into a lookup table.
7601static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
7602 const DataLayout &DL,
7603 const TargetTransformInfo &TTI) {
7604 auto *CondTy = cast<IntegerType>(Val: SI->getCondition()->getType());
7605 if (CondTy->getIntegerBitWidth() > 64 ||
7606 !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
7607 return false;
7608 // Only bother with this optimization if there are more than 3 switch cases;
7609 // SDAG will only bother creating jump tables for 4 or more cases.
7610 if (SI->getNumCases() < 4)
7611 return false;
7612
7613 // This transform is agnostic to the signedness of the input or case values. We
7614 // can treat the case values as signed or unsigned. We can optimize more common
7615 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7616 // as signed.
7617 SmallVector<int64_t,4> Values;
7618 for (const auto &C : SI->cases())
7619 Values.push_back(Elt: C.getCaseValue()->getValue().getSExtValue());
7620 llvm::sort(C&: Values);
7621
7622 // If the switch is already dense, there's nothing useful to do here.
7623 if (isSwitchDense(Values))
7624 return false;
7625
7626 // First, transform the values such that they start at zero and ascend.
7627 int64_t Base = Values[0];
7628 for (auto &V : Values)
7629 V -= (uint64_t)(Base);
7630
7631 // Now we have signed numbers that have been shifted so that, given enough
7632 // precision, there are no negative values. Since the rest of the transform
7633 // is bitwise only, we switch now to an unsigned representation.
7634
7635 // This transform can be done speculatively because it is so cheap - it
7636 // results in a single rotate operation being inserted.
7637
7638 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7639 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7640 // less than 64.
7641 unsigned Shift = 64;
7642 for (auto &V : Values)
7643 Shift = std::min(a: Shift, b: (unsigned)llvm::countr_zero(Val: (uint64_t)V));
7644 assert(Shift < 64);
7645 if (Shift > 0)
7646 for (auto &V : Values)
7647 V = (int64_t)((uint64_t)V >> Shift);
7648
7649 if (!isSwitchDense(Values))
7650 // Transform didn't create a dense switch.
7651 return false;
7652
  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition is cleanly divisible by 1 << Shift, i.e.
  //   (C & ((1 << Shift) - 1)) == 0
  // inserting a new CFG edge to handle the case where it isn't.
7657 //
7658 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7659 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7660 // are nonzero then the switch condition will be very large and will hit the
7661 // default case.
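  //
  // For example, for cases {2, 6, 10, 14} we get Base == 2 and Shift == 2. A
  // conforming input such as 10 becomes ROTR(10 - 2, 2) == 2, while a
  // non-conforming input such as 9 becomes ROTR(7, 2), whose shifted-off low
  // bits land in the top bits, producing a value above every transformed case
  // and thus hitting the default.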
7662
7663 auto *Ty = cast<IntegerType>(Val: SI->getCondition()->getType());
7664 Builder.SetInsertPoint(SI);
7665 Value *Sub =
7666 Builder.CreateSub(LHS: SI->getCondition(), RHS: ConstantInt::getSigned(Ty, V: Base));
7667 Value *Rot = Builder.CreateIntrinsic(
7668 RetTy: Ty, ID: Intrinsic::fshl,
7669 Args: {Sub, Sub, ConstantInt::get(Ty, V: Ty->getBitWidth() - Shift)});
7670 SI->replaceUsesOfWith(From: SI->getCondition(), To: Rot);
7671
7672 for (auto Case : SI->cases()) {
7673 auto *Orig = Case.getCaseValue();
7674 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7675 Case.setValue(cast<ConstantInt>(Val: ConstantInt::get(Ty, V: Sub.lshr(shiftAmt: Shift))));
7676 }
7677 return true;
7678}
7679
/// Tries to transform a switch whose condition is a umin with a constant.
/// In that case, the default branch can be replaced by the branch of the
/// constant's own case. This method also removes dead cases when the
/// simplification cannot replace the default branch.
7684///
7685/// For example:
7686/// switch(umin(a, 3)) {
7687/// case 0:
7688/// case 1:
7689/// case 2:
7690/// case 3:
7691/// case 4:
7692/// // ...
7693/// default:
7694/// unreachable
7695/// }
7696///
7697/// Transforms into:
7698///
7699/// switch(a) {
7700/// case 0:
7701/// case 1:
7702/// case 2:
7703/// default:
7704/// // This is case 3
7705/// }
7706static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU) {
7707 Value *A;
7708 ConstantInt *Constant;
7709
7710 if (!match(V: SI->getCondition(), P: m_UMin(L: m_Value(V&: A), R: m_ConstantInt(CI&: Constant))))
7711 return false;
7712
7713 SmallVector<DominatorTree::UpdateType> Updates;
7714 SwitchInstProfUpdateWrapper SIW(*SI);
7715 BasicBlock *BB = SIW->getParent();
7716
7717 // Dead cases are removed even when the simplification fails.
7718 // A case is dead when its value is higher than the Constant.
7719 for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
7720 if (!I->getCaseValue()->getValue().ugt(RHS: Constant->getValue())) {
7721 ++I;
7722 continue;
7723 }
7724 BasicBlock *DeadCaseBB = I->getCaseSuccessor();
7725 DeadCaseBB->removePredecessor(Pred: BB);
7726 Updates.push_back(Elt: {DominatorTree::Delete, BB, DeadCaseBB});
7727 I = SIW->removeCase(I);
7728 E = SIW->case_end();
7729 }
7730
7731 auto Case = SI->findCaseValue(C: Constant);
7732 // If the case value is not found, `findCaseValue` returns the default case.
7733 // In this scenario, since there is no explicit `case 3:`, the simplification
  // fails. The simplification also fails when the switch's default destination
  // is reachable.
7736 if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
7737 if (DTU)
7738 DTU->applyUpdates(Updates);
7739 return !Updates.empty();
7740 }
7741
7742 BasicBlock *Unreachable = SI->getDefaultDest();
7743 SIW.replaceDefaultDest(I: Case);
7744 SIW.removeCase(I: Case);
7745 SIW->setCondition(A);
7746
7747 Updates.push_back(Elt: {DominatorTree::Delete, BB, Unreachable});
7748
7749 if (DTU)
7750 DTU->applyUpdates(Updates);
7751
7752 return true;
7753}
7754
7755/// Tries to transform switch of powers of two to reduce switch range.
7756/// For example, switch like:
7757/// switch (C) { case 1: case 2: case 64: case 128: }
7758/// will be transformed to:
7759/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7760///
/// This transformation allows better lowering and may transform the switch
/// instruction into a sequence of bit manipulation and a smaller
/// log2(C)-indexed value table (instead of traditionally emitting a load of
/// the address of the jump target and indirectly jumping to it).
7765static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
7766 DomTreeUpdater *DTU,
7767 const DataLayout &DL,
7768 const TargetTransformInfo &TTI) {
7769 Value *Condition = SI->getCondition();
7770 LLVMContext &Context = SI->getContext();
7771 auto *CondTy = cast<IntegerType>(Val: Condition->getType());
7772
7773 if (CondTy->getIntegerBitWidth() > 64 ||
7774 !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
7775 return false;
7776
  // Ensure that emitting the count-trailing-zeros intrinsic is not too
  // expensive.
7778 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7779 {Condition, ConstantInt::getTrue(Context)});
7780 if (TTI.getIntrinsicInstrCost(ICA: Attrs, CostKind: TTI::TCK_SizeAndLatency) >
7781 TTI::TCC_Basic * 2)
7782 return false;
7783
7784 // Only bother with this optimization if there are more than 3 switch cases.
7785 // SDAG will start emitting jump tables for 4 or more cases.
7786 if (SI->getNumCases() < 4)
7787 return false;
7788
7789 // Check that switch cases are powers of two.
7790 SmallVector<uint64_t, 4> Values;
7791 for (const auto &Case : SI->cases()) {
7792 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7793 if (llvm::has_single_bit(Value: CaseValue))
7794 Values.push_back(Elt: CaseValue);
7795 else
7796 return false;
7797 }
7798
  // isSwitchDense requires case values to be sorted.
7800 llvm::sort(C&: Values);
7801 if (!isSwitchDense(NumCases: Values.size(), CaseRange: llvm::countr_zero(Val: Values.back()) -
7802 llvm::countr_zero(Val: Values.front()) + 1))
7803 // Transform is unable to generate dense switch.
7804 return false;
7805
7806 Builder.SetInsertPoint(SI);
7807
7808 if (!SI->defaultDestUnreachable()) {
7809 // Let non-power-of-two inputs jump to the default case, when the latter is
7810 // reachable.
7811 auto *PopC = Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, V: Condition);
7812 auto *IsPow2 = Builder.CreateICmpEQ(LHS: PopC, RHS: ConstantInt::get(Ty: CondTy, V: 1));
7813
7814 auto *OrigBB = SI->getParent();
7815 auto *DefaultCaseBB = SI->getDefaultDest();
7816 BasicBlock *SplitBB = SplitBlock(Old: OrigBB, SplitPt: SI, DTU);
7817 auto It = OrigBB->getTerminator()->getIterator();
7818 SmallVector<uint32_t> Weights;
7819 auto HasWeights =
7820 !ProfcheckDisableMetadataFixes && extractBranchWeights(I: *SI, Weights);
7821 auto *BI = BranchInst::Create(IfTrue: SplitBB, IfFalse: DefaultCaseBB, Cond: IsPow2, InsertBefore: It);
7822 if (HasWeights && any_of(Range&: Weights, P: not_equal_to(Arg: 0))) {
      // The IsPow2 branch covers only a subset of the inputs that go to the
      // default label: the non-powers-of-2. The rest are those powers of 2
      // that don't appear in the case statement. We don't know the
      // distribution of the incoming values, so the safest is to split the
      // original `default` probability 50-50 between the two.
7827 uint64_t OrigDenominator =
7828 sum_of(Range: map_range(C&: Weights, F: StaticCastTo<uint64_t>));
7829 SmallVector<uint64_t> NewWeights(2);
7830 NewWeights[1] = Weights[0] / 2;
7831 NewWeights[0] = OrigDenominator - NewWeights[1];
7832 setFittedBranchWeights(I&: *BI, Weights: NewWeights, /*IsExpected=*/false);
7833 // The probability of executing the default block stays constant. It was
7834 // p_d = Weights[0] / OrigDenominator
7835 // we rewrite as W/D
7836 // We want to find the probability of the default branch of the switch
7837 // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
7838 // i.e. the original probability is the probability we go to the default
7839 // branch from the BI branch, or we take the default branch on the SI.
7840 // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7841 // This matches using W/2 for the default branch probability numerator and
7842 // D-W/2 as the denominator.
7843 Weights[0] = NewWeights[1];
7844 uint64_t CasesDenominator = OrigDenominator - Weights[0];
7845 for (auto &W : drop_begin(RangeOrContainer&: Weights))
7846 W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7847
7848 setBranchWeights(I&: *SI, Weights, /*IsExpected=*/false);
7849 }
7850 // BI is handling the default case for SI, and so should share its DebugLoc.
7851 BI->setDebugLoc(SI->getDebugLoc());
7852 It->eraseFromParent();
7853
7854 addPredecessorToBlock(Succ: DefaultCaseBB, NewPred: OrigBB, ExistPred: SplitBB);
7855 if (DTU)
7856 DTU->applyUpdates(Updates: {{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7857 }
7858
7859 // Replace each case with its trailing zeros number.
7860 for (auto &Case : SI->cases()) {
7861 auto *OrigValue = Case.getCaseValue();
7862 Case.setValue(ConstantInt::get(Ty: OrigValue->getIntegerType(),
7863 V: OrigValue->getValue().countr_zero()));
7864 }
7865
7866 // Replace condition with its trailing zeros number.
7867 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7868 ID: Intrinsic::cttz, Types: {CondTy}, Args: {Condition, ConstantInt::getTrue(Context)});
7869
7870 SI->setCondition(ConditionTrailingZeros);
7871
7872 return true;
7873}
7874
7875/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7876/// the same destination.
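///
/// For example (shown as C, with ucmp yielding -1, 0 or 1):
/// \code
/// switch (ucmp(x, y)) {
/// case -1: goto A;
/// case 1: goto A;
/// default: goto B; // the 0 case
/// }
/// \endcode
/// becomes
/// \code
/// if (x == y) goto B; else goto A;
/// \endcode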
7877static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
7878 DomTreeUpdater *DTU) {
7879 auto *Cmp = dyn_cast<CmpIntrinsic>(Val: SI->getCondition());
7880 if (!Cmp || !Cmp->hasOneUse())
7881 return false;
7882
7883 SmallVector<uint32_t, 4> Weights;
7884 bool HasWeights = extractBranchWeights(ProfileData: getBranchWeightMDNode(I: *SI), Weights);
7885 if (!HasWeights)
7886 Weights.resize(N: 4); // Avoid checking HasWeights everywhere.
7887
7888 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7889 int64_t Res;
7890 BasicBlock *Succ, *OtherSucc;
7891 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7892 BasicBlock *Unreachable = nullptr;
7893
7894 if (SI->getNumCases() == 2) {
7895 // Find which of 1, 0 or -1 is missing (handled by default dest).
7896 SmallSet<int64_t, 3> Missing;
7897 Missing.insert(V: 1);
7898 Missing.insert(V: 0);
7899 Missing.insert(V: -1);
7900
7901 Succ = SI->getDefaultDest();
7902 SuccWeight = Weights[0];
7903 OtherSucc = nullptr;
7904 for (auto &Case : SI->cases()) {
7905 std::optional<int64_t> Val =
7906 Case.getCaseValue()->getValue().trySExtValue();
7907 if (!Val)
7908 return false;
7909 if (!Missing.erase(V: *Val))
7910 return false;
7911 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7912 return false;
7913 OtherSucc = Case.getCaseSuccessor();
7914 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7915 }
7916
7917 assert(Missing.size() == 1 && "Should have one case left");
7918 Res = *Missing.begin();
7919 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7920 // Normalize so that Succ is taken once and OtherSucc twice.
7921 Unreachable = SI->getDefaultDest();
7922 Succ = OtherSucc = nullptr;
7923 for (auto &Case : SI->cases()) {
7924 BasicBlock *NewSucc = Case.getCaseSuccessor();
7925 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7926 if (!OtherSucc || OtherSucc == NewSucc) {
7927 OtherSucc = NewSucc;
7928 OtherSuccWeight += Weight;
7929 } else if (!Succ) {
7930 Succ = NewSucc;
7931 SuccWeight = Weight;
7932 } else if (Succ == NewSucc) {
7933 std::swap(a&: Succ, b&: OtherSucc);
7934 std::swap(a&: SuccWeight, b&: OtherSuccWeight);
7935 } else
7936 return false;
7937 }
7938 for (auto &Case : SI->cases()) {
7939 std::optional<int64_t> Val =
7940 Case.getCaseValue()->getValue().trySExtValue();
7941 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7942 return false;
7943 if (Case.getCaseSuccessor() == Succ) {
7944 Res = *Val;
7945 break;
7946 }
7947 }
7948 } else {
7949 return false;
7950 }
7951
7952 // Determine predicate for the missing case.
7953 ICmpInst::Predicate Pred;
7954 switch (Res) {
7955 case 1:
7956 Pred = ICmpInst::ICMP_UGT;
7957 break;
7958 case 0:
7959 Pred = ICmpInst::ICMP_EQ;
7960 break;
7961 case -1:
7962 Pred = ICmpInst::ICMP_ULT;
7963 break;
7964 }
7965 if (Cmp->isSigned())
7966 Pred = ICmpInst::getSignedPredicate(Pred);
7967
7968 MDNode *NewWeights = nullptr;
7969 if (HasWeights)
7970 NewWeights = MDBuilder(SI->getContext())
7971 .createBranchWeights(TrueWeight: SuccWeight, FalseWeight: OtherSuccWeight);
7972
7973 BasicBlock *BB = SI->getParent();
7974 Builder.SetInsertPoint(SI->getIterator());
7975 Value *ICmp = Builder.CreateICmp(P: Pred, LHS: Cmp->getLHS(), RHS: Cmp->getRHS());
7976 Builder.CreateCondBr(Cond: ICmp, True: Succ, False: OtherSucc, BranchWeights: NewWeights,
7977 Unpredictable: SI->getMetadata(KindID: LLVMContext::MD_unpredictable));
7978 OtherSucc->removePredecessor(Pred: BB);
7979 if (Unreachable)
7980 Unreachable->removePredecessor(Pred: BB);
7981 SI->eraseFromParent();
7982 Cmp->eraseFromParent();
7983 if (DTU && Unreachable)
7984 DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, Unreachable}});
7985 return true;
7986}
7987
7988/// Checking whether two cases of SI are equal depends on the contents of the
7989/// BasicBlock and the incoming values of their successor PHINodes.
7990/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7991/// calling this function on each BasicBlock every time isEqual is called,
7992/// especially since the same BasicBlock may be passed as an argument multiple
7993/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7994/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7995/// of the incoming values.
7996struct SwitchSuccWrapper {
7997 BasicBlock *Dest;
7998 DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> *PhiPredIVs;
7999};
8000
8001template <> struct llvm::DenseMapInfo<const SwitchSuccWrapper *> {
8002 static const SwitchSuccWrapper *getEmptyKey() {
8003 return static_cast<SwitchSuccWrapper *>(
8004 DenseMapInfo<void *>::getEmptyKey());
8005 }
8006 static const SwitchSuccWrapper *getTombstoneKey() {
8007 return static_cast<SwitchSuccWrapper *>(
8008 DenseMapInfo<void *>::getTombstoneKey());
8009 }
8010 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
8011 BasicBlock *Succ = SSW->Dest;
8012 BranchInst *BI = cast<BranchInst>(Val: Succ->getTerminator());
8013 assert(BI->isUnconditional() &&
8014 "Only supporting unconditional branches for now");
8015 assert(BI->getNumSuccessors() == 1 &&
8016 "Expected unconditional branches to have one successor");
8017 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
8018
8019 // Since we assume the BB is just a single BranchInst with a single
8020 // successor, we hash as the BB and the incoming Values of its successor
8021 // PHIs. Initially, we tried to just use the successor BB as the hash, but
8022 // including the incoming PHI values leads to better performance.
8023 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
8024 // time and passing it in SwitchSuccWrapper, but this slowed down the
8025 // average compile time without having any impact on the worst case compile
8026 // time.
8027 BasicBlock *BB = BI->getSuccessor(i: 0);
8028 SmallVector<Value *> PhiValsForBB;
8029 for (PHINode &Phi : BB->phis())
8030 PhiValsForBB.emplace_back(Args&: (*SSW->PhiPredIVs)[&Phi][BB]);
8031
8032 return hash_combine(args: BB, args: hash_combine_range(R&: PhiValsForBB));
8033 }
8034 static bool isEqual(const SwitchSuccWrapper *LHS,
8035 const SwitchSuccWrapper *RHS) {
8036 auto EKey = DenseMapInfo<SwitchSuccWrapper *>::getEmptyKey();
8037 auto TKey = DenseMapInfo<SwitchSuccWrapper *>::getTombstoneKey();
8038 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
8039 return LHS == RHS;
8040
8041 BasicBlock *A = LHS->Dest;
8042 BasicBlock *B = RHS->Dest;
8043
8044 // FIXME: we checked that the size of A and B are both 1 in
8045 // simplifyDuplicateSwitchArms to make the Case list smaller to
8046 // improve performance. If we decide to support BasicBlocks with more
8047 // than just a single instruction, we need to check that A.size() ==
8048 // B.size() here, and we need to check more than just the BranchInsts
8049 // for equality.
8050
8051 BranchInst *ABI = cast<BranchInst>(Val: A->getTerminator());
8052 BranchInst *BBI = cast<BranchInst>(Val: B->getTerminator());
8053 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
8054 "Only supporting unconditional branches for now");
8055 if (ABI->getSuccessor(i: 0) != BBI->getSuccessor(i: 0))
8056 return false;
8057
8058 // Need to check that PHIs in successor have matching values
8059 BasicBlock *Succ = ABI->getSuccessor(i: 0);
8060 for (PHINode &Phi : Succ->phis()) {
8061 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8062 if (PredIVs[A] != PredIVs[B])
8063 return false;
8064 }
8065
8066 return true;
8067 }
8068};
8069
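/// Replace duplicated switch arms, i.e. case successors containing only an
/// unconditional branch to a common destination and passing identical values
/// to that destination's phis, by redirecting their cases to a single shared
/// arm. Conceptually (shown as C):
/// \code
/// case 0: r = 1; goto out;
/// case 1: r = 1; goto out; // duplicate of case 0's arm; gets redirected
/// \endcode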
8070bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8071 DomTreeUpdater *DTU) {
8072 // Build Cases. Skip BBs that are not candidates for simplification. Mark
8073 // PHINodes which need to be processed into PhiPredIVs. We decide to process
  // an entire PHI at once after the loop, as opposed to calling
8075 // getIncomingValueForBlock inside this loop, since each call to
8076 // getIncomingValueForBlock is O(|Preds|).
8077 SmallPtrSet<PHINode *, 8> Phis;
8078 SmallPtrSet<BasicBlock *, 8> Seen;
8079 DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
8080 DenseMap<BasicBlock *, SmallVector<unsigned, 32>> BBToSuccessorIndexes;
8081 SmallVector<SwitchSuccWrapper> Cases;
8082 Cases.reserve(N: SI->getNumSuccessors());
8083
8084 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
8085 BasicBlock *BB = SI->getSuccessor(idx: I);
8086
8087 // FIXME: Support more than just a single BranchInst. One way we could do
8088 // this is by taking a hashing approach of all insts in BB.
8089 if (BB->size() != 1)
8090 continue;
8091
    // FIXME: Relax the requirement that the terminator is a BranchInst by
    // checking for equality on other kinds of terminators. We decide to only
    // support unconditional branches for now for compile time reasons.
8095 auto *BI = dyn_cast<BranchInst>(Val: BB->getTerminator());
8096 if (!BI || BI->isConditional())
8097 continue;
8098
8099 if (!Seen.insert(Ptr: BB).second) {
8100 auto It = BBToSuccessorIndexes.find(Val: BB);
8101 if (It != BBToSuccessorIndexes.end())
8102 It->second.emplace_back(Args&: I);
8103 continue;
8104 }
8105
8106 // FIXME: This case needs some extra care because the terminators other than
8107 // SI need to be updated. For now, consider only backedges to the SI.
8108 if (BB->getUniquePredecessor() != SI->getParent())
8109 continue;
8110
8111 // Keep track of which PHIs we need as keys in PhiPredIVs below.
8112 for (BasicBlock *Succ : BI->successors())
8113 Phis.insert_range(R: llvm::make_pointer_range(Range: Succ->phis()));
8114
8115 // Add the successor only if not previously visited.
8116 Cases.emplace_back(Args: SwitchSuccWrapper{.Dest: BB, .PhiPredIVs: &PhiPredIVs});
8117 BBToSuccessorIndexes[BB].emplace_back(Args&: I);
8118 }
8119
8120 // Precompute a data structure to improve performance of isEqual for
8121 // SwitchSuccWrapper.
8122 PhiPredIVs.reserve(NumEntries: Phis.size());
8123 for (PHINode *Phi : Phis) {
8124 auto &IVs =
8125 PhiPredIVs.try_emplace(Key: Phi, Args: Phi->getNumIncomingValues()).first->second;
8126 for (auto &IV : Phi->incoming_values())
8127 IVs.insert(KV: {Phi->getIncomingBlock(U: IV), IV.get()});
8128 }
8129
8130 // Build a set such that if the SwitchSuccWrapper exists in the set and
8131 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
8132 // which is not in the set should be replaced with the one in the set. If the
8133 // SwitchSuccWrapper is not in the set, then it should be added to the set so
8134 // other SwitchSuccWrappers can check against it in the same manner. We use
8135 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
  // around information to isEqual, getHashValue, and when doing the
8137 // replacement with better performance.
8138 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
8139 ReplaceWith.reserve(Size: Cases.size());
8140
8141 SmallVector<DominatorTree::UpdateType> Updates;
8142 Updates.reserve(N: ReplaceWith.size());
8143 bool MadeChange = false;
8144 for (auto &SSW : Cases) {
8145 // SSW is a candidate for simplification. If we find a duplicate BB,
8146 // replace it.
8147 const auto [It, Inserted] = ReplaceWith.insert(V: &SSW);
8148 if (!Inserted) {
8149 // We know that SI's parent BB no longer dominates the old case successor
8150 // since we are making it dead.
8151 Updates.push_back(Elt: {DominatorTree::Delete, SI->getParent(), SSW.Dest});
8152 const auto &Successors = BBToSuccessorIndexes.at(Val: SSW.Dest);
8153 for (unsigned Idx : Successors)
8154 SI->setSuccessor(idx: Idx, NewSucc: (*It)->Dest);
8155 MadeChange = true;
8156 }
8157 }
8158
8159 if (DTU)
8160 DTU->applyUpdates(Updates);
8161
8162 return MadeChange;
8163}
8164
8165bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8166 BasicBlock *BB = SI->getParent();
8167
8168 if (isValueEqualityComparison(TI: SI)) {
8169 // If we only have one predecessor, and if it is a branch on this value,
8170 // see if that predecessor totally determines the outcome of this switch.
8171 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8172 if (simplifyEqualityComparisonWithOnlyPredecessor(TI: SI, Pred: OnlyPred, Builder))
8173 return requestResimplify();
8174
8175 Value *Cond = SI->getCondition();
8176 if (SelectInst *Select = dyn_cast<SelectInst>(Val: Cond))
8177 if (simplifySwitchOnSelect(SI, Select))
8178 return requestResimplify();
8179
8180 // If the block only contains the switch, see if we can fold the block
8181 // away into any preds.
8182 if (SI == &*BB->instructionsWithoutDebug(SkipPseudoOp: false).begin())
8183 if (foldValueComparisonIntoPredecessors(TI: SI, Builder))
8184 return requestResimplify();
8185 }
8186
8187 // Try to transform the switch into an icmp and a branch.
8188 // The conversion from switch to comparison may lose information on
8189 // impossible switch values, so disable it early in the pipeline.
8190 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8191 return requestResimplify();
8192
8193 // Remove unreachable cases.
8194 if (eliminateDeadSwitchCases(SI, DTU, AC: Options.AC, DL))
8195 return requestResimplify();
8196
8197 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8198 return requestResimplify();
8199
8200 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8201 return requestResimplify();
8202
8203 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8204 return requestResimplify();
8205
8206 // The conversion of switches to arithmetic or lookup table is disabled in
8207 // the early optimization pipeline, as it may lose information or make the
8208 // resulting code harder to analyze.
8209 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8210 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8211 ConvertSwitchToLookupTable: Options.ConvertSwitchToLookupTable))
8212 return requestResimplify();
8213
8214 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8215 return requestResimplify();
8216
8217 if (reduceSwitchRange(SI, Builder, DL, TTI))
8218 return requestResimplify();
8219
8220 if (HoistCommon &&
8221 hoistCommonCodeFromSuccessors(TI: SI, AllInstsEqOnly: !Options.HoistCommonInsts))
8222 return requestResimplify();
8223
8224 if (simplifyDuplicateSwitchArms(SI, DTU))
8225 return requestResimplify();
8226
8227 if (simplifySwitchWhenUMin(SI, DTU))
8228 return requestResimplify();
8229
8230 return false;
8231}
8232
8233bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
8234 BasicBlock *BB = IBI->getParent();
8235 bool Changed = false;
8236 SmallVector<uint32_t> BranchWeights;
8237 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8238 extractBranchWeights(I: *IBI, Weights&: BranchWeights);
8239
8240 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8241 if (HasBranchWeights)
8242 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8243 TargetWeight[IBI->getDestination(i: I)] += BranchWeights[I];
8244
8245 // Eliminate redundant destinations.
8246 SmallPtrSet<Value *, 8> Succs;
8247 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8248 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8249 BasicBlock *Dest = IBI->getDestination(i: I);
8250 if (!Dest->hasAddressTaken() || !Succs.insert(Ptr: Dest).second) {
8251 if (!Dest->hasAddressTaken())
8252 RemovedSuccs.insert(X: Dest);
8253 Dest->removePredecessor(Pred: BB);
8254 IBI->removeDestination(i: I);
8255 --I;
8256 --E;
8257 Changed = true;
8258 }
8259 }
8260
8261 if (DTU) {
8262 std::vector<DominatorTree::UpdateType> Updates;
8263 Updates.reserve(n: RemovedSuccs.size());
8264 for (auto *RemovedSucc : RemovedSuccs)
8265 Updates.push_back(x: {DominatorTree::Delete, BB, RemovedSucc});
8266 DTU->applyUpdates(Updates);
8267 }
8268
8269 if (IBI->getNumDestinations() == 0) {
8270 // If the indirectbr has no successors, change it to unreachable.
8271 new UnreachableInst(IBI->getContext(), IBI->getIterator());
8272 eraseTerminatorAndDCECond(TI: IBI);
8273 return true;
8274 }
8275
8276 if (IBI->getNumDestinations() == 1) {
8277 // If the indirectbr has one successor, change it to a direct branch.
8278 BranchInst::Create(IfTrue: IBI->getDestination(i: 0), InsertBefore: IBI->getIterator());
8279 eraseTerminatorAndDCECond(TI: IBI);
8280 return true;
8281 }
8282 if (HasBranchWeights) {
8283 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8284 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8285 NewBranchWeights[I] += TargetWeight.find(Val: IBI->getDestination(i: I))->second;
8286 setFittedBranchWeights(I&: *IBI, Weights: NewBranchWeights, /*IsExpected=*/false);
8287 }
8288 if (SelectInst *SI = dyn_cast<SelectInst>(Val: IBI->getAddress())) {
8289 if (simplifyIndirectBrOnSelect(IBI, SI))
8290 return requestResimplify();
8291 }
8292 return Changed;
8293}
8294
/// Given a block with only a single landing pad and an unconditional branch,
/// try to find another basic block which this one can be merged with. This
8297/// handles cases where we have multiple invokes with unique landing pads, but
8298/// a shared handler.
8299///
8300/// We specifically choose to not worry about merging non-empty blocks
8301/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8302/// practice, the optimizer produces empty landing pad blocks quite frequently
8303/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8304/// sinking in this file)
8305///
8306/// This is primarily a code size optimization. We need to avoid performing
8307/// any transform which might inhibit optimization (such as our ability to
8308/// specialize a particular handler via tail commoning). We do this by not
8309/// merging any blocks which require us to introduce a phi. Since the same
8310/// values are flowing through both blocks, we don't lose any ability to
8311/// specialize. If anything, we make such specialization more likely.
8312///
8313/// TODO - This transformation could remove entries from a phi in the target
8314/// block when the inputs in the phi are the same for the two blocks being
8315/// merged. In some cases, this could result in removal of the PHI entirely.
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
                                 BasicBlock *BB, DomTreeUpdater *DTU) {
  auto Succ = BB->getUniqueSuccessor();
  assert(Succ);
  // If there's a phi in the successor block, we'd likely have to introduce
  // a phi into the merged landing pad block.
  if (isa<PHINode>(*Succ->begin()))
    return false;

  for (BasicBlock *OtherPred : predecessors(Succ)) {
    if (BB == OtherPred)
      continue;
    BasicBlock::iterator I = OtherPred->begin();
    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
      continue;
    ++I;
    BranchInst *BI2 = dyn_cast<BranchInst>(I);
    if (!BI2 || !BI2->isIdenticalTo(BI))
      continue;

    std::vector<DominatorTree::UpdateType> Updates;

    // We've found an identical block. Update our predecessors to take that
    // path instead and make ourselves dead.
    SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
    for (BasicBlock *Pred : UniquePreds) {
      InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
             "unexpected successor");
      II->setUnwindDest(OtherPred);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
        Updates.push_back({DominatorTree::Delete, Pred, BB});
      }
    }

    SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
    for (BasicBlock *Succ : UniqueSuccs) {
      Succ->removePredecessor(BB);
      if (DTU)
        Updates.push_back({DominatorTree::Delete, BB, Succ});
    }

    IRBuilder<> Builder(BI);
    Builder.CreateUnreachable();
    BI->eraseFromParent();
    if (DTU)
      DTU->applyUpdates(Updates);
    return true;
  }
  return false;
}

bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
  return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
                                   : simplifyCondBranch(Branch, Builder);
}

bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
                                          IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *Succ = BI->getSuccessor(0);

  // If the terminator is the only non-phi instruction, simplify the block.
  // If LoopHeaders is provided, check if the block or its successor is a loop
  // header. (This is for early invocations before loop simplify and
  // vectorization, to keep canonical loop forms for nested loops. These
  // blocks can be eliminated when the pass is invoked later in the back-end.)
  // Note that if BB has only one predecessor, then we do not introduce a new
  // backedge, so we can eliminate BB.
  bool NeedCanonicalLoop =
      Options.NeedCanonicalLoop &&
      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
       (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
    return true;

  // If the only instruction in the block is a seteq/setne comparison against
  // a constant, try to simplify the block.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
      ++I;
      if (I->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
        return true;
      if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
                                                      Builder))
        return true;
    }
  }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
    ++I;
    if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
      return true;
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and our successor, fold the comparison into the
  // predecessor and use logical operations to update the incoming values
  // for PHI nodes in the common successor.
  if (Options.SpeculateBlocks &&
      foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
                             Options.BonusInstThreshold))
    return requestResimplify();
  return false;
}

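/// If every predecessor of BB has exactly one predecessor and all of those
/// grand-predecessors are the same block, return that block; otherwise return
/// null. This identifies the single source block from which all of BB's
/// predecessors branch (e.g., the head of a diamond ending at BB).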
static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
  BasicBlock *PredPred = nullptr;
  for (auto *P : predecessors(BB)) {
    BasicBlock *PPred = P->getSinglePredecessor();
    if (!PPred || (PredPred && PredPred != PPred))
      return nullptr;
    PredPred = PPred;
  }
  return PredPred;
}

/// Fold the following pattern:
/// bb0:
///   br i1 %cond1, label %bb1, label %bb2
/// bb1:
///   br i1 %cond2, label %bb3, label %bb4
/// bb2:
///   br i1 %cond2, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// into
/// bb0:
///   %cond = xor i1 %cond1, %cond2
///   br i1 %cond, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// NOTE: %cond2 always dominates the terminator of bb0.
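///
/// Why xor is correct here: control reaches bb3 exactly when %cond1 and
/// %cond2 agree (true/true via bb1, false/false via bb2), and reaches bb4
/// when they disagree; (xor %cond1, %cond2) is true precisely in the
/// "disagree" case, hence the new branch's true edge targets bb4.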
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *BB1 = BI->getSuccessor(0);
  BasicBlock *BB2 = BI->getSuccessor(1);
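  // A successor qualifies only if it contains nothing but a conditional
  // branch, does not branch back to BB or to itself, and neither of its
  // targets starts with a phi (so no incoming values need fixing up).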
  auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
    if (Succ == BB)
      return false;
    if (&Succ->front() != Succ->getTerminator())
      return false;
    SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
    if (!SuccBI || !SuccBI->isConditional())
      return false;
    BasicBlock *Succ1 = SuccBI->getSuccessor(0);
    BasicBlock *Succ2 = SuccBI->getSuccessor(1);
    return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
           !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
  };
  BranchInst *BB1BI, *BB2BI;
  if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
    return false;

  if (BB1BI->getCondition() != BB2BI->getCondition() ||
      BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
      BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
    return false;

  BasicBlock *BB3 = BB1BI->getSuccessor(0);
  BasicBlock *BB4 = BB1BI->getSuccessor(1);
  IRBuilder<> Builder(BI);
  BI->setCondition(
      Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
  BB1->removePredecessor(BB);
  BI->setSuccessor(0, BB4);
  BB2->removePredecessor(BB);
  BI->setSuccessor(1, BB3);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 4> Updates;
    Updates.push_back({DominatorTree::Delete, BB, BB1});
    Updates.push_back({DominatorTree::Insert, BB, BB4});
    Updates.push_back({DominatorTree::Delete, BB, BB2});
    Updates.push_back({DominatorTree::Insert, BB, BB3});

    DTU->applyUpdates(Updates);
  }
  bool HasWeight = false;
  uint64_t BBTWeight, BBFWeight;
  if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
    HasWeight = true;
  else
    BBTWeight = BBFWeight = 1;
  uint64_t BB1TWeight, BB1FWeight;
  if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
    HasWeight = true;
  else
    BB1TWeight = BB1FWeight = 1;
  uint64_t BB2TWeight, BB2FWeight;
  if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
    HasWeight = true;
  else
    BB2TWeight = BB2FWeight = 1;
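  // The rewritten branch jumps to BB4 on true. BB4 is reached either via BB1
  // taking its false edge or via BB2 taking its true edge, so its weight is
  // BBT*BB1F + BBF*BB2T; the BB3 weight is the complementary sum.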
  if (HasWeight) {
    uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
                           BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
    setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
  }
  return true;
}

bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
  assert(
      !isa<ConstantInt>(BI->getCondition()) &&
      BI->getSuccessor(0) != BI->getSuccessor(1) &&
      "Tautological conditional branch should have been eliminated already.");

  BasicBlock *BB = BI->getParent();
  if (!Options.SimplifyCondBranch ||
      BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
    return false;

  // Conditional branch.
  if (isValueEqualityComparison(BI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this branch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
        return requestResimplify();

    // This block must be empty, except for the setcond inst, if it exists.
    // Ignore dbg and pseudo intrinsics.
    auto I = BB->instructionsWithoutDebug(/*SkipPseudoOp=*/true).begin();
    if (&*I == BI) {
      if (foldValueComparisonIntoPredecessors(BI, Builder))
        return requestResimplify();
    } else if (&*I == cast<Instruction>(BI->getCondition())) {
      ++I;
      if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
        return requestResimplify();
    }
  }

  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
  if (simplifyBranchOnICmpChain(BI, Builder, DL))
    return true;

  // If this basic block has dominating predecessor blocks and the dominating
  // blocks' conditions imply BI's condition, we know the direction of BI.
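  // (For example, if a dominating block branched on (X > 10) to reach this
  // one, a branch here on (X > 5) is implied true and can be folded to a
  // branch on a constant condition.)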
  std::optional<bool> Imp =
      isImpliedByDomCondition(BI->getCondition(), BI, DL);
  if (Imp) {
    // Turn this into a branch on constant.
    auto *OldCond = BI->getCondition();
    ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
                             : ConstantInt::getFalse(BB->getContext());
    BI->setCondition(TorF);
    RecursivelyDeleteTriviallyDeadInstructions(OldCond);
    return requestResimplify();
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and one of our successors, fold the comparison into the
  // predecessor and use logical operations to pick the right destination.
  if (Options.SpeculateBlocks &&
      foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
                             Options.BonusInstThreshold))
    return requestResimplify();

  // We have a conditional branch to two blocks that are only reachable
  // from BI. We know that the condbr dominates the two blocks, so see if
  // there is any identical code in the "then" and "else" blocks. If so, we
  // can hoist it up to the branching block.
  if (BI->getSuccessor(0)->getSinglePredecessor()) {
    if (BI->getSuccessor(1)->getSinglePredecessor()) {
      if (HoistCommon &&
          hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
        return requestResimplify();

      if (BI && Options.HoistLoadsStoresWithCondFaulting &&
          isProfitableToSpeculate(BI, /*Invert=*/std::nullopt, TTI)) {
        SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
        auto CanSpeculateConditionalLoadsStores = [&]() {
          for (auto *Succ : successors(BB)) {
            for (Instruction &I : *Succ) {
              if (I.isTerminator()) {
                if (I.getNumSuccessors() > 1)
                  return false;
                continue;
              } else if (!isSafeCheapLoadStore(&I, TTI) ||
                         SpeculatedConditionalLoadsStores.size() ==
                             HoistLoadsStoresWithCondFaultingThreshold) {
                return false;
              }
              SpeculatedConditionalLoadsStores.push_back(&I);
            }
          }
          return !SpeculatedConditionalLoadsStores.empty();
        };

        if (CanSpeculateConditionalLoadsStores()) {
          hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
                                      /*Invert=*/std::nullopt,
                                      /*Sel=*/nullptr);
          return requestResimplify();
        }
      }
    } else {
      // If Successor #1 has multiple preds, we may be able to conditionally
      // execute Successor #0 if it branches to Successor #1.
      Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
      if (Succ0TI->getNumSuccessors() == 1 &&
          Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
        if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
          return requestResimplify();
    }
  } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
    // If Successor #0 has multiple preds, we may be able to conditionally
    // execute Successor #1 if it branches to Successor #0.
    Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
    if (Succ1TI->getNumSuccessors() == 1 &&
        Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
      if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
        return requestResimplify();
  }

  // If this is a branch on something for which we know the constant value in
  // predecessors (e.g. a phi node in the current block), thread control
  // through this block.
  if (foldCondBranchOnValueKnownInPredecessor(BI))
    return requestResimplify();

  // Scan predecessor blocks for conditional branches.
  for (BasicBlock *Pred : predecessors(BB))
    if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
      if (PBI != BI && PBI->isConditional())
        if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
          return requestResimplify();

  // Look for diamond patterns.
  if (MergeCondStores)
    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
      if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
        if (PBI != BI && PBI->isConditional())
          if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
            return requestResimplify();

  // Look for nested conditional branches.
  if (mergeNestedCondBranch(BI, DTU))
    return requestResimplify();

  return false;
}

/// Check if passing a value to an instruction will cause undefined behavior.
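/// For example, a null pointer flowing into a non-volatile load in the same
/// block is immediate UB (in address spaces where null is not
/// dereferenceable), as is passing undef to a noundef call argument. Only
/// constant values are examined, and only null and undef are considered.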
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I,
                                          bool PtrValueMayBeModified = false) {
  assert(V->getType() == I->getType() && "Mismatched types");
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return false;

  if (I->use_empty())
    return false;

  if (C->isNullValue() || isa<UndefValue>(C)) {
    // Only look at the first use we can handle, to avoid hurting compile time
    // with long uselists.
    auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
      auto *Use = cast<Instruction>(U.getUser());
      // Change this list when we want to add new instructions.
      switch (Use->getOpcode()) {
      default:
        return false;
      case Instruction::GetElementPtr:
      case Instruction::Ret:
      case Instruction::BitCast:
      case Instruction::Load:
      case Instruction::Store:
      case Instruction::Call:
      case Instruction::CallBr:
      case Instruction::Invoke:
      case Instruction::UDiv:
      case Instruction::URem:
        // Note: signed div/rem of INT_MIN by -1 is also immediate UB; it is
        // not handled here, to avoid code complexity, as it is unclear how
        // useful such logic would be.
      case Instruction::SDiv:
      case Instruction::SRem:
        return true;
      }
    });
    if (FindUse == I->use_end())
      return false;
    auto &Use = *FindUse;
    auto *User = cast<Instruction>(Use.getUser());
    // Bail out if User is not in the same BB as I, if User == I, or if User
    // comes before I in the block; the latter two can happen when User is a
    // PHI node.
    if (User->getParent() != I->getParent() || User == I ||
        User->comesBefore(I))
      return false;

    // Now make sure that there are no instructions in between that can alter
    // control flow (eg. calls).
    auto InstrRange =
        make_range(std::next(I->getIterator()), User->getIterator());
    if (any_of(InstrRange, [](Instruction &I) {
          return !isGuaranteedToTransferExecutionToSuccessor(&I);
        }))
      return false;

    // Look through GEPs. A load from a GEP derived from null is still
    // undefined.
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User))
      if (GEP->getPointerOperand() == I) {
        // The type of the GEP may differ from the type of the base pointer.
        // Bail out on vector GEPs, as they are not handled by other checks.
        if (GEP->getType()->isVectorTy())
          return false;
        // The current base address is null; there are four cases to consider:
        //   getelementptr (TY, null, 0)                 -> null
        //   getelementptr (TY, null, not zero)          -> may be modified
        //   getelementptr inbounds (TY, null, 0)        -> null
        //   getelementptr inbounds (TY, null, not zero) -> poison iff null is
        //                                                  undefined?
        if (!GEP->hasAllZeroIndices() &&
            (!GEP->isInBounds() ||
             NullPointerIsDefined(GEP->getFunction(),
                                  GEP->getPointerAddressSpace())))
          PtrValueMayBeModified = true;
        return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
      }

    // Look through return.
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
      bool HasNoUndefAttr =
          Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
      // Returning undef to a noundef return value is undefined.
      if (isa<UndefValue>(C) && HasNoUndefAttr)
        return true;
      // Returning null to a nonnull+noundef return value is undefined.
      if (C->isNullValue() && HasNoUndefAttr &&
          Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
        return !PtrValueMayBeModified;
      }
    }

    // Load from null is undefined.
    if (LoadInst *LI = dyn_cast<LoadInst>(User))
      if (!LI->isVolatile())
        return !NullPointerIsDefined(LI->getFunction(),
                                     LI->getPointerAddressSpace());

    // Store to null is undefined.
    if (StoreInst *SI = dyn_cast<StoreInst>(User))
      if (!SI->isVolatile())
        return (!NullPointerIsDefined(SI->getFunction(),
                                      SI->getPointerAddressSpace())) &&
               SI->getPointerOperand() == I;

    // llvm.assume(false/undef) always triggers immediate UB.
    if (auto *Assume = dyn_cast<AssumeInst>(User)) {
      // Ignore assume operand bundles.
      if (I == Assume->getArgOperand(0))
        return true;
    }

    if (auto *CB = dyn_cast<CallBase>(User)) {
      if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
        return false;
      // A call to null is undefined.
      if (CB->getCalledOperand() == I)
        return true;

      if (CB->isArgOperand(&Use)) {
        unsigned ArgIdx = CB->getArgOperandNo(&Use);
        // Passing null to a nonnull+noundef argument is undefined.
        if (isa<ConstantPointerNull>(C) &&
            CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
          return !PtrValueMayBeModified;
        // Passing undef to a noundef argument is undefined.
        if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
          return true;
      }
    }
    // Div/Rem by zero is immediate UB.
    if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
      return true;
  }
  return false;
}

/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
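///
/// For example (schematic IR, not taken from a test case):
///
///   pred:
///     br label %bb
///   ...
///   bb:
///     %p = phi ptr [ null, %pred ], [ %q, %other ]
///     %v = load i32, ptr %p
///
/// Control arriving from %pred would load from null, so the unconditional
/// branch in %pred is replaced with an unreachable (a conditional branch or
/// switch instead loses the offending destination).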
static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
                                              DomTreeUpdater *DTU,
                                              AssumptionCache *AC) {
  for (PHINode &PHI : BB->phis())
    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
        Instruction *T = Predecessor->getTerminator();
        IRBuilder<> Builder(T);
        if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
          BB->removePredecessor(Predecessor);
          // Turn unconditional branches into unreachables and remove the dead
          // destination from conditional branches.
          if (BI->isUnconditional())
            Builder.CreateUnreachable();
          else {
            // Preserve the guarding condition in an assume, because it might
            // not be inferrable from any dominating condition.
            Value *Cond = BI->getCondition();
            CallInst *Assumption;
            if (BI->getSuccessor(0) == BB)
              Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
            else
              Assumption = Builder.CreateAssumption(Cond);
            if (AC)
              AC->registerAssumption(cast<AssumeInst>(Assumption));
            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
                                                       : BI->getSuccessor(0));
          }
          BI->eraseFromParent();
          if (DTU)
            DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
          return true;
        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
          // Redirect all branches leading to UB into
          // a newly created unreachable block.
          BasicBlock *Unreachable = BasicBlock::Create(
              Predecessor->getContext(), "unreachable", BB->getParent(), BB);
          Builder.SetInsertPoint(Unreachable);
          // The new block contains only one instruction: Unreachable.
          Builder.CreateUnreachable();
          for (const auto &Case : SI->cases())
            if (Case.getCaseSuccessor() == BB) {
              BB->removePredecessor(Predecessor);
              Case.setSuccessor(Unreachable);
            }
          if (SI->getDefaultDest() == BB) {
            BB->removePredecessor(Predecessor);
            SI->setDefaultDest(Unreachable);
          }

          if (DTU)
            DTU->applyUpdates(
                {{DominatorTree::Insert, Predecessor, Unreachable},
                 {DominatorTree::Delete, Predecessor, BB}});
          return true;
        }
      }

  return false;
}

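/// Perform one round of simplification over BB. Returns true if any change
/// was made. Callers that need a fixed point should use run(), which iterates
/// as long as resimplification is requested.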
bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
  bool Changed = false;

  assert(BB && BB->getParent() && "Block not embedded in function!");
  assert(BB->getTerminator() && "Degenerate basic block encountered!");

  // Remove basic blocks that have no predecessors (except the entry block)...
  // or that just have themselves as a predecessor. These are unreachable.
  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
      BB->getSinglePredecessor() == BB) {
    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  // Check to see if we can constant propagate this terminator instruction
  // away...
  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
                                    /*TLI=*/nullptr, DTU);

  // Check for and eliminate duplicate PHI nodes in this block.
  Changed |= EliminateDuplicatePHINodes(BB);

  // Check for and remove branches that will always cause undefined behavior.
  if (removeUndefIntroducingPredecessor(BB, DTU, Options.AC))
    return requestResimplify();

  // Merge basic blocks into their predecessor if there is only one distinct
  // pred, and if there is only one distinct successor of the predecessor, and
  // if there are no PHI nodes.
  if (MergeBlockIntoPredecessor(BB, DTU))
    return true;

  if (SinkCommon && Options.SinkCommonInsts)
    if (sinkCommonCodeFromPredecessors(BB, DTU) ||
        mergeCompatibleInvokes(BB, DTU)) {
      // sinkCommonCodeFromPredecessors() does not automatically CSE PHIs,
      // so we may now have duplicate PHIs.
      // Let's rerun EliminateDuplicatePHINodes() first,
      // before foldTwoEntryPHINode() potentially converts them into selects,
      // after which we'd need a whole EarlyCSE pass run to clean them up.
      return true;
    }

  IRBuilder<> Builder(BB);

  if (Options.SpeculateBlocks &&
      !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
    // If there is a trivial two-entry PHI node in this basic block, and we
    // can eliminate it, do so now.
    if (auto *PN = dyn_cast<PHINode>(BB->begin()))
      if (PN->getNumIncomingValues() == 2)
        if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
                                Options.SpeculateUnpredictables))
          return true;
  }

  Instruction *Terminator = BB->getTerminator();
  Builder.SetInsertPoint(Terminator);
  switch (Terminator->getOpcode()) {
  case Instruction::Br:
    Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
    break;
  case Instruction::Resume:
    Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
    break;
  case Instruction::CleanupRet:
    Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
    break;
  case Instruction::Switch:
    Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
    break;
  case Instruction::Unreachable:
    Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
    break;
  case Instruction::IndirectBr:
    Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
    break;
  }

  return Changed;
}

bool SimplifyCFGOpt::run(BasicBlock *BB) {
  bool Changed = false;

  // Repeatedly simplify BB as long as resimplification is requested.
  do {
    Resimplify = false;

    // Perform one round of simplification. The Resimplify flag will be set if
    // another iteration is requested.
    Changed |= simplifyOnce(BB);
  } while (Resimplify);

  return Changed;
}

bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
                       DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
                       ArrayRef<WeakVH> LoopHeaders) {
  return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders, Options)
      .run(BB);
}