1//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs loop invariant code motion, attempting to remove as much
10// code from the body of a loop as possible. It does this by either hoisting
11// code into the preheader block, or by sinking code to the exit blocks if it is
12// safe. This pass also promotes must-aliased memory locations in the loop to
13// live in registers, thus hoisting and sinking "invariant" loads and stores.
14//
15// Hoisting operations out of loops is a canonicalization transform. It
16// enables and simplifies subsequent optimizations in the middle-end.
17// Rematerialization of hoisted instructions to reduce register pressure is the
18// responsibility of the back-end, which has more accurate information about
19// register pressure and also handles other optimizations than LICM that
20// increase live-ranges.
21//
22// This pass uses alias analysis for two purposes:
23//
24// 1. Moving loop invariant loads and calls out of loops. If we can determine
25// that a load or call inside of a loop never aliases anything stored to,
26// we can hoist it or sink it like any other instruction.
27// 2. Scalar Promotion of Memory - If there is a store instruction inside of
28// the loop, we try to move the store to happen AFTER the loop instead of
29// inside of the loop. This can only happen if a few conditions are true:
30// A. The pointer stored through is loop invariant
31// B. There are no stores or loads in the loop which _may_ alias the
32// pointer. There are no calls in the loop which mod/ref the pointer.
33// If these conditions are true, we can promote the loads and stores in the
34// loop of the pointer to use a temporary alloca'd variable. We then use
35// the SSAUpdater to construct the appropriate SSA form for the value.
36//
37//===----------------------------------------------------------------------===//
38
39#include "llvm/Transforms/Scalar/LICM.h"
40#include "llvm/ADT/PriorityWorklist.h"
41#include "llvm/ADT/SetOperations.h"
42#include "llvm/ADT/Statistic.h"
43#include "llvm/Analysis/AliasAnalysis.h"
44#include "llvm/Analysis/AliasSetTracker.h"
45#include "llvm/Analysis/AssumptionCache.h"
46#include "llvm/Analysis/CaptureTracking.h"
47#include "llvm/Analysis/DomTreeUpdater.h"
48#include "llvm/Analysis/GuardUtils.h"
49#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
50#include "llvm/Analysis/Loads.h"
51#include "llvm/Analysis/LoopInfo.h"
52#include "llvm/Analysis/LoopIterator.h"
53#include "llvm/Analysis/LoopNestAnalysis.h"
54#include "llvm/Analysis/LoopPass.h"
55#include "llvm/Analysis/MemorySSA.h"
56#include "llvm/Analysis/MemorySSAUpdater.h"
57#include "llvm/Analysis/MustExecute.h"
58#include "llvm/Analysis/OptimizationRemarkEmitter.h"
59#include "llvm/Analysis/ScalarEvolution.h"
60#include "llvm/Analysis/TargetLibraryInfo.h"
61#include "llvm/Analysis/TargetTransformInfo.h"
62#include "llvm/Analysis/ValueTracking.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DataLayout.h"
66#include "llvm/IR/DebugInfoMetadata.h"
67#include "llvm/IR/DerivedTypes.h"
68#include "llvm/IR/Dominators.h"
69#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Instructions.h"
71#include "llvm/IR/IntrinsicInst.h"
72#include "llvm/IR/LLVMContext.h"
73#include "llvm/IR/Metadata.h"
74#include "llvm/IR/PatternMatch.h"
75#include "llvm/IR/PredIteratorCache.h"
76#include "llvm/InitializePasses.h"
77#include "llvm/Support/CommandLine.h"
78#include "llvm/Support/Debug.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Transforms/Scalar.h"
81#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82#include "llvm/Transforms/Utils/BasicBlockUtils.h"
83#include "llvm/Transforms/Utils/Local.h"
84#include "llvm/Transforms/Utils/LoopUtils.h"
85#include "llvm/Transforms/Utils/SSAUpdater.h"
86#include <algorithm>
87#include <utility>
88using namespace llvm;
89
90namespace llvm {
91class LPMUpdater;
92} // namespace llvm
93
94#define DEBUG_TYPE "licm"
95
96STATISTIC(NumCreatedBlocks, "Number of blocks created");
97STATISTIC(NumClonedBranches, "Number of branches cloned");
98STATISTIC(NumSunk, "Number of instructions sunk out of loop");
99STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
100STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
101STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
102STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
103STATISTIC(NumLoadPromoted, "Number of load-only promotions");
104STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
105STATISTIC(NumMinMaxHoisted,
106 "Number of min/max expressions hoisted out of the loop");
107STATISTIC(NumGEPsHoisted,
108 "Number of geps reassociated and hoisted out of the loop");
109STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
110 "and hoisted out of the loop");
111STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
112 "reassociated and hoisted out of the loop");
113STATISTIC(NumIntAssociationsHoisted,
114 "Number of invariant int expressions "
115 "reassociated and hoisted out of the loop");
116STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
117 "reassociated and hoisted out of the loop");
118
119/// Memory promotion is enabled by default.
120static cl::opt<bool>
121 DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(Val: false),
122 cl::desc("Disable memory promotion in LICM pass"));
123
124static cl::opt<bool> ControlFlowHoisting(
125 "licm-control-flow-hoisting", cl::Hidden, cl::init(Val: false),
126 cl::desc("Enable control flow (and PHI) hoisting in LICM"));
127
128static cl::opt<bool>
129 SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(Val: false),
130 cl::desc("Force thread model single in LICM pass"));
131
132static cl::opt<uint32_t> MaxNumUsesTraversed(
133 "licm-max-num-uses-traversed", cl::Hidden, cl::init(Val: 8),
134 cl::desc("Max num uses visited for identifying load "
135 "invariance in loop using invariant start (default = 8)"));
136
137static cl::opt<unsigned> FPAssociationUpperLimit(
138 "licm-max-num-fp-reassociations", cl::init(Val: 5U), cl::Hidden,
139 cl::desc(
140 "Set upper limit for the number of transformations performed "
141 "during a single round of hoisting the reassociated expressions."));
142
143static cl::opt<unsigned> IntAssociationUpperLimit(
144 "licm-max-num-int-reassociations", cl::init(Val: 5U), cl::Hidden,
145 cl::desc(
146 "Set upper limit for the number of transformations performed "
147 "during a single round of hoisting the reassociated expressions."));
148
149// Experimental option to allow imprecision in LICM in pathological cases, in
150// exchange for faster compile. This is to be removed if MemorySSA starts to
151// address the same issue. LICM calls MemorySSAWalker's
152// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
153// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
154// which may not be precise, since optimizeUses is capped. The result is
155// correct, but we may not get as "far up" as possible to get which access is
156// clobbering the one queried.
157cl::opt<unsigned> llvm::SetLicmMssaOptCap(
158 "licm-mssa-optimization-cap", cl::init(Val: 100), cl::Hidden,
159 cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
160 "for faster compile. Caps the MemorySSA clobbering calls."));
161
162// Experimentally, memory promotion carries less importance than sinking and
163// hoisting. Limit when we do promotion when using MemorySSA, in order to save
164// compile time.
165cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
166 "licm-mssa-max-acc-promotion", cl::init(Val: 250), cl::Hidden,
167 cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
168 "effect. When MSSA in LICM is enabled, then this is the maximum "
169 "number of accesses allowed to be present in a loop in order to "
170 "enable memory promotion."));
171
172namespace llvm {
173extern cl::opt<bool> ProfcheckDisableMetadataFixes;
174} // end namespace llvm
175
176static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
177static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
178 const LoopSafetyInfo *SafetyInfo,
179 TargetTransformInfo *TTI,
180 bool &FoldableInLoop, bool LoopNestMode);
181static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
182 BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
183 MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
184 OptimizationRemarkEmitter *ORE);
185static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
186 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
187 MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE);
188static bool isSafeToExecuteUnconditionally(
189 Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
190 const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
191 OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
192 AssumptionCache *AC, bool AllowSpeculation);
193static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
194 AAResults *AA, Loop *CurLoop,
195 SinkAndHoistLICMFlags &Flags);
196static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
197 Loop *CurLoop, Instruction &I,
198 SinkAndHoistLICMFlags &Flags,
199 bool InvariantGroup);
200static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
201 MemoryUse &MU);
202/// Aggregates various functions for hoisting computations out of loop.
203static bool hoistArithmetics(Instruction &I, Loop &L,
204 ICFLoopSafetyInfo &SafetyInfo,
205 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
206 DominatorTree *DT);
207static Instruction *cloneInstructionInExitBlock(
208 Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
209 const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
210
211static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
212 MemorySSAUpdater &MSSAU);
213
214static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
215 ICFLoopSafetyInfo &SafetyInfo,
216 MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
217
218static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
219 function_ref<void(Instruction *)> Fn);
220using PointersAndHasReadsOutsideSet =
221 std::pair<SmallSetVector<Value *, 8>, bool>;
222static SmallVector<PointersAndHasReadsOutsideSet, 0>
223collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
224
225namespace {
226struct LoopInvariantCodeMotion {
227 bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
228 AssumptionCache *AC, TargetLibraryInfo *TLI,
229 TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
230 OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
231
232 LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
233 unsigned LicmMssaNoAccForPromotionCap,
234 bool LicmAllowSpeculation)
235 : LicmMssaOptCap(LicmMssaOptCap),
236 LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
237 LicmAllowSpeculation(LicmAllowSpeculation) {}
238
239private:
240 unsigned LicmMssaOptCap;
241 unsigned LicmMssaNoAccForPromotionCap;
242 bool LicmAllowSpeculation;
243};
244
245struct LegacyLICMPass : public LoopPass {
246 static char ID; // Pass identification, replacement for typeid
247 LegacyLICMPass(
248 unsigned LicmMssaOptCap = SetLicmMssaOptCap,
249 unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
250 bool LicmAllowSpeculation = true)
251 : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
252 LicmAllowSpeculation) {
253 initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
254 }
255
256 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
257 if (skipLoop(L))
258 return false;
259
260 LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
261 << L->getHeader()->getNameOrAsOperand() << "\n");
262
263 Function *F = L->getHeader()->getParent();
264
265 auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
266 MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
267 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
268 // pass. Function analyses need to be preserved across loop transformations
269 // but ORE cannot be preserved (see comment before the pass definition).
270 OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
271 return LICM.runOnLoop(
272 L, AA: &getAnalysis<AAResultsWrapperPass>().getAAResults(),
273 LI: &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
274 DT: &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
275 AC: &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F&: *F),
276 TLI: &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F: *F),
277 TTI: &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F: *F),
278 SE: SE ? &SE->getSE() : nullptr, MSSA, ORE: &ORE);
279 }
280
281 /// This transformation requires natural loop information & requires that
282 /// loop preheaders be inserted into the CFG...
283 ///
284 void getAnalysisUsage(AnalysisUsage &AU) const override {
285 AU.addPreserved<DominatorTreeWrapperPass>();
286 AU.addPreserved<LoopInfoWrapperPass>();
287 AU.addRequired<TargetLibraryInfoWrapperPass>();
288 AU.addRequired<MemorySSAWrapperPass>();
289 AU.addPreserved<MemorySSAWrapperPass>();
290 AU.addRequired<TargetTransformInfoWrapperPass>();
291 AU.addRequired<AssumptionCacheTracker>();
292 getLoopAnalysisUsage(AU);
293 LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
294 AU.addPreserved<LazyBlockFrequencyInfoPass>();
295 AU.addPreserved<LazyBranchProbabilityInfoPass>();
296 }
297
298private:
299 LoopInvariantCodeMotion LICM;
300};
301} // namespace
302
303PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
304 LoopStandardAnalysisResults &AR, LPMUpdater &) {
305 if (!AR.MSSA)
306 reportFatalUsageError(reason: "LICM requires MemorySSA (loop-mssa)");
307
308 // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
309 // pass. Function analyses need to be preserved across loop transformations
310 // but ORE cannot be preserved (see comment before the pass definition).
311 OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
312
313 LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
314 Opts.AllowSpeculation);
315 if (!LICM.runOnLoop(L: &L, AA: &AR.AA, LI: &AR.LI, DT: &AR.DT, AC: &AR.AC, TLI: &AR.TLI, TTI: &AR.TTI,
316 SE: &AR.SE, MSSA: AR.MSSA, ORE: &ORE))
317 return PreservedAnalyses::all();
318
319 auto PA = getLoopPassPreservedAnalyses();
320 PA.preserve<MemorySSAAnalysis>();
321
322 return PA;
323}
324
325void LICMPass::printPipeline(
326 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
327 static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
328 OS, MapClassName2PassName);
329
330 OS << '<';
331 OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
332 OS << '>';
333}
334
335PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
336 LoopStandardAnalysisResults &AR,
337 LPMUpdater &) {
338 if (!AR.MSSA)
339 reportFatalUsageError(reason: "LNICM requires MemorySSA (loop-mssa)");
340
341 // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
342 // pass. Function analyses need to be preserved across loop transformations
343 // but ORE cannot be preserved (see comment before the pass definition).
344 OptimizationRemarkEmitter ORE(LN.getParent());
345
346 LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
347 Opts.AllowSpeculation);
348
349 Loop &OutermostLoop = LN.getOutermostLoop();
350 bool Changed = LICM.runOnLoop(L: &OutermostLoop, AA: &AR.AA, LI: &AR.LI, DT: &AR.DT, AC: &AR.AC,
351 TLI: &AR.TLI, TTI: &AR.TTI, SE: &AR.SE, MSSA: AR.MSSA, ORE: &ORE, LoopNestMode: true);
352
353 if (!Changed)
354 return PreservedAnalyses::all();
355
356 auto PA = getLoopPassPreservedAnalyses();
357
358 PA.preserve<DominatorTreeAnalysis>();
359 PA.preserve<LoopAnalysis>();
360 PA.preserve<MemorySSAAnalysis>();
361
362 return PA;
363}
364
365void LNICMPass::printPipeline(
366 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
367 static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
368 OS, MapClassName2PassName);
369
370 OS << '<';
371 OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
372 OS << '>';
373}
374
375char LegacyLICMPass::ID = 0;
376INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
377 false, false)
378INITIALIZE_PASS_DEPENDENCY(LoopPass)
379INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
380INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
381INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
382INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
383INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
384 false)
385
386Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
387
388llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L,
389 MemorySSA &MSSA)
390 : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
391 IsSink, L, MSSA) {}
392
393llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
394 unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
395 Loop &L, MemorySSA &MSSA)
396 : LicmMssaOptCap(LicmMssaOptCap),
397 LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
398 IsSink(IsSink) {
399 unsigned AccessCapCount = 0;
400 for (auto *BB : L.getBlocks())
401 if (const auto *Accesses = MSSA.getBlockAccesses(BB))
402 for (const auto &MA : *Accesses) {
403 (void)MA;
404 ++AccessCapCount;
405 if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
406 NoOfMemAccTooLarge = true;
407 return;
408 }
409 }
410}
411
412/// Hoist expressions out of the specified loop. Note, alias info for inner
413/// loop is not preserved so it is not a good idea to run LICM multiple
414/// times on one loop.
415bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
416 DominatorTree *DT, AssumptionCache *AC,
417 TargetLibraryInfo *TLI,
418 TargetTransformInfo *TTI,
419 ScalarEvolution *SE, MemorySSA *MSSA,
420 OptimizationRemarkEmitter *ORE,
421 bool LoopNestMode) {
422 bool Changed = false;
423
424 assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
425
426 // If this loop has metadata indicating that LICM is not to be performed then
427 // just exit.
428 if (hasDisableLICMTransformsHint(L)) {
429 return false;
430 }
431
432 // Don't sink stores from loops with coroutine suspend instructions.
433 // LICM would sink instructions into the default destination of
434 // the coroutine switch. The default destination of the switch is to
435 // handle the case where the coroutine is suspended, by which point the
436 // coroutine frame may have been destroyed. No instruction can be sunk there.
437 // FIXME: This would unfortunately hurt the performance of coroutines, however
438 // there is currently no general solution for this. Similar issues could also
439 // potentially happen in other passes where instructions are being moved
440 // across that edge.
441 bool HasCoroSuspendInst = llvm::any_of(Range: L->getBlocks(), P: [](BasicBlock *BB) {
442 using namespace PatternMatch;
443 return any_of(Range: make_pointer_range(Range&: *BB),
444 P: match_fn(P: m_Intrinsic<Intrinsic::coro_suspend>()));
445 });
446
447 MemorySSAUpdater MSSAU(MSSA);
448 SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
449 /*IsSink=*/true, *L, *MSSA);
450
451 // Get the preheader block to move instructions into...
452 BasicBlock *Preheader = L->getLoopPreheader();
453
454 // Compute loop safety information.
455 ICFLoopSafetyInfo SafetyInfo;
456 SafetyInfo.computeLoopSafetyInfo(CurLoop: L);
457
458 // We want to visit all of the instructions in this loop... that are not parts
459 // of our subloops (they have already had their invariants hoisted out of
460 // their loop, into this loop, so there is no need to process the BODIES of
461 // the subloops).
462 //
463 // Traverse the body of the loop in depth first order on the dominator tree so
464 // that we are guaranteed to see definitions before we see uses. This allows
465 // us to sink instructions in one pass, without iteration. After sinking
466 // instructions, we perform another pass to hoist them out of the loop.
467 if (L->hasDedicatedExits())
468 Changed |=
469 LoopNestMode
470 ? sinkRegionForLoopNest(DT->getNode(BB: L->getHeader()), AA, LI, DT,
471 TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
472 : sinkRegion(DT->getNode(BB: L->getHeader()), AA, LI, DT, TLI, TTI, CurLoop: L,
473 MSSAU, &SafetyInfo, Flags, ORE);
474 Flags.setIsSink(false);
475 if (Preheader)
476 Changed |= hoistRegion(DT->getNode(BB: L->getHeader()), AA, LI, DT, AC, TLI, L,
477 MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
478 AllowSpeculation: LicmAllowSpeculation);
479
480 // Now that all loop invariants have been removed from the loop, promote any
481 // memory references to scalars that we can.
482 // Don't sink stores from loops without dedicated block exits. Exits
483 // containing indirect branches are not transformed by loop simplify,
484 // make sure we catch that. An additional load may be generated in the
485 // preheader for SSA updater, so also avoid sinking when no preheader
486 // is available.
487 if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
488 !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
489 // Figure out the loop exits and their insertion points
490 SmallVector<BasicBlock *, 8> ExitBlocks;
491 L->getUniqueExitBlocks(ExitBlocks);
492
493 // We can't insert into a catchswitch.
494 bool HasCatchSwitch = llvm::any_of(Range&: ExitBlocks, P: [](BasicBlock *Exit) {
495 return isa<CatchSwitchInst>(Val: Exit->getTerminator());
496 });
497
498 if (!HasCatchSwitch) {
499 SmallVector<BasicBlock::iterator, 8> InsertPts;
500 SmallVector<MemoryAccess *, 8> MSSAInsertPts;
501 InsertPts.reserve(N: ExitBlocks.size());
502 MSSAInsertPts.reserve(N: ExitBlocks.size());
503 for (BasicBlock *ExitBlock : ExitBlocks) {
504 InsertPts.push_back(Elt: ExitBlock->getFirstInsertionPt());
505 MSSAInsertPts.push_back(Elt: nullptr);
506 }
507
508 PredIteratorCache PIC;
509
510 // Promoting one set of accesses may make the pointers for another set
511 // loop invariant, so run this in a loop.
512 bool Promoted = false;
513 bool LocalPromoted;
514 do {
515 LocalPromoted = false;
516 for (auto [PointerMustAliases, HasReadsOutsideSet] :
517 collectPromotionCandidates(MSSA, AA, L)) {
518 LocalPromoted |= promoteLoopAccessesToScalars(
519 PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
520 DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
521 AllowSpeculation: LicmAllowSpeculation, HasReadsOutsideSet);
522 }
523 Promoted |= LocalPromoted;
524 } while (LocalPromoted);
525
526 // Once we have promoted values across the loop body we have to
527 // recursively reform LCSSA as any nested loop may now have values defined
528 // within the loop used in the outer loop.
529 // FIXME: This is really heavy handed. It would be a bit better to use an
530 // SSAUpdater strategy during promotion that was LCSSA aware and reformed
531 // it as it went.
532 if (Promoted)
533 formLCSSARecursively(L&: *L, DT: *DT, LI, SE);
534
535 Changed |= Promoted;
536 }
537 }
538
539 // Check that neither this loop nor its parent have had LCSSA broken. LICM is
540 // specifically moving instructions across the loop boundary and so it is
541 // especially in need of basic functional correctness checking here.
542 assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
543 assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
544 "Parent loop not left in LCSSA form after LICM!");
545
546 if (VerifyMemorySSA)
547 MSSA->verifyMemorySSA();
548
549 if (Changed && SE)
550 SE->forgetLoopDispositions();
551 return Changed;
552}
553
554/// Walk the specified region of the CFG (defined by all blocks dominated by
555/// the specified block, and that are in the current loop) in reverse depth
556/// first order w.r.t the DominatorTree. This allows us to visit uses before
557/// definitions, allowing us to sink a loop body in one pass without iteration.
558///
559bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
560 DominatorTree *DT, TargetLibraryInfo *TLI,
561 TargetTransformInfo *TTI, Loop *CurLoop,
562 MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
563 SinkAndHoistLICMFlags &Flags,
564 OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
565
566 // Verify inputs.
567 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
568 CurLoop != nullptr && SafetyInfo != nullptr &&
569 "Unexpected input to sinkRegion.");
570
571 // We want to visit children before parents. We will enqueue all the parents
572 // before their children in the worklist and process the worklist in reverse
573 // order.
574 SmallVector<BasicBlock *, 16> Worklist =
575 collectChildrenInLoop(DT, N, CurLoop);
576
577 bool Changed = false;
578 for (BasicBlock *BB : reverse(C&: Worklist)) {
579 // subloop (which would already have been processed).
580 if (inSubLoop(BB, CurLoop, LI))
581 continue;
582
583 for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
584 Instruction &I = *--II;
585
586 // The instruction is not used in the loop if it is dead. In this case,
587 // we just delete it instead of sinking it.
588 if (isInstructionTriviallyDead(I: &I, TLI)) {
589 LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
590 salvageKnowledge(I: &I);
591 salvageDebugInfo(I);
592 ++II;
593 eraseInstruction(I, SafetyInfo&: *SafetyInfo, MSSAU);
594 Changed = true;
595 continue;
596 }
597
598 // Check to see if we can sink this instruction to the exit blocks
599 // of the loop. We can do this if the all users of the instruction are
600 // outside of the loop. In this case, it doesn't even matter if the
601 // operands of the instruction are loop invariant.
602 //
603 bool FoldableInLoop = false;
604 bool LoopNestMode = OutermostLoop != nullptr;
605 if (!I.mayHaveSideEffects() &&
606 isNotUsedOrFoldableInLoop(I, CurLoop: LoopNestMode ? OutermostLoop : CurLoop,
607 SafetyInfo, TTI, FoldableInLoop,
608 LoopNestMode) &&
609 canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, TargetExecutesOncePerLoop: true, LICMFlags&: Flags, ORE)) {
610 if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
611 if (!FoldableInLoop) {
612 ++II;
613 salvageDebugInfo(I);
614 eraseInstruction(I, SafetyInfo&: *SafetyInfo, MSSAU);
615 }
616 Changed = true;
617 }
618 }
619 }
620 }
621 if (VerifyMemorySSA)
622 MSSAU.getMemorySSA()->verifyMemorySSA();
623 return Changed;
624}
625
626bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
627 DominatorTree *DT, TargetLibraryInfo *TLI,
628 TargetTransformInfo *TTI, Loop *CurLoop,
629 MemorySSAUpdater &MSSAU,
630 ICFLoopSafetyInfo *SafetyInfo,
631 SinkAndHoistLICMFlags &Flags,
632 OptimizationRemarkEmitter *ORE) {
633
634 bool Changed = false;
635 SmallPriorityWorklist<Loop *, 4> Worklist;
636 Worklist.insert(X: CurLoop);
637 appendLoopsToWorklist(*CurLoop, Worklist);
638 while (!Worklist.empty()) {
639 Loop *L = Worklist.pop_back_val();
640 Changed |= sinkRegion(N: DT->getNode(BB: L->getHeader()), AA, LI, DT, TLI, TTI, CurLoop: L,
641 MSSAU, SafetyInfo, Flags, ORE, OutermostLoop: CurLoop);
642 }
643 return Changed;
644}
645
646namespace {
647// This is a helper class for hoistRegion to make it able to hoist control flow
648// in order to be able to hoist phis. The way this works is that we initially
649// start hoisting to the loop preheader, and when we see a loop invariant branch
650// we make note of this. When we then come to hoist an instruction that's
651// conditional on such a branch we duplicate the branch and the relevant control
652// flow, then hoist the instruction into the block corresponding to its original
653// block in the duplicated control flow.
654class ControlFlowHoister {
655private:
656 // Information about the loop we are hoisting from
657 LoopInfo *LI;
658 DominatorTree *DT;
659 Loop *CurLoop;
660 MemorySSAUpdater &MSSAU;
661
662 // A map of blocks in the loop to the block their instructions will be hoisted
663 // to.
664 DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
665
666 // The branches that we can hoist, mapped to the block that marks a
667 // convergence point of their control flow.
668 DenseMap<CondBrInst *, BasicBlock *> HoistableBranches;
669
670public:
671 ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
672 MemorySSAUpdater &MSSAU)
673 : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
674
675 void registerPossiblyHoistableBranch(CondBrInst *BI) {
676 // We can only hoist conditional branches with loop invariant operands.
677 if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(I: BI))
678 return;
679
680 // The branch destinations need to be in the loop, and we don't gain
681 // anything by duplicating conditional branches with duplicate successors,
682 // as it's essentially the same as an unconditional branch.
683 BasicBlock *TrueDest = BI->getSuccessor(i: 0);
684 BasicBlock *FalseDest = BI->getSuccessor(i: 1);
685 if (!CurLoop->contains(BB: TrueDest) || !CurLoop->contains(BB: FalseDest) ||
686 TrueDest == FalseDest)
687 return;
688
689 // We can hoist BI if one branch destination is the successor of the other,
690 // or both have common successor which we check by seeing if the
691 // intersection of their successors is non-empty.
692 // TODO: This could be expanded to allowing branches where both ends
693 // eventually converge to a single block.
694 SmallPtrSet<BasicBlock *, 4> TrueDestSucc(llvm::from_range,
695 successors(BB: TrueDest));
696 SmallPtrSet<BasicBlock *, 4> FalseDestSucc(llvm::from_range,
697 successors(BB: FalseDest));
698 BasicBlock *CommonSucc = nullptr;
699 if (TrueDestSucc.count(Ptr: FalseDest)) {
700 CommonSucc = FalseDest;
701 } else if (FalseDestSucc.count(Ptr: TrueDest)) {
702 CommonSucc = TrueDest;
703 } else {
704 set_intersect(S1&: TrueDestSucc, S2: FalseDestSucc);
705 // If there's one common successor use that.
706 if (TrueDestSucc.size() == 1)
707 CommonSucc = *TrueDestSucc.begin();
708 // If there's more than one pick whichever appears first in the block list
709 // (we can't use the value returned by TrueDestSucc.begin() as it's
710 // unpredicatable which element gets returned).
711 else if (!TrueDestSucc.empty()) {
712 Function *F = TrueDest->getParent();
713 auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(Ptr: &BB); };
714 auto It = llvm::find_if(Range&: *F, P: IsSucc);
715 assert(It != F->end() && "Could not find successor in function");
716 CommonSucc = &*It;
717 }
718 }
719 // The common successor has to be dominated by the branch, as otherwise
720 // there will be some other path to the successor that will not be
721 // controlled by this branch so any phi we hoist would be controlled by the
722 // wrong condition. This also takes care of avoiding hoisting of loop back
723 // edges.
724 // TODO: In some cases this could be relaxed if the successor is dominated
725 // by another block that's been hoisted and we can guarantee that the
726 // control flow has been replicated exactly.
727 if (CommonSucc && DT->dominates(Def: BI, BB: CommonSucc))
728 HoistableBranches[BI] = CommonSucc;
729 }
730
731 bool canHoistPHI(PHINode *PN) {
732 // The phi must have loop invariant operands.
733 if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(I: PN))
734 return false;
735 // We can hoist phis if the block they are in is the target of hoistable
736 // branches which cover all of the predecessors of the block.
737 BasicBlock *BB = PN->getParent();
738 SmallPtrSet<BasicBlock *, 8> PredecessorBlocks(llvm::from_range,
739 predecessors(BB));
740 // If we have less predecessor blocks than predecessors then the phi will
741 // have more than one incoming value for the same block which we can't
742 // handle.
743 // TODO: This could be handled be erasing some of the duplicate incoming
744 // values.
745 if (PredecessorBlocks.size() != pred_size(BB))
746 return false;
747 for (auto &Pair : HoistableBranches) {
748 if (Pair.second == BB) {
749 // Which blocks are predecessors via this branch depends on if the
750 // branch is triangle-like or diamond-like.
751 if (Pair.first->getSuccessor(i: 0) == BB) {
752 PredecessorBlocks.erase(Ptr: Pair.first->getParent());
753 PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: 1));
754 } else if (Pair.first->getSuccessor(i: 1) == BB) {
755 PredecessorBlocks.erase(Ptr: Pair.first->getParent());
756 PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: 0));
757 } else {
758 PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: 0));
759 PredecessorBlocks.erase(Ptr: Pair.first->getSuccessor(i: 1));
760 }
761 }
762 }
763 // PredecessorBlocks will now be empty if for every predecessor of BB we
764 // found a hoistable branch source.
765 return PredecessorBlocks.empty();
766 }
767
768 BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
769 if (!ControlFlowHoisting)
770 return CurLoop->getLoopPreheader();
771 // If BB has already been hoisted, return that
772 if (auto It = HoistDestinationMap.find(Val: BB); It != HoistDestinationMap.end())
773 return It->second;
774
775 // Check if this block is conditional based on a pending branch
776 auto HasBBAsSuccessor =
777 [&](DenseMap<CondBrInst *, BasicBlock *>::value_type &Pair) {
778 return BB != Pair.second && (Pair.first->getSuccessor(i: 0) == BB ||
779 Pair.first->getSuccessor(i: 1) == BB);
780 };
781 auto It = llvm::find_if(Range&: HoistableBranches, P: HasBBAsSuccessor);
782
783 // If not involved in a pending branch, hoist to preheader
784 BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
785 if (It == HoistableBranches.end()) {
786 LLVM_DEBUG(dbgs() << "LICM using "
787 << InitialPreheader->getNameOrAsOperand()
788 << " as hoist destination for "
789 << BB->getNameOrAsOperand() << "\n");
790 HoistDestinationMap[BB] = InitialPreheader;
791 return InitialPreheader;
792 }
793 CondBrInst *BI = It->first;
794 assert(std::none_of(std::next(It), HoistableBranches.end(),
795 HasBBAsSuccessor) &&
796 "BB is expected to be the target of at most one branch");
797
798 LLVMContext &C = BB->getContext();
799 BasicBlock *TrueDest = BI->getSuccessor(i: 0);
800 BasicBlock *FalseDest = BI->getSuccessor(i: 1);
801 BasicBlock *CommonSucc = HoistableBranches[BI];
802 BasicBlock *HoistTarget = getOrCreateHoistedBlock(BB: BI->getParent());
803
804 // Create hoisted versions of blocks that currently don't have them
805 auto CreateHoistedBlock = [&](BasicBlock *Orig) {
806 auto [It, Inserted] = HoistDestinationMap.try_emplace(Key: Orig);
807 if (!Inserted)
808 return It->second;
809 BasicBlock *New =
810 BasicBlock::Create(Context&: C, Name: Orig->getName() + ".licm", Parent: Orig->getParent());
811 It->second = New;
812 DT->addNewBlock(BB: New, DomBB: HoistTarget);
813 if (CurLoop->getParentLoop())
814 CurLoop->getParentLoop()->addBasicBlockToLoop(NewBB: New, LI&: *LI);
815 ++NumCreatedBlocks;
816 LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
817 << " as hoist destination for " << Orig->getName()
818 << "\n");
819 return New;
820 };
821 BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
822 BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
823 BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
824
825 // Link up these blocks with branches.
826 if (!HoistCommonSucc->hasTerminator()) {
827 // The new common successor we've generated will branch to whatever that
828 // hoist target branched to.
829 BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
830 assert(TargetSucc && "Expected hoist target to have a single successor");
831 HoistCommonSucc->moveBefore(MovePos: TargetSucc);
832 UncondBrInst::Create(Target: TargetSucc, InsertBefore: HoistCommonSucc);
833 }
834 if (!HoistTrueDest->hasTerminator()) {
835 HoistTrueDest->moveBefore(MovePos: HoistCommonSucc);
836 UncondBrInst::Create(Target: HoistCommonSucc, InsertBefore: HoistTrueDest);
837 }
838 if (!HoistFalseDest->hasTerminator()) {
839 HoistFalseDest->moveBefore(MovePos: HoistCommonSucc);
840 UncondBrInst::Create(Target: HoistCommonSucc, InsertBefore: HoistFalseDest);
841 }
842
843 // If BI is being cloned to what was originally the preheader then
844 // HoistCommonSucc will now be the new preheader.
845 if (HoistTarget == InitialPreheader) {
846 // Phis in the loop header now need to use the new preheader.
847 InitialPreheader->replaceSuccessorsPhiUsesWith(New: HoistCommonSucc);
848 MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
849 Old: HoistTarget->getSingleSuccessor(), New: HoistCommonSucc, Preds: {HoistTarget});
850 // The new preheader dominates the loop header.
851 DomTreeNode *PreheaderNode = DT->getNode(BB: HoistCommonSucc);
852 DomTreeNode *HeaderNode = DT->getNode(BB: CurLoop->getHeader());
853 DT->changeImmediateDominator(N: HeaderNode, NewIDom: PreheaderNode);
854 // The preheader hoist destination is now the new preheader, with the
855 // exception of the hoist destination of this branch.
856 for (auto &Pair : HoistDestinationMap)
857 if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
858 Pair.second = HoistCommonSucc;
859 }
860
861 // Now finally clone BI.
862 auto *NewBI =
863 CondBrInst::Create(Cond: BI->getCondition(), IfTrue: HoistTrueDest, IfFalse: HoistFalseDest,
864 InsertBefore: HoistTarget->getTerminator()->getIterator());
865 HoistTarget->getTerminator()->eraseFromParent();
866 // md_prof should also come from the original branch - since the
867 // condition was hoisted, the branch probabilities shouldn't change.
868 if (!ProfcheckDisableMetadataFixes)
869 NewBI->copyMetadata(SrcInst: *BI, WL: {LLVMContext::MD_prof});
870 // FIXME: Issue #152767: debug info should also be the same as the
871 // original branch, **if** the user explicitly indicated that.
872 NewBI->setDebugLoc(HoistTarget->getTerminator()->getDebugLoc());
873
874 ++NumClonedBranches;
875
876 assert(CurLoop->getLoopPreheader() &&
877 "Hoisting blocks should not have destroyed preheader");
878 return HoistDestinationMap[BB];
879 }
880};
881} // namespace
882
883/// Walk the specified region of the CFG (defined by all blocks dominated by
884/// the specified block, and that are in the current loop) in depth first
885/// order w.r.t the DominatorTree. This allows us to visit definitions before
886/// uses, allowing us to hoist a loop body in one pass without iteration.
887///
888bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
889 DominatorTree *DT, AssumptionCache *AC,
890 TargetLibraryInfo *TLI, Loop *CurLoop,
891 MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
892 ICFLoopSafetyInfo *SafetyInfo,
893 SinkAndHoistLICMFlags &Flags,
894 OptimizationRemarkEmitter *ORE, bool LoopNestMode,
895 bool AllowSpeculation) {
896 // Verify inputs.
897 assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
898 CurLoop != nullptr && SafetyInfo != nullptr &&
899 "Unexpected input to hoistRegion.");
900
901 ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
902
903 // Keep track of instructions that have been hoisted, as they may need to be
904 // re-hoisted if they end up not dominating all of their uses.
905 SmallVector<Instruction *, 16> HoistedInstructions;
906
907 // For PHI hoisting to work we need to hoist blocks before their successors.
908 // We can do this by iterating through the blocks in the loop in reverse
909 // post-order.
910 LoopBlocksRPO Worklist(CurLoop);
911 Worklist.perform(LI);
912 bool Changed = false;
913 BasicBlock *Preheader = CurLoop->getLoopPreheader();
914 for (BasicBlock *BB : Worklist) {
915 // Only need to process the contents of this block if it is not part of a
916 // subloop (which would already have been processed).
917 if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
918 continue;
919
920 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
921 // Try hoisting the instruction out to the preheader. We can only do
922 // this if all of the operands of the instruction are loop invariant and
923 // if it is safe to hoist the instruction.
924 // TODO: It may be safe to hoist if we are hoisting to a conditional block
925 // and we have accurately duplicated the control flow from the loop header
926 // to that block.
927 if (CurLoop->hasLoopInvariantOperands(I: &I) &&
928 canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, TargetExecutesOncePerLoop: true, LICMFlags&: Flags, ORE) &&
929 isSafeToExecuteUnconditionally(Inst&: I, DT, TLI, CurLoop, SafetyInfo, ORE,
930 CtxI: Preheader->getTerminator(), AC,
931 AllowSpeculation)) {
932 hoist(I, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
933 MSSAU, SE, ORE);
934 HoistedInstructions.push_back(Elt: &I);
935 Changed = true;
936 continue;
937 }
938
939 // Attempt to remove floating point division out of the loop by
940 // converting it to a reciprocal multiplication.
941 if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
942 CurLoop->isLoopInvariant(V: I.getOperand(i: 1))) {
943 auto Divisor = I.getOperand(i: 1);
944 auto One = llvm::ConstantFP::get(Ty: Divisor->getType(), V: 1.0);
945 auto ReciprocalDivisor = BinaryOperator::CreateFDiv(V1: One, V2: Divisor);
946 ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
947 SafetyInfo->insertInstructionTo(Inst: ReciprocalDivisor, BB: I.getParent());
948 ReciprocalDivisor->insertBefore(InsertPos: I.getIterator());
949 ReciprocalDivisor->setDebugLoc(I.getDebugLoc());
950
951 auto Product =
952 BinaryOperator::CreateFMul(V1: I.getOperand(i: 0), V2: ReciprocalDivisor);
953 Product->setFastMathFlags(I.getFastMathFlags());
954 SafetyInfo->insertInstructionTo(Inst: Product, BB: I.getParent());
955 Product->insertAfter(InsertPos: I.getIterator());
956 Product->setDebugLoc(I.getDebugLoc());
957 I.replaceAllUsesWith(V: Product);
958 eraseInstruction(I, SafetyInfo&: *SafetyInfo, MSSAU);
959
960 hoist(I&: *ReciprocalDivisor, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB),
961 SafetyInfo, MSSAU, SE, ORE);
962 HoistedInstructions.push_back(Elt: ReciprocalDivisor);
963 Changed = true;
964 continue;
965 }
966
967 auto IsInvariantStart = [&](Instruction &I) {
968 using namespace PatternMatch;
969 return I.use_empty() &&
970 match(V: &I, P: m_Intrinsic<Intrinsic::invariant_start>());
971 };
972 auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
973 return SafetyInfo->isGuaranteedToExecute(Inst: I, DT, CurLoop) &&
974 SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
975 };
976 if ((IsInvariantStart(I) || isGuard(U: &I)) &&
977 CurLoop->hasLoopInvariantOperands(I: &I) &&
978 MustExecuteWithoutWritesBefore(I)) {
979 hoist(I, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
980 MSSAU, SE, ORE);
981 HoistedInstructions.push_back(Elt: &I);
982 Changed = true;
983 continue;
984 }
985
986 if (PHINode *PN = dyn_cast<PHINode>(Val: &I)) {
987 if (CFH.canHoistPHI(PN)) {
988 // Redirect incoming blocks first to ensure that we create hoisted
989 // versions of those blocks before we hoist the phi.
990 for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
991 PN->setIncomingBlock(
992 i, BB: CFH.getOrCreateHoistedBlock(BB: PN->getIncomingBlock(i)));
993 hoist(I&: *PN, DT, CurLoop, Dest: CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
994 MSSAU, SE, ORE);
995 assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
996 Changed = true;
997 continue;
998 }
999 }
1000
1001 // Try to reassociate instructions so that part of computations can be
1002 // done out of loop.
1003 if (hoistArithmetics(I, L&: *CurLoop, SafetyInfo&: *SafetyInfo, MSSAU, AC, DT)) {
1004 Changed = true;
1005 continue;
1006 }
1007
1008 // Remember possibly hoistable branches so we can actually hoist them
1009 // later if needed.
1010 if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: &I))
1011 CFH.registerPossiblyHoistableBranch(BI);
1012 }
1013 }
1014
1015 // If we hoisted instructions to a conditional block they may not dominate
1016 // their uses that weren't hoisted (such as phis where some operands are not
1017 // loop invariant). If so make them unconditional by moving them to their
1018 // immediate dominator. We iterate through the instructions in reverse order
1019 // which ensures that when we rehoist an instruction we rehoist its operands,
1020 // and also keep track of where in the block we are rehoisting to make sure
1021 // that we rehoist instructions before the instructions that use them.
1022 Instruction *HoistPoint = nullptr;
1023 if (ControlFlowHoisting) {
1024 for (Instruction *I : reverse(C&: HoistedInstructions)) {
1025 if (!llvm::all_of(Range: I->uses(),
1026 P: [&](Use &U) { return DT->dominates(Def: I, U); })) {
1027 BasicBlock *Dominator =
1028 DT->getNode(BB: I->getParent())->getIDom()->getBlock();
1029 if (!HoistPoint || !DT->dominates(A: HoistPoint->getParent(), B: Dominator)) {
1030 if (HoistPoint)
1031 assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
1032 "New hoist point expected to dominate old hoist point");
1033 HoistPoint = Dominator->getTerminator();
1034 }
1035 LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1036 << HoistPoint->getParent()->getNameOrAsOperand()
1037 << ": " << *I << "\n");
1038 moveInstructionBefore(I&: *I, Dest: HoistPoint->getIterator(), SafetyInfo&: *SafetyInfo, MSSAU,
1039 SE);
1040 HoistPoint = I;
1041 Changed = true;
1042 }
1043 }
1044 }
1045 if (VerifyMemorySSA)
1046 MSSAU.getMemorySSA()->verifyMemorySSA();
1047
1048 // Now that we've finished hoisting make sure that LI and DT are still
1049 // valid.
1050#ifdef EXPENSIVE_CHECKS
1051 if (Changed) {
1052 assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1053 "Dominator tree verification failed");
1054 LI->verify(*DT);
1055 }
1056#endif
1057
1058 return Changed;
1059}
1060
1061// Return true if LI is invariant within scope of the loop. LI is invariant if
1062// CurLoop is dominated by an invariant.start representing the same memory
1063// location and size as the memory location LI loads from, and also the
1064// invariant.start has no uses.
1065static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
1066 Loop *CurLoop) {
1067 Value *Addr = LI->getPointerOperand();
1068 const DataLayout &DL = LI->getDataLayout();
1069 const TypeSize LocSizeInBits = DL.getTypeSizeInBits(Ty: LI->getType());
1070
1071 // It is not currently possible for clang to generate an invariant.start
1072 // intrinsic with scalable vector types because we don't support thread local
1073 // sizeless types and we don't permit sizeless types in structs or classes.
1074 // Furthermore, even if support is added for this in future the intrinsic
1075 // itself is defined to have a size of -1 for variable sized objects. This
1076 // makes it impossible to verify if the intrinsic envelops our region of
1077 // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1078 // types would have a -1 parameter, but the former is clearly double the size
1079 // of the latter.
1080 if (LocSizeInBits.isScalable())
1081 return false;
1082
1083 // If we've ended up at a global/constant, bail. We shouldn't be looking at
1084 // uselists for non-local Values in a loop pass.
1085 if (isa<Constant>(Val: Addr))
1086 return false;
1087
1088 unsigned UsesVisited = 0;
1089 // Traverse all uses of the load operand value, to see if invariant.start is
1090 // one of the uses, and whether it dominates the load instruction.
1091 for (auto *U : Addr->users()) {
1092 // Avoid traversing for Load operand with high number of users.
1093 if (++UsesVisited > MaxNumUsesTraversed)
1094 return false;
1095 IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
1096 // If there are escaping uses of invariant.start instruction, the load maybe
1097 // non-invariant.
1098 if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1099 !II->use_empty())
1100 continue;
1101 ConstantInt *InvariantSize = cast<ConstantInt>(Val: II->getArgOperand(i: 0));
1102 // The intrinsic supports having a -1 argument for variable sized objects
1103 // so we should check for that here.
1104 if (InvariantSize->isNegative())
1105 continue;
1106 uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
1107 // Confirm the invariant.start location size contains the load operand size
1108 // in bits. Also, the invariant.start should dominate the load, and we
1109 // should not hoist the load out of a loop that contains this dominating
1110 // invariant.start.
1111 if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits &&
1112 DT->properlyDominates(A: II->getParent(), B: CurLoop->getHeader()))
1113 return true;
1114 }
1115
1116 return false;
1117}
1118
1119/// Return true if-and-only-if we know how to (mechanically) both hoist and
1120/// sink a given instruction out of a loop. Does not address legality
1121/// concerns such as aliasing or speculation safety.
1122static bool isHoistableAndSinkableInst(Instruction &I) {
1123 // Only these instructions are hoistable/sinkable.
1124 return (isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I) || isa<CallInst>(Val: I) ||
1125 isa<FenceInst>(Val: I) || isa<CastInst>(Val: I) || isa<UnaryOperator>(Val: I) ||
1126 isa<BinaryOperator>(Val: I) || isa<SelectInst>(Val: I) ||
1127 isa<GetElementPtrInst>(Val: I) || isa<CmpInst>(Val: I) ||
1128 isa<InsertElementInst>(Val: I) || isa<ExtractElementInst>(Val: I) ||
1129 isa<ShuffleVectorInst>(Val: I) || isa<ExtractValueInst>(Val: I) ||
1130 isa<InsertValueInst>(Val: I) || isa<FreezeInst>(Val: I));
1131}
1132
1133/// Return true if I is the only Instruction with a MemoryAccess in L.
1134static bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1135 const MemorySSAUpdater &MSSAU) {
1136 for (auto *BB : L->getBlocks())
1137 if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
1138 int NotAPhi = 0;
1139 for (const auto &Acc : *Accs) {
1140 if (isa<MemoryPhi>(Val: &Acc))
1141 continue;
1142 const auto *MUD = cast<MemoryUseOrDef>(Val: &Acc);
1143 if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1144 return false;
1145 }
1146 }
1147 return true;
1148}
1149
1150static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
1151 BatchAAResults &BAA,
1152 SinkAndHoistLICMFlags &Flags,
1153 MemoryUseOrDef *MA) {
1154 // See declaration of SetLicmMssaOptCap for usage details.
1155 if (Flags.tooManyClobberingCalls())
1156 return MA->getDefiningAccess();
1157
1158 MemoryAccess *Source =
1159 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MA, AA&: BAA);
1160 Flags.incrementClobberingCalls();
1161 return Source;
1162}
1163
1164bool llvm::canHoistLoad(LoadInst &LI, AAResults *AA, DominatorTree *DT,
1165 Loop *CurLoop, MemorySSA &MSSA,
1166 bool TargetExecutesOncePerLoop,
1167 SinkAndHoistLICMFlags &Flags,
1168 OptimizationRemarkEmitter *ORE) {
1169 if (!LI.isUnordered())
1170 return false; // Don't sink/hoist volatile or ordered atomic loads!
1171
1172 // Loads from constant memory are always safe to move, even if they end up
1173 // in the same alias set as something that ends up being modified.
1174 if (!isModSet(MRI: AA->getModRefInfoMask(P: LI.getOperand(i_nocapture: 0))))
1175 return true;
1176 if (LI.hasMetadata(KindID: LLVMContext::MD_invariant_load))
1177 return true;
1178
1179 if (LI.isAtomic() && !TargetExecutesOncePerLoop)
1180 return false; // Don't risk duplicating unordered loads
1181
1182 // This checks for an invariant.start dominating the load.
1183 if (isLoadInvariantInLoop(LI: &LI, DT, CurLoop))
1184 return true;
1185
1186 auto *MU = cast<MemoryUse>(Val: MSSA.getMemoryAccess(I: &LI));
1187
1188 bool InvariantGroup = LI.hasMetadata(KindID: LLVMContext::MD_invariant_group);
1189
1190 bool Invalidated =
1191 pointerInvalidatedByLoop(MSSA: &MSSA, MU, CurLoop, I&: LI, Flags, InvariantGroup);
1192 // Check loop-invariant address because this may also be a sinkable load
1193 // whose address is not necessarily loop-invariant.
1194 if (ORE && Invalidated && CurLoop->isLoopInvariant(V: LI.getPointerOperand()))
1195 ORE->emit(RemarkBuilder: [&]() {
1196 return OptimizationRemarkMissed(
1197 DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", &LI)
1198 << "failed to move load with loop-invariant address "
1199 "because the loop may invalidate its value";
1200 });
1201
1202 return !Invalidated;
1203}
1204
1205bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
1206 Loop *CurLoop, MemorySSAUpdater &MSSAU,
1207 bool TargetExecutesOncePerLoop,
1208 SinkAndHoistLICMFlags &Flags,
1209 OptimizationRemarkEmitter *ORE) {
1210 // If we don't understand the instruction, bail early.
1211 if (!isHoistableAndSinkableInst(I))
1212 return false;
1213
1214 MemorySSA *MSSA = MSSAU.getMemorySSA();
1215 // Loads have extra constraints we have to verify before we can hoist them.
1216 if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I)) {
1217 return canHoistLoad(LI&: *LI, AA, DT, CurLoop, MSSA&: *MSSA, TargetExecutesOncePerLoop,
1218 Flags, ORE);
1219 } else if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
1220 // Don't sink calls which can throw.
1221 if (CI->mayThrow())
1222 return false;
1223
1224 // Convergent attribute has been used on operations that involve
1225 // inter-thread communication which results are implicitly affected by the
1226 // enclosing control flows. It is not safe to hoist or sink such operations
1227 // across control flow.
1228 if (CI->isConvergent())
1229 return false;
1230
1231 // FIXME: Current LLVM IR semantics don't work well with coroutines and
1232 // thread local globals. We currently treat getting the address of a thread
1233 // local global as not accessing memory, even though it may not be a
1234 // constant throughout a function with coroutines. Remove this check after
1235 // we better model semantics of thread local globals.
1236 if (CI->getFunction()->isPresplitCoroutine())
1237 return false;
1238
1239 using namespace PatternMatch;
1240 if (match(V: CI, P: m_Intrinsic<Intrinsic::assume>()))
1241 // Assumes don't actually alias anything or throw
1242 return true;
1243
1244 // Handle simple cases by querying alias analysis.
1245 MemoryEffects Behavior = AA->getMemoryEffects(Call: CI);
1246
1247 if (Behavior.doesNotAccessMemory())
1248 return true;
1249 if (Behavior.onlyReadsMemory()) {
1250 // Might have stale MemoryDef for call that was later inferred to be
1251 // read-only.
1252 auto *MU = dyn_cast<MemoryUse>(Val: MSSA->getMemoryAccess(I: CI));
1253 if (!MU)
1254 return false;
1255
1256 // If we can prove there are no writes to the memory read by the call, we
1257 // can hoist or sink.
1258 return !pointerInvalidatedByLoop(
1259 MSSA, MU, CurLoop, I, Flags, /*InvariantGroup=*/false);
1260 }
1261
1262 if (Behavior.onlyWritesMemory()) {
1263 // can hoist or sink if there are no conflicting read/writes to the
1264 // memory location written to by the call.
1265 return noConflictingReadWrites(I: CI, MSSA, AA, CurLoop, Flags);
1266 }
1267
1268 return false;
1269 } else if (auto *FI = dyn_cast<FenceInst>(Val: &I)) {
1270 // Fences alias (most) everything to provide ordering. For the moment,
1271 // just give up if there are any other memory operations in the loop.
1272 return isOnlyMemoryAccess(I: FI, L: CurLoop, MSSAU);
1273 } else if (auto *SI = dyn_cast<StoreInst>(Val: &I)) {
1274 if (!SI->isUnordered())
1275 return false; // Don't sink/hoist volatile or ordered atomic store!
1276
1277 // We can only hoist a store that we can prove writes a value which is not
1278 // read or overwritten within the loop. For those cases, we fallback to
1279 // load store promotion instead. TODO: We can extend this to cases where
1280 // there is exactly one write to the location and that write dominates an
1281 // arbitrary number of reads in the loop.
1282 if (isOnlyMemoryAccess(I: SI, L: CurLoop, MSSAU))
1283 return true;
1284 return noConflictingReadWrites(I: SI, MSSA, AA, CurLoop, Flags);
1285 }
1286
1287 assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1288
1289 // We've established mechanical ability and aliasing, it's up to the caller
1290 // to check fault safety
1291 return true;
1292}
1293
1294/// Returns true if a PHINode is a trivially replaceable with an
1295/// Instruction.
1296/// This is true when all incoming values are that instruction.
1297/// This pattern occurs most often with LCSSA PHI nodes.
1298///
1299static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1300 for (const Value *IncValue : PN.incoming_values())
1301 if (IncValue != &I)
1302 return false;
1303
1304 return true;
1305}
1306
1307/// Return true if the instruction is foldable in the loop.
1308static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1309 const TargetTransformInfo *TTI) {
1310 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) {
1311 InstructionCost CostI =
1312 TTI->getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency);
1313 if (CostI != TargetTransformInfo::TCC_Free)
1314 return false;
1315 // For a GEP, we cannot simply use getInstructionCost because currently
1316 // it optimistically assumes that a GEP will fold into addressing mode
1317 // regardless of its users.
1318 const BasicBlock *BB = GEP->getParent();
1319 for (const User *U : GEP->users()) {
1320 const Instruction *UI = cast<Instruction>(Val: U);
1321 if (CurLoop->contains(Inst: UI) &&
1322 (BB != UI->getParent() ||
1323 (!isa<StoreInst>(Val: UI) && !isa<LoadInst>(Val: UI))))
1324 return false;
1325 }
1326 return true;
1327 }
1328
1329 return false;
1330}
1331
1332/// Return true if the only users of this instruction are outside of
1333/// the loop. If this is true, we can sink the instruction to the exit
1334/// blocks of the loop.
1335///
1336/// We also return true if the instruction could be folded away in lowering.
1337/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1338static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1339 const LoopSafetyInfo *SafetyInfo,
1340 TargetTransformInfo *TTI,
1341 bool &FoldableInLoop, bool LoopNestMode) {
1342 const auto &BlockColors = SafetyInfo->getBlockColors();
1343 bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
1344 for (const User *U : I.users()) {
1345 const Instruction *UI = cast<Instruction>(Val: U);
1346 if (const PHINode *PN = dyn_cast<PHINode>(Val: UI)) {
1347 const BasicBlock *BB = PN->getParent();
1348 // We cannot sink uses in catchswitches.
1349 if (isa<CatchSwitchInst>(Val: BB->getTerminator()))
1350 return false;
1351
1352 // We need to sink a callsite to a unique funclet. Avoid sinking if the
1353 // phi use is too muddled.
1354 if (isa<CallInst>(Val: I))
1355 if (!BlockColors.empty() &&
1356 BlockColors.find(Val: const_cast<BasicBlock *>(BB))->second.size() != 1)
1357 return false;
1358
1359 if (LoopNestMode) {
1360 while (isa<PHINode>(Val: UI) && UI->hasOneUser() &&
1361 UI->getNumOperands() == 1) {
1362 if (!CurLoop->contains(Inst: UI))
1363 break;
1364 UI = cast<Instruction>(Val: UI->user_back());
1365 }
1366 }
1367 }
1368
1369 if (CurLoop->contains(Inst: UI)) {
1370 if (IsFoldable) {
1371 FoldableInLoop = true;
1372 continue;
1373 }
1374 return false;
1375 }
1376 }
1377 return true;
1378}
1379
1380static Instruction *cloneInstructionInExitBlock(
1381 Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1382 const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
1383 Instruction *New;
1384 if (auto *CI = dyn_cast<CallInst>(Val: &I)) {
1385 const auto &BlockColors = SafetyInfo->getBlockColors();
1386
1387 // Sinking call-sites need to be handled differently from other
1388 // instructions. The cloned call-site needs a funclet bundle operand
1389 // appropriate for its location in the CFG.
1390 SmallVector<OperandBundleDef, 1> OpBundles;
1391 for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1392 BundleIdx != BundleEnd; ++BundleIdx) {
1393 OperandBundleUse Bundle = CI->getOperandBundleAt(Index: BundleIdx);
1394 if (Bundle.getTagID() == LLVMContext::OB_funclet)
1395 continue;
1396
1397 OpBundles.emplace_back(Args&: Bundle);
1398 }
1399
1400 if (!BlockColors.empty()) {
1401 const ColorVector &CV = BlockColors.find(Val: &ExitBlock)->second;
1402 assert(CV.size() == 1 && "non-unique color for exit block!");
1403 BasicBlock *BBColor = CV.front();
1404 BasicBlock::iterator EHPad = BBColor->getFirstNonPHIIt();
1405 if (EHPad->isEHPad())
1406 OpBundles.emplace_back(Args: "funclet", Args: &*EHPad);
1407 }
1408
1409 New = CallInst::Create(CI, Bundles: OpBundles);
1410 New->copyMetadata(SrcInst: *CI);
1411 } else {
1412 New = I.clone();
1413 }
1414
1415 New->insertInto(ParentBB: &ExitBlock, It: ExitBlock.getFirstInsertionPt());
1416 if (!I.getName().empty())
1417 New->setName(I.getName() + ".le");
1418
1419 if (MSSAU.getMemorySSA()->getMemoryAccess(I: &I)) {
1420 // Create a new MemoryAccess and let MemorySSA set its defining access.
1421 // After running some passes, MemorySSA might be outdated, and the
1422 // instruction `I` may have become a non-memory touching instruction.
1423 MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
1424 I: New, Definition: nullptr, BB: New->getParent(), Point: MemorySSA::Beginning,
1425 /*CreationMustSucceed=*/false);
1426 if (NewMemAcc) {
1427 if (auto *MemDef = dyn_cast<MemoryDef>(Val: NewMemAcc))
1428 MSSAU.insertDef(Def: MemDef, /*RenameUses=*/true);
1429 else {
1430 auto *MemUse = cast<MemoryUse>(Val: NewMemAcc);
1431 MSSAU.insertUse(Use: MemUse, /*RenameUses=*/true);
1432 }
1433 }
1434 }
1435
1436 // Build LCSSA PHI nodes for any in-loop operands (if legal). Note that
1437 // this is particularly cheap because we can rip off the PHI node that we're
1438 // replacing for the number and blocks of the predecessors.
1439 // OPT: If this shows up in a profile, we can instead finish sinking all
1440 // invariant instructions, and then walk their operands to re-establish
1441 // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1442 // sinking bottom-up.
1443 for (Use &Op : New->operands())
1444 if (LI->wouldBeOutOfLoopUseRequiringLCSSA(V: Op.get(), ExitBB: PN.getParent())) {
1445 auto *OInst = cast<Instruction>(Val: Op.get());
1446 PHINode *OpPN =
1447 PHINode::Create(Ty: OInst->getType(), NumReservedValues: PN.getNumIncomingValues(),
1448 NameStr: OInst->getName() + ".lcssa");
1449 OpPN->insertBefore(InsertPos: ExitBlock.begin());
1450 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1451 OpPN->addIncoming(V: OInst, BB: PN.getIncomingBlock(i));
1452 Op = OpPN;
1453 }
1454 return New;
1455}
1456
1457static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1458 MemorySSAUpdater &MSSAU) {
1459 MSSAU.removeMemoryAccess(I: &I);
1460 SafetyInfo.removeInstruction(Inst: &I);
1461 I.eraseFromParent();
1462}
1463
1464static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
1465 ICFLoopSafetyInfo &SafetyInfo,
1466 MemorySSAUpdater &MSSAU,
1467 ScalarEvolution *SE) {
1468 SafetyInfo.removeInstruction(Inst: &I);
1469 SafetyInfo.insertInstructionTo(Inst: &I, BB: Dest->getParent());
1470 I.moveBefore(BB&: *Dest->getParent(), I: Dest);
1471 if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1472 Val: MSSAU.getMemorySSA()->getMemoryAccess(I: &I)))
1473 MSSAU.moveToPlace(What: OldMemAcc, BB: Dest->getParent(),
1474 Where: MemorySSA::BeforeTerminator);
1475 if (SE)
1476 SE->forgetBlockAndLoopDispositions(V: &I);
1477}
1478
1479static Instruction *sinkThroughTriviallyReplaceablePHI(
1480 PHINode *TPN, Instruction *I, LoopInfo *LI,
1481 SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1482 const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1483 MemorySSAUpdater &MSSAU) {
1484 assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1485 "Expect only trivially replaceable PHI");
1486 BasicBlock *ExitBlock = TPN->getParent();
1487 auto [It, Inserted] = SunkCopies.try_emplace(Key: ExitBlock);
1488 if (Inserted)
1489 It->second = cloneInstructionInExitBlock(I&: *I, ExitBlock&: *ExitBlock, PN&: *TPN, LI,
1490 SafetyInfo, MSSAU);
1491 return It->second;
1492}
1493
1494static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1495 BasicBlock *BB = PN->getParent();
1496 if (!BB->canSplitPredecessors())
1497 return false;
1498 // It's not impossible to split EHPad blocks, but if BlockColors already exist
1499 // it require updating BlockColors for all offspring blocks accordingly. By
1500 // skipping such corner case, we can make updating BlockColors after splitting
1501 // predecessor fairly simple.
1502 if (!SafetyInfo->getBlockColors().empty() &&
1503 BB->getFirstNonPHIIt()->isEHPad())
1504 return false;
1505 for (BasicBlock *BBPred : predecessors(BB)) {
1506 if (isa<IndirectBrInst>(Val: BBPred->getTerminator()))
1507 return false;
1508 }
1509 return true;
1510}
1511
1512static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1513 LoopInfo *LI, const Loop *CurLoop,
1514 LoopSafetyInfo *SafetyInfo,
1515 MemorySSAUpdater *MSSAU) {
1516#ifndef NDEBUG
1517 SmallVector<BasicBlock *, 32> ExitBlocks;
1518 CurLoop->getUniqueExitBlocks(ExitBlocks);
1519 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(llvm::from_range, ExitBlocks);
1520#endif
1521 BasicBlock *ExitBB = PN->getParent();
1522 assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1523
1524 // Split predecessors of the loop exit to make instructions in the loop are
1525 // exposed to exit blocks through trivially replaceable PHIs while keeping the
1526 // loop in the canonical form where each predecessor of each exit block should
1527 // be contained within the loop. For example, this will convert the loop below
1528 // from
1529 //
1530 // LB1:
1531 // %v1 =
1532 // br %LE, %LB2
1533 // LB2:
1534 // %v2 =
1535 // br %LE, %LB1
1536 // LE:
1537 // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1538 //
1539 // to
1540 //
1541 // LB1:
1542 // %v1 =
1543 // br %LE.split, %LB2
1544 // LB2:
1545 // %v2 =
1546 // br %LE.split2, %LB1
1547 // LE.split:
1548 // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1549 // br %LE
1550 // LE.split2:
1551 // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1552 // br %LE
1553 // LE:
1554 // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1555 //
1556 const auto &BlockColors = SafetyInfo->getBlockColors();
1557 SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(BB: ExitBB), pred_end(BB: ExitBB));
1558 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
1559 while (!PredBBs.empty()) {
1560 BasicBlock *PredBB = *PredBBs.begin();
1561 assert(CurLoop->contains(PredBB) &&
1562 "Expect all predecessors are in the loop");
1563 if (PN->getBasicBlockIndex(BB: PredBB) >= 0) {
1564 BasicBlock *NewPred = SplitBlockPredecessors(
1565 BB: ExitBB, Preds: PredBB, Suffix: ".split.loop.exit", DTU: &DTU, LI, MSSAU, PreserveLCSSA: true);
1566 // Since we do not allow splitting EH-block with BlockColors in
1567 // canSplitPredecessors(), we can simply assign predecessor's color to
1568 // the new block.
1569 if (!BlockColors.empty())
1570 // Grab a reference to the ColorVector to be inserted before getting the
1571 // reference to the vector we are copying because inserting the new
1572 // element in BlockColors might cause the map to be reallocated.
1573 SafetyInfo->copyColors(New: NewPred, Old: PredBB);
1574 }
1575 PredBBs.remove(X: PredBB);
1576 }
1577}
1578
1579/// When an instruction is found to only be used outside of the loop, this
1580/// function moves it to the exit blocks and patches up SSA form as needed.
1581/// This method is guaranteed to remove the original instruction from its
1582/// position, and may either delete it or move it to outside of the loop.
1583///
1584static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1585 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1586 MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
1587 bool Changed = false;
1588 LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1589
1590 // Iterate over users to be ready for actual sinking. Replace users via
1591 // unreachable blocks with undef and make all user PHIs trivially replaceable.
1592 SmallPtrSet<Instruction *, 8> VisitedUsers;
1593 for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1594 auto *User = cast<Instruction>(Val: *UI);
1595 Use &U = UI.getUse();
1596 ++UI;
1597
1598 if (VisitedUsers.count(Ptr: User) || CurLoop->contains(Inst: User))
1599 continue;
1600
1601 if (!DT->isReachableFromEntry(A: User->getParent())) {
1602 U = PoisonValue::get(T: I.getType());
1603 Changed = true;
1604 continue;
1605 }
1606
1607 // The user must be a PHI node.
1608 PHINode *PN = cast<PHINode>(Val: User);
1609
1610 // Surprisingly, instructions can be used outside of loops without any
1611 // exits. This can only happen in PHI nodes if the incoming block is
1612 // unreachable.
1613 BasicBlock *BB = PN->getIncomingBlock(U);
1614 if (!DT->isReachableFromEntry(A: BB)) {
1615 U = PoisonValue::get(T: I.getType());
1616 Changed = true;
1617 continue;
1618 }
1619
1620 VisitedUsers.insert(Ptr: PN);
1621 if (isTriviallyReplaceablePHI(PN: *PN, I))
1622 continue;
1623
1624 if (!canSplitPredecessors(PN, SafetyInfo))
1625 return Changed;
1626
1627 // Split predecessors of the PHI so that we can make users trivially
1628 // replaceable.
1629 splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU: &MSSAU);
1630
1631 // Should rebuild the iterators, as they may be invalidated by
1632 // splitPredecessorsOfLoopExit().
1633 UI = I.user_begin();
1634 UE = I.user_end();
1635 }
1636
1637 if (VisitedUsers.empty())
1638 return Changed;
1639
1640 ORE->emit(RemarkBuilder: [&]() {
1641 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1642 << "sinking " << ore::NV("Inst", &I);
1643 });
1644 if (isa<LoadInst>(Val: I))
1645 ++NumMovedLoads;
1646 else if (isa<CallInst>(Val: I))
1647 ++NumMovedCalls;
1648 ++NumSunk;
1649
1650#ifndef NDEBUG
1651 SmallVector<BasicBlock *, 32> ExitBlocks;
1652 CurLoop->getUniqueExitBlocks(ExitBlocks);
1653 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(llvm::from_range, ExitBlocks);
1654#endif
1655
1656 // Clones of this instruction. Don't create more than one per exit block!
1657 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1658
1659 // If this instruction is only used outside of the loop, then all users are
1660 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1661 // the instruction.
1662 // First check if I is worth sinking for all uses. Sink only when it is worth
1663 // across all uses.
1664 SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1665 for (auto *UI : Users) {
1666 auto *User = cast<Instruction>(Val: UI);
1667
1668 if (CurLoop->contains(Inst: User))
1669 continue;
1670
1671 PHINode *PN = cast<PHINode>(Val: User);
1672 assert(ExitBlockSet.count(PN->getParent()) &&
1673 "The LCSSA PHI is not in an exit block!");
1674
1675 // The PHI must be trivially replaceable.
1676 Instruction *New = sinkThroughTriviallyReplaceablePHI(
1677 TPN: PN, I: &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1678 // As we sink the instruction out of the BB, drop its debug location.
1679 New->dropLocation();
1680 PN->replaceAllUsesWith(V: New);
1681 eraseInstruction(I&: *PN, SafetyInfo&: *SafetyInfo, MSSAU);
1682 Changed = true;
1683 }
1684 return Changed;
1685}
1686
1687/// When an instruction is found to only use loop invariant operands that
1688/// is safe to hoist, this instruction is called to do the dirty work.
1689///
1690static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1691 BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1692 MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1693 OptimizationRemarkEmitter *ORE) {
1694 LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1695 << I << "\n");
1696 ORE->emit(RemarkBuilder: [&]() {
1697 return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1698 << ore::NV("Inst", &I);
1699 });
1700
1701 // Metadata can be dependent on conditions we are hoisting above.
1702 // Conservatively strip all metadata on the instruction unless we were
1703 // guaranteed to execute I if we entered the loop, in which case the metadata
1704 // is valid in the loop preheader.
1705 // Similarly, If I is a call and it is not guaranteed to execute in the loop,
1706 // then moving to the preheader means we should strip attributes on the call
1707 // that can cause UB since we may be hoisting above conditions that allowed
1708 // inferring those attributes. They may not be valid at the preheader.
1709 if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(Val: I)) &&
1710 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1711 // time in isGuaranteedToExecute if we don't actually have anything to
1712 // drop. It is a compile time optimization, not required for correctness.
1713 !SafetyInfo->isGuaranteedToExecute(Inst: I, DT, CurLoop)) {
1714 I.dropUBImplyingAttrsAndMetadata();
1715 }
1716
1717 if (isa<PHINode>(Val: I))
1718 // Move the new node to the end of the phi list in the destination block.
1719 moveInstructionBefore(I, Dest: Dest->getFirstNonPHIIt(), SafetyInfo&: *SafetyInfo, MSSAU, SE);
1720 else
1721 // Move the new node to the destination block, before its terminator.
1722 moveInstructionBefore(I, Dest: Dest->getTerminator()->getIterator(), SafetyInfo&: *SafetyInfo,
1723 MSSAU, SE);
1724
1725 I.updateLocationAfterHoist();
1726
1727 if (isa<LoadInst>(Val: I))
1728 ++NumMovedLoads;
1729 else if (isa<CallInst>(Val: I))
1730 ++NumMovedCalls;
1731 ++NumHoisted;
1732}
1733
1734/// Only sink or hoist an instruction if it is not a trapping instruction,
1735/// or if the instruction is known not to trap when moved to the preheader.
1736/// or if it is a trapping instruction and is guaranteed to execute.
1737static bool isSafeToExecuteUnconditionally(
1738 Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1739 const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1740 OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1741 AssumptionCache *AC, bool AllowSpeculation) {
1742 if (AllowSpeculation &&
1743 isSafeToSpeculativelyExecute(I: &Inst, CtxI, AC, DT, TLI))
1744 return true;
1745
1746 bool GuaranteedToExecute =
1747 SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1748
1749 if (!GuaranteedToExecute) {
1750 auto *LI = dyn_cast<LoadInst>(Val: &Inst);
1751 if (LI && CurLoop->isLoopInvariant(V: LI->getPointerOperand()))
1752 ORE->emit(RemarkBuilder: [&]() {
1753 return OptimizationRemarkMissed(
1754 DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1755 << "failed to hoist load with loop-invariant address "
1756 "because load is conditionally executed";
1757 });
1758 }
1759
1760 return GuaranteedToExecute;
1761}
1762
1763namespace {
1764class LoopPromoter : public LoadAndStorePromoter {
1765 Value *SomePtr; // Designated pointer to store to.
1766 SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1767 SmallVectorImpl<BasicBlock::iterator> &LoopInsertPts;
1768 SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1769 PredIteratorCache &PredCache;
1770 MemorySSAUpdater &MSSAU;
1771 LoopInfo &LI;
1772 DebugLoc DL;
1773 Align Alignment;
1774 bool UnorderedAtomic;
1775 AAMDNodes AATags;
1776 ICFLoopSafetyInfo &SafetyInfo;
1777 bool CanInsertStoresInExitBlocks;
1778 ArrayRef<const Instruction *> Uses;
1779
1780 // We're about to add a use of V in a loop exit block. Insert an LCSSA phi
1781 // (if legal) if doing so would add an out-of-loop use to an instruction
1782 // defined in-loop.
1783 Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1784 if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, ExitBB: BB))
1785 return V;
1786
1787 Instruction *I = cast<Instruction>(Val: V);
1788 // We need to create an LCSSA PHI node for the incoming value and
1789 // store that.
1790 PHINode *PN = PHINode::Create(Ty: I->getType(), NumReservedValues: PredCache.size(BB),
1791 NameStr: I->getName() + ".lcssa");
1792 PN->insertBefore(InsertPos: BB->begin());
1793 for (BasicBlock *Pred : PredCache.get(BB))
1794 PN->addIncoming(V: I, BB: Pred);
1795 return PN;
1796 }
1797
1798public:
1799 LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1800 SmallVectorImpl<BasicBlock *> &LEB,
1801 SmallVectorImpl<BasicBlock::iterator> &LIP,
1802 SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1803 MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1804 Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1805 ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1806 : LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
1807 LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
1808 LI(li), DL(std::move(dl)), Alignment(Alignment),
1809 UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1810 SafetyInfo(SafetyInfo),
1811 CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
1812
1813 void insertStoresInLoopExitBlocks() {
1814 // Insert stores after in the loop exit blocks. Each exit block gets a
1815 // store of the live-out values that feed them. Since we've already told
1816 // the SSA updater about the defs in the loop and the preheader
1817 // definition, it is all set and we can start using it.
1818 DIAssignID *NewID = nullptr;
1819 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1820 BasicBlock *ExitBlock = LoopExitBlocks[i];
1821 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(BB: ExitBlock);
1822 LiveInValue = maybeInsertLCSSAPHI(V: LiveInValue, BB: ExitBlock);
1823 Value *Ptr = maybeInsertLCSSAPHI(V: SomePtr, BB: ExitBlock);
1824 BasicBlock::iterator InsertPos = LoopInsertPts[i];
1825 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1826 if (UnorderedAtomic)
1827 NewSI->setOrdering(AtomicOrdering::Unordered);
1828 NewSI->setAlignment(Alignment);
1829 NewSI->setDebugLoc(DL);
1830 // Attach DIAssignID metadata to the new store, generating it on the
1831 // first loop iteration.
1832 if (i == 0) {
1833 // NewSI will have its DIAssignID set here if there are any stores in
1834 // Uses with a DIAssignID attachment. This merged ID will then be
1835 // attached to the other inserted stores (in the branch below).
1836 NewSI->mergeDIAssignID(SourceInstructions: Uses);
1837 NewID = cast_or_null<DIAssignID>(
1838 Val: NewSI->getMetadata(KindID: LLVMContext::MD_DIAssignID));
1839 } else {
1840 // Attach the DIAssignID (or nullptr) merged from Uses in the branch
1841 // above.
1842 NewSI->setMetadata(KindID: LLVMContext::MD_DIAssignID, Node: NewID);
1843 }
1844
1845 if (AATags)
1846 NewSI->setAAMetadata(AATags);
1847
1848 MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1849 MemoryAccess *NewMemAcc;
1850 if (!MSSAInsertPoint) {
1851 NewMemAcc = MSSAU.createMemoryAccessInBB(
1852 I: NewSI, Definition: nullptr, BB: NewSI->getParent(), Point: MemorySSA::Beginning);
1853 } else {
1854 NewMemAcc =
1855 MSSAU.createMemoryAccessAfter(I: NewSI, Definition: nullptr, InsertPt: MSSAInsertPoint);
1856 }
1857 MSSAInsertPts[i] = NewMemAcc;
1858 MSSAU.insertDef(Def: cast<MemoryDef>(Val: NewMemAcc), RenameUses: true);
1859 // FIXME: true for safety, false may still be correct.
1860 }
1861 }
1862
1863 void doExtraRewritesBeforeFinalDeletion() override {
1864 if (CanInsertStoresInExitBlocks)
1865 insertStoresInLoopExitBlocks();
1866 }
1867
1868 void instructionDeleted(Instruction *I) const override {
1869 SafetyInfo.removeInstruction(Inst: I);
1870 MSSAU.removeMemoryAccess(I);
1871 }
1872
1873 bool shouldDelete(Instruction *I) const override {
1874 if (isa<StoreInst>(Val: I))
1875 return CanInsertStoresInExitBlocks;
1876 return true;
1877 }
1878};
1879
1880bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1881 DominatorTree *DT) {
1882 // We can perform the captured-before check against any instruction in the
1883 // loop header, as the loop header is reachable from any instruction inside
1884 // the loop.
1885 // TODO: ReturnCaptures=true shouldn't be necessary here.
1886 return capturesNothing(CC: PointerMayBeCapturedBefore(
1887 V, /*ReturnCaptures=*/true, I: L->getHeader()->getTerminator(), DT,
1888 /*IncludeI=*/false, Mask: CaptureComponents::Provenance));
1889}
1890
1891/// Return true if we can prove that a caller cannot inspect the object if an
1892/// unwind occurs inside the loop.
1893bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
1894 DominatorTree *DT) {
1895 bool RequiresNoCaptureBeforeUnwind;
1896 if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1897 return false;
1898
1899 return !RequiresNoCaptureBeforeUnwind ||
1900 isNotCapturedBeforeOrInLoop(V: Object, L, DT);
1901}
1902
1903bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
1904 TargetTransformInfo *TTI) {
1905 // The object must be function-local to start with, and then not captured
1906 // before/in the loop.
1907 return (isIdentifiedFunctionLocal(V: Object) &&
1908 isNotCapturedBeforeOrInLoop(V: Object, L, DT)) ||
1909 (TTI->isSingleThreaded() || SingleThread);
1910}
1911
1912} // namespace
1913
1914/// Try to promote memory values to scalars by sinking stores out of the
1915/// loop and moving loads to before the loop. We do this by looping over
1916/// the stores in the loop, looking for stores to Must pointers which are
1917/// loop invariant.
1918///
1919bool llvm::promoteLoopAccessesToScalars(
1920 const SmallSetVector<Value *, 8> &PointerMustAliases,
1921 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1922 SmallVectorImpl<BasicBlock::iterator> &InsertPts,
1923 SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1924 LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1925 const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
1926 MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1927 OptimizationRemarkEmitter *ORE, bool AllowSpeculation,
1928 bool HasReadsOutsideSet) {
1929 // Verify inputs.
1930 assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1931 SafetyInfo != nullptr &&
1932 "Unexpected Input to promoteLoopAccessesToScalars");
1933
1934 LLVM_DEBUG({
1935 dbgs() << "Trying to promote set of must-aliased pointers:\n";
1936 for (Value *Ptr : PointerMustAliases)
1937 dbgs() << " " << *Ptr << "\n";
1938 });
1939 ++NumPromotionCandidates;
1940
1941 Value *SomePtr = *PointerMustAliases.begin();
1942 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1943
1944 // It is not safe to promote a load/store from the loop if the load/store is
1945 // conditional. For example, turning:
1946 //
1947 // for () { if (c) *P += 1; }
1948 //
1949 // into:
1950 //
1951 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1952 //
1953 // is not safe, because *P may only be valid to access if 'c' is true.
1954 //
1955 // The safety property divides into two parts:
1956 // p1) The memory may not be dereferenceable on entry to the loop. In this
1957 // case, we can't insert the required load in the preheader.
1958 // p2) The memory model does not allow us to insert a store along any dynamic
1959 // path which did not originally have one.
1960 //
1961 // If at least one store is guaranteed to execute, both properties are
1962 // satisfied, and promotion is legal.
1963 //
1964 // This, however, is not a necessary condition. Even if no store/load is
1965 // guaranteed to execute, we can still establish these properties.
1966 // We can establish (p1) by proving that hoisting the load into the preheader
1967 // is safe (i.e. proving dereferenceability on all paths through the loop). We
1968 // can use any access within the alias set to prove dereferenceability,
1969 // since they're all must alias.
1970 //
1971 // There are two ways establish (p2):
1972 // a) Prove the location is thread-local. In this case the memory model
1973 // requirement does not apply, and stores are safe to insert.
1974 // b) Prove a store dominates every exit block. In this case, if an exit
1975 // blocks is reached, the original dynamic path would have taken us through
1976 // the store, so inserting a store into the exit block is safe. Note that this
1977 // is different from the store being guaranteed to execute. For instance,
1978 // if an exception is thrown on the first iteration of the loop, the original
1979 // store is never executed, but the exit blocks are not executed either.
1980
1981 bool DereferenceableInPH = false;
1982 bool StoreIsGuaranteedToExecute = false;
1983 bool LoadIsGuaranteedToExecute = false;
1984 bool FoundLoadToPromote = false;
1985
1986 // Goes from Unknown to either Safe or Unsafe, but can't switch between them.
1987 enum {
1988 StoreSafe,
1989 StoreUnsafe,
1990 StoreSafetyUnknown,
1991 } StoreSafety = StoreSafetyUnknown;
1992
1993 SmallVector<Instruction *, 64> LoopUses;
1994
1995 // We start with an alignment of one and try to find instructions that allow
1996 // us to prove better alignment.
1997 Align Alignment;
1998 // Keep track of which types of access we see
1999 bool SawUnorderedAtomic = false;
2000 bool SawNotAtomic = false;
2001 AAMDNodes AATags;
2002
2003 const DataLayout &MDL = Preheader->getDataLayout();
2004
2005 // If there are reads outside the promoted set, then promoting stores is
2006 // definitely not safe.
2007 if (HasReadsOutsideSet)
2008 StoreSafety = StoreUnsafe;
2009
2010 if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
2011 // If a loop can throw, we have to insert a store along each unwind edge.
2012 // That said, we can't actually make the unwind edge explicit. Therefore,
2013 // we have to prove that the store is dead along the unwind edge. We do
2014 // this by proving that the caller can't have a reference to the object
2015 // after return and thus can't possibly load from the object.
2016 Value *Object = getUnderlyingObject(V: SomePtr);
2017 if (!isNotVisibleOnUnwindInLoop(Object, L: CurLoop, DT))
2018 StoreSafety = StoreUnsafe;
2019 }
2020
2021 // Check that all accesses to pointers in the alias set use the same type.
2022 // We cannot (yet) promote a memory location that is loaded and stored in
2023 // different sizes. While we are at it, collect alignment and AA info.
2024 Type *AccessTy = nullptr;
2025 for (Value *ASIV : PointerMustAliases) {
2026 for (Use &U : ASIV->uses()) {
2027 // Ignore instructions that are outside the loop.
2028 Instruction *UI = dyn_cast<Instruction>(Val: U.getUser());
2029 if (!UI || !CurLoop->contains(Inst: UI))
2030 continue;
2031
2032 // If there is an non-load/store instruction in the loop, we can't promote
2033 // it.
2034 if (LoadInst *Load = dyn_cast<LoadInst>(Val: UI)) {
2035 if (!Load->isUnordered())
2036 return false;
2037
2038 SawUnorderedAtomic |= Load->isAtomic();
2039 SawNotAtomic |= !Load->isAtomic();
2040 FoundLoadToPromote = true;
2041
2042 Align InstAlignment = Load->getAlign();
2043
2044 if (!LoadIsGuaranteedToExecute)
2045 LoadIsGuaranteedToExecute =
2046 SafetyInfo->isGuaranteedToExecute(Inst: *UI, DT, CurLoop);
2047
2048 // Note that proving a load safe to speculate requires proving
2049 // sufficient alignment at the target location. Proving it guaranteed
2050 // to execute does as well. Thus we can increase our guaranteed
2051 // alignment as well.
2052 if (!DereferenceableInPH || (InstAlignment > Alignment))
2053 if (isSafeToExecuteUnconditionally(
2054 Inst&: *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2055 CtxI: Preheader->getTerminator(), AC, AllowSpeculation)) {
2056 DereferenceableInPH = true;
2057 Alignment = std::max(a: Alignment, b: InstAlignment);
2058 }
2059 } else if (const StoreInst *Store = dyn_cast<StoreInst>(Val: UI)) {
2060 // Stores *of* the pointer are not interesting, only stores *to* the
2061 // pointer.
2062 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2063 continue;
2064 if (!Store->isUnordered())
2065 return false;
2066
2067 SawUnorderedAtomic |= Store->isAtomic();
2068 SawNotAtomic |= !Store->isAtomic();
2069
2070 // If the store is guaranteed to execute, both properties are satisfied.
2071 // We may want to check if a store is guaranteed to execute even if we
2072 // already know that promotion is safe, since it may have higher
2073 // alignment than any other guaranteed stores, in which case we can
2074 // raise the alignment on the promoted store.
2075 Align InstAlignment = Store->getAlign();
2076 bool GuaranteedToExecute =
2077 SafetyInfo->isGuaranteedToExecute(Inst: *UI, DT, CurLoop);
2078 StoreIsGuaranteedToExecute |= GuaranteedToExecute;
2079 if (GuaranteedToExecute) {
2080 DereferenceableInPH = true;
2081 if (StoreSafety == StoreSafetyUnknown)
2082 StoreSafety = StoreSafe;
2083 Alignment = std::max(a: Alignment, b: InstAlignment);
2084 }
2085
2086 // If a store dominates all exit blocks, it is safe to sink.
2087 // As explained above, if an exit block was executed, a dominating
2088 // store must have been executed at least once, so we are not
2089 // introducing stores on paths that did not have them.
2090 // Note that this only looks at explicit exit blocks. If we ever
2091 // start sinking stores into unwind edges (see above), this will break.
2092 if (StoreSafety == StoreSafetyUnknown &&
2093 llvm::all_of(Range&: ExitBlocks, P: [&](BasicBlock *Exit) {
2094 return DT->dominates(A: Store->getParent(), B: Exit);
2095 }))
2096 StoreSafety = StoreSafe;
2097
2098 // If the store is not guaranteed to execute, we may still get
2099 // deref info through it.
2100 if (!DereferenceableInPH) {
2101 DereferenceableInPH = isDereferenceableAndAlignedPointer(
2102 V: Store->getPointerOperand(), Ty: Store->getValueOperand()->getType(),
2103 Alignment: Store->getAlign(),
2104 Q: SimplifyQuery(MDL, TLI, DT, AC, Preheader->getTerminator()));
2105 }
2106 } else
2107 continue; // Not a load or store.
2108
2109 if (!AccessTy)
2110 AccessTy = getLoadStoreType(I: UI);
2111 else if (AccessTy != getLoadStoreType(I: UI))
2112 return false;
2113
2114 // Merge the AA tags.
2115 if (LoopUses.empty()) {
2116 // On the first load/store, just take its AA tags.
2117 AATags = UI->getAAMetadata();
2118 } else if (AATags) {
2119 AATags = AATags.merge(Other: UI->getAAMetadata());
2120 }
2121
2122 LoopUses.push_back(Elt: UI);
2123 }
2124 }
2125
2126 // If we found both an unordered atomic instruction and a non-atomic memory
2127 // access, bail. We can't blindly promote non-atomic to atomic since we
2128 // might not be able to lower the result. We can't downgrade since that
2129 // would violate memory model. Also, align 0 is an error for atomics.
2130 if (SawUnorderedAtomic && SawNotAtomic)
2131 return false;
2132
2133 // If we're inserting an atomic load in the preheader, we must be able to
2134 // lower it. We're only guaranteed to be able to lower naturally aligned
2135 // atomics.
2136 if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(Ty: AccessTy))
2137 return false;
2138
2139 // If we couldn't prove we can hoist the load, bail.
2140 if (!DereferenceableInPH) {
2141 LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n");
2142 return false;
2143 }
2144
2145 // We know we can hoist the load, but don't have a guaranteed store.
2146 // Check whether the location is writable and thread-local. If it is, then we
2147 // can insert stores along paths which originally didn't have them without
2148 // violating the memory model.
2149 if (StoreSafety == StoreSafetyUnknown) {
2150 Value *Object = getUnderlyingObject(V: SomePtr);
2151 bool ExplicitlyDereferenceableOnly;
2152 // The dereferenceability query here is only required to satisfy the
2153 // writable contract, actual dereferenceability has already been proven
2154 // above. As such, we can ignore frees.
2155 if (isWritableObject(Object, ExplicitlyDereferenceableOnly) &&
2156 (!ExplicitlyDereferenceableOnly ||
2157 isDereferenceablePointer(V: SomePtr, Ty: AccessTy, Q: MDL,
2158 /*IgnoreFree=*/true)) &&
2159 isThreadLocalObject(Object, L: CurLoop, DT, TTI))
2160 StoreSafety = StoreSafe;
2161 }
2162
2163 // If we've still failed to prove we can sink the store, hoist the load
2164 // only, if possible.
2165 if (StoreSafety != StoreSafe && !FoundLoadToPromote)
2166 // If we cannot hoist the load either, give up.
2167 return false;
2168
2169 // Lets do the promotion!
2170 if (StoreSafety == StoreSafe) {
2171 LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2172 << '\n');
2173 ++NumLoadStorePromoted;
2174 } else {
2175 LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2176 << '\n');
2177 ++NumLoadPromoted;
2178 }
2179
2180 ORE->emit(RemarkBuilder: [&]() {
2181 return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2182 LoopUses[0])
2183 << "Moving accesses to memory location out of the loop";
2184 });
2185
2186 // Look at all the loop uses, and try to merge their locations.
2187 std::vector<DebugLoc> LoopUsesLocs;
2188 for (auto U : LoopUses)
2189 LoopUsesLocs.push_back(x: U->getDebugLoc());
2190 auto DL = DebugLoc::getMergedLocations(Locs: LoopUsesLocs);
2191
2192 // We use the SSAUpdater interface to insert phi nodes as required.
2193 SmallVector<PHINode *, 16> NewPHIs;
2194 SSAUpdater SSA(&NewPHIs);
2195 LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
2196 MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
2197 SawUnorderedAtomic,
2198 StoreIsGuaranteedToExecute ? AATags : AAMDNodes(),
2199 *SafetyInfo, StoreSafety == StoreSafe);
2200
2201 // Set up the preheader to have a definition of the value. It is the live-out
2202 // value from the preheader that uses in the loop will use.
2203 LoadInst *PreheaderLoad = nullptr;
2204 if (FoundLoadToPromote || !StoreIsGuaranteedToExecute) {
2205 PreheaderLoad =
2206 new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2207 Preheader->getTerminator()->getIterator());
2208 if (SawUnorderedAtomic)
2209 PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2210 PreheaderLoad->setAlignment(Alignment);
2211 PreheaderLoad->setDebugLoc(DebugLoc::getDropped());
2212 if (AATags && LoadIsGuaranteedToExecute)
2213 PreheaderLoad->setAAMetadata(AATags);
2214
2215 MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2216 I: PreheaderLoad, Definition: nullptr, BB: PreheaderLoad->getParent(), Point: MemorySSA::End);
2217 MemoryUse *NewMemUse = cast<MemoryUse>(Val: PreheaderLoadMemoryAccess);
2218 MSSAU.insertUse(Use: NewMemUse, /*RenameUses=*/true);
2219 SSA.AddAvailableValue(BB: Preheader, V: PreheaderLoad);
2220 } else {
2221 SSA.AddAvailableValue(BB: Preheader, V: PoisonValue::get(T: AccessTy));
2222 }
2223
2224 if (VerifyMemorySSA)
2225 MSSAU.getMemorySSA()->verifyMemorySSA();
2226 // Rewrite all the loads in the loop and remember all the definitions from
2227 // stores in the loop.
2228 Promoter.run(Insts: LoopUses);
2229
2230 if (VerifyMemorySSA)
2231 MSSAU.getMemorySSA()->verifyMemorySSA();
2232 // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2233 if (PreheaderLoad && PreheaderLoad->use_empty())
2234 eraseInstruction(I&: *PreheaderLoad, SafetyInfo&: *SafetyInfo, MSSAU);
2235
2236 return true;
2237}
2238
2239static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2240 function_ref<void(Instruction *)> Fn) {
2241 for (const BasicBlock *BB : L->blocks())
2242 if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2243 for (const auto &Access : *Accesses)
2244 if (const auto *MUD = dyn_cast<MemoryUseOrDef>(Val: &Access))
2245 Fn(MUD->getMemoryInst());
2246}
2247
2248// The bool indicates whether there might be reads outside the set, in which
2249// case only loads may be promoted.
2250static SmallVector<PointersAndHasReadsOutsideSet, 0>
2251collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
2252 BatchAAResults BatchAA(*AA);
2253 AliasSetTracker AST(BatchAA);
2254
2255 auto IsPotentiallyPromotable = [L](const Instruction *I) {
2256 if (const auto *SI = dyn_cast<StoreInst>(Val: I)) {
2257 const Value *PtrOp = SI->getPointerOperand();
2258 return !isa<ConstantData>(Val: PtrOp) && L->isLoopInvariant(V: PtrOp);
2259 }
2260 if (const auto *LI = dyn_cast<LoadInst>(Val: I)) {
2261 const Value *PtrOp = LI->getPointerOperand();
2262 return !isa<ConstantData>(Val: PtrOp) && L->isLoopInvariant(V: PtrOp);
2263 }
2264 return false;
2265 };
2266
2267 // Populate AST with potentially promotable accesses.
2268 SmallPtrSet<Value *, 16> AttemptingPromotion;
2269 foreachMemoryAccess(MSSA, L, Fn: [&](Instruction *I) {
2270 if (IsPotentiallyPromotable(I)) {
2271 AttemptingPromotion.insert(Ptr: I);
2272 AST.add(I);
2273 }
2274 });
2275
2276 // We're only interested in must-alias sets that contain a mod.
2277 SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets;
2278 for (AliasSet &AS : AST)
2279 if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2280 Sets.push_back(Elt: {&AS, false});
2281
2282 if (Sets.empty())
2283 return {}; // Nothing to promote...
2284
2285 // Discard any sets for which there is an aliasing non-promotable access.
2286 foreachMemoryAccess(MSSA, L, Fn: [&](Instruction *I) {
2287 if (AttemptingPromotion.contains(Ptr: I))
2288 return;
2289
2290 llvm::erase_if(C&: Sets, P: [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) {
2291 ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(Inst: I, AA&: BatchAA);
2292 // Cannot promote if there are writes outside the set.
2293 if (isModSet(MRI: MR))
2294 return true;
2295 if (isRefSet(MRI: MR)) {
2296 // Remember reads outside the set.
2297 Pair.setInt(true);
2298 // If this is a mod-only set and there are reads outside the set,
2299 // we will not be able to promote, so bail out early.
2300 return !Pair.getPointer()->isRef();
2301 }
2302 return false;
2303 });
2304 });
2305
2306 SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result;
2307 for (auto [Set, HasReadsOutsideSet] : Sets) {
2308 SmallSetVector<Value *, 8> PointerMustAliases;
2309 for (const auto &MemLoc : *Set)
2310 PointerMustAliases.insert(X: const_cast<Value *>(MemLoc.Ptr));
2311 Result.emplace_back(Args: std::move(PointerMustAliases), Args&: HasReadsOutsideSet);
2312 }
2313
2314 return Result;
2315}
2316
2317// For a given store instruction or writeonly call instruction, this function
2318// checks that there are no read or writes that conflict with the memory
2319// access in the instruction
2320static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
2321 AAResults *AA, Loop *CurLoop,
2322 SinkAndHoistLICMFlags &Flags) {
2323 assert(isa<CallInst>(*I) || isa<StoreInst>(*I));
2324 // If there are more accesses than the Promotion cap, then give up as we're
2325 // not walking a list that long.
2326 if (Flags.tooManyMemoryAccesses())
2327 return false;
2328
2329 auto *IMD = MSSA->getMemoryAccess(I);
2330 BatchAAResults BAA(*AA);
2331 auto *Source = getClobberingMemoryAccess(MSSA&: *MSSA, BAA, Flags, MA: IMD);
2332 // Make sure there are no clobbers inside the loop.
2333 if (!MSSA->isLiveOnEntryDef(MA: Source) && CurLoop->contains(BB: Source->getBlock()))
2334 return false;
2335
2336 // If there are interfering Uses don't move this store.
2337 // TODO: Cache set of Uses on the first walk in runOnLoop, update when
2338 // moving accesses. Can also extend to dominating uses.
2339 for (auto *BB : CurLoop->getBlocks()) {
2340 auto *Accesses = MSSA->getBlockAccesses(BB);
2341 if (!Accesses)
2342 continue;
2343 for (const auto &MA : *Accesses) {
2344 // Accesses are ordered. If we find one that I dominates we can stop.
2345 if (!Flags.getIsSink() && MSSA->dominates(A: IMD, B: &MA))
2346 break;
2347
2348 if (const auto *MemUseOrDef = dyn_cast<MemoryUseOrDef>(Val: &MA)) {
2349 // Skip unrelated accesses.
2350 if (isNoModRef(MRI: BAA.getModRefInfo(I: MemUseOrDef->getMemoryInst(), I2: I)))
2351 continue;
2352
2353 return false;
2354 }
2355 }
2356 }
2357 return true;
2358}
2359
2360static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
2361 Loop *CurLoop, Instruction &I,
2362 SinkAndHoistLICMFlags &Flags,
2363 bool InvariantGroup) {
2364 // For hoisting, use the walker to determine safety
2365 if (!Flags.getIsSink()) {
2366 // If hoisting an invariant group, we only need to check that there
2367 // is no store to the loaded pointer between the start of the loop,
2368 // and the load (since all values must be the same).
2369
2370 // This can be checked in two conditions:
2371 // 1) if the memoryaccess is outside the loop
2372 // 2) the earliest access is at the loop header,
2373 // if the memory loaded is the phi node
2374
2375 BatchAAResults BAA(MSSA->getAA());
2376 MemoryAccess *Source = getClobberingMemoryAccess(MSSA&: *MSSA, BAA, Flags, MA: MU);
2377 return !MSSA->isLiveOnEntryDef(MA: Source) &&
2378 CurLoop->contains(BB: Source->getBlock()) &&
2379 !(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Val: Source));
2380 }
2381
2382 // For sinking, we'd need to check all Defs below this use. The getClobbering
2383 // call will look on the backedge of the loop, but will check aliasing with
2384 // the instructions on the previous iteration.
2385 // For example:
2386 // for (i ... )
2387 // load a[i] ( Use (LoE)
2388 // store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2389 // i++;
2390 // The load sees no clobbering inside the loop, as the backedge alias check
2391 // does phi translation, and will check aliasing against store a[i-1].
2392 // However sinking the load outside the loop, below the store is incorrect.
2393
2394 // For now, only sink if there are no Defs in the loop, and the existing ones
2395 // precede the use and are in the same block.
2396 // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2397 // needs PostDominatorTreeAnalysis.
2398 // FIXME: More precise: no Defs that alias this Use.
2399 if (Flags.tooManyMemoryAccesses())
2400 return true;
2401 for (auto *BB : CurLoop->getBlocks())
2402 if (pointerInvalidatedByBlock(BB&: *BB, MSSA&: *MSSA, MU&: *MU))
2403 return true;
2404 // When sinking, the source block may not be part of the loop so check it.
2405 if (!CurLoop->contains(Inst: &I))
2406 return pointerInvalidatedByBlock(BB&: *I.getParent(), MSSA&: *MSSA, MU&: *MU);
2407
2408 return false;
2409}
2410
2411bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2412 if (const auto *Accesses = MSSA.getBlockDefs(BB: &BB))
2413 for (const auto &MA : *Accesses)
2414 if (const auto *MD = dyn_cast<MemoryDef>(Val: &MA))
2415 if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(A: MD, B: &MU))
2416 return true;
2417 return false;
2418}
2419
2420/// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A <
2421/// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
2422/// minimun can be computed outside of loop, and X is not a loop-invariant.
2423static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2424 MemorySSAUpdater &MSSAU) {
2425 bool Inverse = false;
2426 using namespace PatternMatch;
2427 Value *Cond1, *Cond2;
2428 if (match(V: &I, P: m_LogicalOr(L: m_Value(V&: Cond1), R: m_Value(V&: Cond2)))) {
2429 Inverse = true;
2430 } else if (match(V: &I, P: m_LogicalAnd(L: m_Value(V&: Cond1), R: m_Value(V&: Cond2)))) {
2431 // Do nothing
2432 } else
2433 return false;
2434
2435 auto MatchICmpAgainstInvariant = [&](Value *C, CmpPredicate &P, Value *&LHS,
2436 Value *&RHS) {
2437 if (!match(V: C, P: m_OneUse(SubPattern: m_ICmp(Pred&: P, L: m_Value(V&: LHS), R: m_Value(V&: RHS)))))
2438 return false;
2439 if (!LHS->getType()->isIntegerTy())
2440 return false;
2441 if (!ICmpInst::isRelational(P))
2442 return false;
2443 if (L.isLoopInvariant(V: LHS)) {
2444 std::swap(a&: LHS, b&: RHS);
2445 P = ICmpInst::getSwappedPredicate(pred: P);
2446 }
2447 if (L.isLoopInvariant(V: LHS) || !L.isLoopInvariant(V: RHS))
2448 return false;
2449 if (Inverse)
2450 P = ICmpInst::getInversePredicate(pred: P);
2451 return true;
2452 };
2453 CmpPredicate P1, P2;
2454 Value *LHS1, *LHS2, *RHS1, *RHS2;
2455 if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) ||
2456 !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2))
2457 return false;
2458 auto MatchingPred = CmpPredicate::getMatching(A: P1, B: P2);
2459 if (!MatchingPred || LHS1 != LHS2)
2460 return false;
2461
2462 // Everything is fine, we can do the transform.
2463 bool UseMin = ICmpInst::isLT(P: *MatchingPred) || ICmpInst::isLE(P: *MatchingPred);
2464 assert(
2465 (UseMin || ICmpInst::isGT(*MatchingPred) ||
2466 ICmpInst::isGE(*MatchingPred)) &&
2467 "Relational predicate is either less (or equal) or greater (or equal)!");
2468 Intrinsic::ID id = ICmpInst::isSigned(Pred: *MatchingPred)
2469 ? (UseMin ? Intrinsic::smin : Intrinsic::smax)
2470 : (UseMin ? Intrinsic::umin : Intrinsic::umax);
2471 auto *Preheader = L.getLoopPreheader();
2472 assert(Preheader && "Loop is not in simplify form?");
2473 IRBuilder<> Builder(Preheader->getTerminator());
2474 // We are about to create a new guaranteed use for RHS2 which might not exist
2475 // before (if it was a non-taken input of logical and/or instruction). If it
2476 // was poison, we need to freeze it. Note that no new use for LHS and RHS1 are
2477 // introduced, so they don't need this.
2478 if (isa<SelectInst>(Val: I))
2479 RHS2 = Builder.CreateFreeze(V: RHS2, Name: RHS2->getName() + ".fr");
2480 Value *NewRHS = Builder.CreateBinaryIntrinsic(
2481 ID: id, LHS: RHS1, RHS: RHS2, FMFSource: nullptr,
2482 Name: StringRef("invariant.") +
2483 (ICmpInst::isSigned(Pred: *MatchingPred) ? "s" : "u") +
2484 (UseMin ? "min" : "max"));
2485 Builder.SetInsertPoint(&I);
2486 ICmpInst::Predicate P = *MatchingPred;
2487 if (Inverse)
2488 P = ICmpInst::getInversePredicate(pred: P);
2489 Value *NewCond = Builder.CreateICmp(P, LHS: LHS1, RHS: NewRHS);
2490 NewCond->takeName(V: &I);
2491 I.replaceAllUsesWith(V: NewCond);
2492 eraseInstruction(I, SafetyInfo, MSSAU);
2493 Instruction &CondI1 = *cast<Instruction>(Val: Cond1);
2494 Instruction &CondI2 = *cast<Instruction>(Val: Cond2);
2495 salvageDebugInfo(I&: CondI1);
2496 salvageDebugInfo(I&: CondI2);
2497 eraseInstruction(I&: CondI1, SafetyInfo, MSSAU);
2498 eraseInstruction(I&: CondI2, SafetyInfo, MSSAU);
2499 return true;
2500}
2501
2502/// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
2503/// this allows hoisting the inner GEP.
2504static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2505 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2506 DominatorTree *DT) {
2507 auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I);
2508 if (!GEP)
2509 return false;
2510
2511 // Do not try to hoist a constant GEP out of the loop via reassociation.
2512 // Constant GEPs can often be folded into addressing modes, and reassociating
2513 // them may inhibit CSE of a common base.
2514 if (GEP->hasAllConstantIndices())
2515 return false;
2516
2517 auto *Src = dyn_cast<GetElementPtrInst>(Val: GEP->getPointerOperand());
2518 if (!Src || !Src->hasOneUse() || !L.contains(Inst: Src))
2519 return false;
2520
2521 Value *SrcPtr = Src->getPointerOperand();
2522 auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); };
2523 if (!L.isLoopInvariant(V: SrcPtr) || !all_of(Range: GEP->indices(), P: LoopInvariant))
2524 return false;
2525
2526 // This can only happen if !AllowSpeculation, otherwise this would already be
2527 // handled.
2528 // FIXME: Should we respect AllowSpeculation in these reassociation folds?
2529 // The flag exists to prevent metadata dropping, which is not relevant here.
2530 if (all_of(Range: Src->indices(), P: LoopInvariant))
2531 return false;
2532
2533 // The swapped GEPs are inbounds if both original GEPs are inbounds
2534 // and the sign of the offsets is the same. For simplicity, only
2535 // handle both offsets being non-negative.
2536 const DataLayout &DL = GEP->getDataLayout();
2537 auto NonNegative = [&](Value *V) {
2538 return isKnownNonNegative(V, SQ: SimplifyQuery(DL, DT, AC, GEP));
2539 };
2540 bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
2541 all_of(Range: Src->indices(), P: NonNegative) &&
2542 all_of(Range: GEP->indices(), P: NonNegative);
2543
2544 BasicBlock *Preheader = L.getLoopPreheader();
2545 IRBuilder<> Builder(Preheader->getTerminator());
2546 Value *NewSrc = Builder.CreateGEP(Ty: GEP->getSourceElementType(), Ptr: SrcPtr,
2547 IdxList: SmallVector<Value *>(GEP->indices()),
2548 Name: "invariant.gep", NW: IsInBounds);
2549 Builder.SetInsertPoint(GEP);
2550 Value *NewGEP = Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: NewSrc,
2551 IdxList: SmallVector<Value *>(Src->indices()), Name: "gep",
2552 NW: IsInBounds);
2553 GEP->replaceAllUsesWith(V: NewGEP);
2554 eraseInstruction(I&: *GEP, SafetyInfo, MSSAU);
2555 salvageDebugInfo(I&: *Src);
2556 eraseInstruction(I&: *Src, SafetyInfo, MSSAU);
2557 return true;
2558}
2559
2560/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
2561/// C1 and C2 are loop invariants and LV is a loop-variant.
2562static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
2563 Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2564 ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2565 AssumptionCache *AC, DominatorTree *DT) {
2566 assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2567 assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2568
2569 bool IsSigned = ICmpInst::isSigned(Pred);
2570
2571 // Try to represent VariantLHS as sum of invariant and variant operands.
2572 using namespace PatternMatch;
2573 Value *VariantOp, *InvariantOp;
2574 if (IsSigned && !match(V: VariantLHS, P: m_NSWAddLike(L: m_Value(V&: VariantOp),
2575 R: m_Value(V&: InvariantOp))))
2576 return false;
2577 if (!IsSigned && !match(V: VariantLHS, P: m_NUWAddLike(L: m_Value(V&: VariantOp),
2578 R: m_Value(V&: InvariantOp))))
2579 return false;
2580
2581 // LHS itself is a loop-variant, try to represent it in the form:
2582 // "VariantOp + InvariantOp". If it is possible, then we can reassociate.
2583 if (L.isLoopInvariant(V: VariantOp))
2584 std::swap(a&: VariantOp, b&: InvariantOp);
2585 if (L.isLoopInvariant(V: VariantOp) || !L.isLoopInvariant(V: InvariantOp))
2586 return false;
2587
2588 // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
2589 // freely move values from left side of inequality to right side (just as in
2590 // normal linear arithmetics). Overflows make things much more complicated, so
2591 // we want to avoid this.
2592 auto &DL = L.getHeader()->getDataLayout();
2593 SimplifyQuery SQ(DL, DT, AC, &ICmp);
2594 if (IsSigned && computeOverflowForSignedSub(LHS: InvariantRHS, RHS: InvariantOp, SQ) !=
2595 llvm::OverflowResult::NeverOverflows)
2596 return false;
2597 if (!IsSigned &&
2598 computeOverflowForUnsignedSub(LHS: InvariantRHS, RHS: InvariantOp, SQ) !=
2599 llvm::OverflowResult::NeverOverflows)
2600 return false;
2601 auto *Preheader = L.getLoopPreheader();
2602 assert(Preheader && "Loop is not in simplify form?");
2603 IRBuilder<> Builder(Preheader->getTerminator());
2604 Value *NewCmpOp =
2605 Builder.CreateSub(LHS: InvariantRHS, RHS: InvariantOp, Name: "invariant.op",
2606 /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned);
2607 ICmp.setPredicate(Pred);
2608 ICmp.setOperand(i_nocapture: 0, Val_nocapture: VariantOp);
2609 ICmp.setOperand(i_nocapture: 1, Val_nocapture: NewCmpOp);
2610 // The new LHS is a different value, so a samesign (or any other
2611 // poison-generating) flag asserted about the old operands may no longer hold.
2612 ICmp.dropPoisonGeneratingFlags();
2613
2614 Instruction &DeadI = cast<Instruction>(Val&: *VariantLHS);
2615 salvageDebugInfo(I&: DeadI);
2616 eraseInstruction(I&: DeadI, SafetyInfo, MSSAU);
2617 return true;
2618}
2619
2620/// Try to reassociate and hoist the following two patterns:
2621/// LV - C1 < C2 --> LV < C1 + C2,
2622/// C1 - LV < C2 --> LV > C1 - C2.
2623static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
2624 Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2625 ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2626 AssumptionCache *AC, DominatorTree *DT) {
2627 assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2628 assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2629
2630 bool IsSigned = ICmpInst::isSigned(Pred);
2631
2632 // Try to represent VariantLHS as sum of invariant and variant operands.
2633 using namespace PatternMatch;
2634 Value *VariantOp, *InvariantOp;
2635 if (IsSigned &&
2636 !match(V: VariantLHS, P: m_NSWSub(L: m_Value(V&: VariantOp), R: m_Value(V&: InvariantOp))))
2637 return false;
2638 if (!IsSigned &&
2639 !match(V: VariantLHS, P: m_NUWSub(L: m_Value(V&: VariantOp), R: m_Value(V&: InvariantOp))))
2640 return false;
2641
2642 bool VariantSubtracted = false;
2643 // LHS itself is a loop-variant, try to represent it in the form:
2644 // "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
2645 // the variant operand goes with minus, we use a slightly different scheme.
2646 if (L.isLoopInvariant(V: VariantOp)) {
2647 std::swap(a&: VariantOp, b&: InvariantOp);
2648 VariantSubtracted = true;
2649 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
2650 }
2651 if (L.isLoopInvariant(V: VariantOp) || !L.isLoopInvariant(V: InvariantOp))
2652 return false;
2653
2654 // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
2655 // freely move values from left side of inequality to right side (just as in
2656 // normal linear arithmetics). Overflows make things much more complicated, so
2657 // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
2658 // "C1 - C2" does not overflow.
2659 auto &DL = L.getHeader()->getDataLayout();
2660 SimplifyQuery SQ(DL, DT, AC, &ICmp);
2661 if (VariantSubtracted && IsSigned) {
2662 // C1 - LV < C2 --> LV > C1 - C2
2663 if (computeOverflowForSignedSub(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2664 llvm::OverflowResult::NeverOverflows)
2665 return false;
2666 } else if (VariantSubtracted && !IsSigned) {
2667 // C1 - LV < C2 --> LV > C1 - C2
2668 if (computeOverflowForUnsignedSub(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2669 llvm::OverflowResult::NeverOverflows)
2670 return false;
2671 } else if (!VariantSubtracted && IsSigned) {
2672 // LV - C1 < C2 --> LV < C1 + C2
2673 if (computeOverflowForSignedAdd(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2674 llvm::OverflowResult::NeverOverflows)
2675 return false;
2676 } else { // !VariantSubtracted && !IsSigned
2677 // LV - C1 < C2 --> LV < C1 + C2
2678 if (computeOverflowForUnsignedAdd(LHS: InvariantOp, RHS: InvariantRHS, SQ) !=
2679 llvm::OverflowResult::NeverOverflows)
2680 return false;
2681 }
2682 auto *Preheader = L.getLoopPreheader();
2683 assert(Preheader && "Loop is not in simplify form?");
2684 IRBuilder<> Builder(Preheader->getTerminator());
2685 Value *NewCmpOp =
2686 VariantSubtracted
2687 ? Builder.CreateSub(LHS: InvariantOp, RHS: InvariantRHS, Name: "invariant.op",
2688 /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned)
2689 : Builder.CreateAdd(LHS: InvariantOp, RHS: InvariantRHS, Name: "invariant.op",
2690 /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned);
2691 ICmp.setPredicate(Pred);
2692 ICmp.setOperand(i_nocapture: 0, Val_nocapture: VariantOp);
2693 ICmp.setOperand(i_nocapture: 1, Val_nocapture: NewCmpOp);
2694 // The new LHS is a different value, so a samesign (or any other
2695 // poison-generating) flag asserted about the old operands may no longer hold.
2696 ICmp.dropPoisonGeneratingFlags();
2697
2698 Instruction &DeadI = cast<Instruction>(Val&: *VariantLHS);
2699 salvageDebugInfo(I&: DeadI);
2700 eraseInstruction(I&: DeadI, SafetyInfo, MSSAU);
2701 return true;
2702}
2703
2704/// Reassociate and hoist add/sub expressions.
2705static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2706 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2707 DominatorTree *DT) {
2708 using namespace PatternMatch;
2709 CmpPredicate Pred;
2710 Value *LHS, *RHS;
2711 if (!match(V: &I, P: m_ICmp(Pred, L: m_Value(V&: LHS), R: m_Value(V&: RHS))))
2712 return false;
2713
2714 // Put variant operand to LHS position.
2715 if (L.isLoopInvariant(V: LHS)) {
2716 std::swap(a&: LHS, b&: RHS);
2717 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
2718 }
2719 // We want to delete the initial operation after reassociation, so only do it
2720 // if it has no other uses.
2721 if (L.isLoopInvariant(V: LHS) || !L.isLoopInvariant(V: RHS) || !LHS->hasOneUse())
2722 return false;
2723
2724 // TODO: We could go with smarter context, taking common dominator of all I's
2725 // users instead of I itself.
2726 if (hoistAdd(Pred, VariantLHS: LHS, InvariantRHS: RHS, ICmp&: cast<ICmpInst>(Val&: I), L, SafetyInfo, MSSAU, AC, DT))
2727 return true;
2728
2729 if (hoistSub(Pred, VariantLHS: LHS, InvariantRHS: RHS, ICmp&: cast<ICmpInst>(Val&: I), L, SafetyInfo, MSSAU, AC, DT))
2730 return true;
2731
2732 return false;
2733}
2734
2735static bool isReassociableOp(Instruction *I, unsigned IntOpcode,
2736 unsigned FPOpcode) {
2737 if (I->getOpcode() == IntOpcode)
2738 return true;
2739 if (I->getOpcode() == FPOpcode && I->hasAllowReassoc() &&
2740 I->hasNoSignedZeros())
2741 return true;
2742 return false;
2743}
2744
2745/// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where
2746/// A1, A2, ... and C are loop invariants into expressions like
2747/// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...
2748/// invariant expressions. This functions returns true only if any hoisting has
2749/// actually occurred.
2750static bool hoistMulAddAssociation(Instruction &I, Loop &L,
2751 ICFLoopSafetyInfo &SafetyInfo,
2752 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2753 DominatorTree *DT) {
2754 if (!isReassociableOp(I: &I, IntOpcode: Instruction::Mul, FPOpcode: Instruction::FMul))
2755 return false;
2756 Value *VariantOp = I.getOperand(i: 0);
2757 Value *InvariantOp = I.getOperand(i: 1);
2758 if (L.isLoopInvariant(V: VariantOp))
2759 std::swap(a&: VariantOp, b&: InvariantOp);
2760 if (L.isLoopInvariant(V: VariantOp) || !L.isLoopInvariant(V: InvariantOp))
2761 return false;
2762 Value *Factor = InvariantOp;
2763
2764 // First, we need to make sure we should do the transformation.
2765 SmallVector<Use *> Changes;
2766 SmallVector<BinaryOperator *> Adds;
2767 SmallVector<BinaryOperator *> Worklist;
2768 if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(Val: VariantOp))
2769 Worklist.push_back(Elt: VariantBinOp);
2770 while (!Worklist.empty()) {
2771 BinaryOperator *BO = Worklist.pop_back_val();
2772 if (!BO->hasOneUse())
2773 return false;
2774 if (isReassociableOp(I: BO, IntOpcode: Instruction::Add, FPOpcode: Instruction::FAdd) &&
2775 isa<BinaryOperator>(Val: BO->getOperand(i_nocapture: 0)) &&
2776 isa<BinaryOperator>(Val: BO->getOperand(i_nocapture: 1))) {
2777 Worklist.push_back(Elt: cast<BinaryOperator>(Val: BO->getOperand(i_nocapture: 0)));
2778 Worklist.push_back(Elt: cast<BinaryOperator>(Val: BO->getOperand(i_nocapture: 1)));
2779 Adds.push_back(Elt: BO);
2780 continue;
2781 }
2782 if (!isReassociableOp(I: BO, IntOpcode: Instruction::Mul, FPOpcode: Instruction::FMul) ||
2783 L.isLoopInvariant(V: BO))
2784 return false;
2785 Use &U0 = BO->getOperandUse(i: 0);
2786 Use &U1 = BO->getOperandUse(i: 1);
2787 if (L.isLoopInvariant(V: U0))
2788 Changes.push_back(Elt: &U0);
2789 else if (L.isLoopInvariant(V: U1))
2790 Changes.push_back(Elt: &U1);
2791 else
2792 return false;
2793 unsigned Limit = I.getType()->isIntOrIntVectorTy()
2794 ? IntAssociationUpperLimit
2795 : FPAssociationUpperLimit;
2796 if (Changes.size() > Limit)
2797 return false;
2798 }
2799 if (Changes.empty())
2800 return false;
2801
2802 // Drop the poison flags for any adds we looked through.
2803 if (I.getType()->isIntOrIntVectorTy()) {
2804 for (auto *Add : Adds)
2805 Add->dropPoisonGeneratingFlags();
2806 }
2807
2808 // We know we should do it so let's do the transformation.
2809 auto *Preheader = L.getLoopPreheader();
2810 assert(Preheader && "Loop is not in simplify form?");
2811 IRBuilder<> Builder(Preheader->getTerminator());
2812 for (auto *U : Changes) {
2813 assert(L.isLoopInvariant(U->get()));
2814 auto *Ins = cast<BinaryOperator>(Val: U->getUser());
2815 Value *Mul;
2816 if (I.getType()->isIntOrIntVectorTy()) {
2817 Mul = Builder.CreateMul(LHS: U->get(), RHS: Factor, Name: "factor.op.mul");
2818 // Drop the poison flags on the original multiply.
2819 Ins->dropPoisonGeneratingFlags();
2820 } else
2821 Mul = Builder.CreateFMulFMF(L: U->get(), R: Factor, FMFSource: Ins, Name: "factor.op.fmul");
2822
2823 // Rewrite the reassociable instruction.
2824 unsigned OpIdx = U->getOperandNo();
2825 auto *LHS = OpIdx == 0 ? Mul : Ins->getOperand(i_nocapture: 0);
2826 auto *RHS = OpIdx == 1 ? Mul : Ins->getOperand(i_nocapture: 1);
2827 auto *NewBO =
2828 BinaryOperator::Create(Op: Ins->getOpcode(), S1: LHS, S2: RHS,
2829 Name: Ins->getName() + ".reass", InsertBefore: Ins->getIterator());
2830 NewBO->setDebugLoc(DebugLoc::getDropped());
2831 NewBO->copyIRFlags(V: Ins);
2832 if (VariantOp == Ins)
2833 VariantOp = NewBO;
2834 Ins->replaceAllUsesWith(V: NewBO);
2835 eraseInstruction(I&: *Ins, SafetyInfo, MSSAU);
2836 }
2837
2838 I.replaceAllUsesWith(V: VariantOp);
2839 eraseInstruction(I, SafetyInfo, MSSAU);
2840 return true;
2841}
2842
2843/// Reassociate associative binary expressions of the form
2844///
2845/// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
2846/// 2. "(C1 op LV) op C2" ==> "LV op (C1 op C2)"
2847/// 3. "C2 op (C1 op LV)" ==> "LV op (C1 op C2)"
2848/// 4. "C2 op (LV op C1)" ==> "LV op (C1 op C2)"
2849///
2850/// where op is an associative BinOp, LV is a loop variant, and C1 and C2 are
2851/// loop invariants that we want to hoist, noting that associativity implies
2852/// commutativity.
2853static bool hoistBOAssociation(Instruction &I, Loop &L,
2854 ICFLoopSafetyInfo &SafetyInfo,
2855 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2856 DominatorTree *DT) {
2857 auto *BO = dyn_cast<BinaryOperator>(Val: &I);
2858 if (!BO || !BO->isAssociative())
2859 return false;
2860
2861 Instruction::BinaryOps Opcode = BO->getOpcode();
2862 bool LVInRHS = L.isLoopInvariant(V: BO->getOperand(i_nocapture: 0));
2863 auto *BO0 = dyn_cast<BinaryOperator>(Val: BO->getOperand(i_nocapture: LVInRHS));
2864 if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative() ||
2865 BO0->hasNUsesOrMore(N: BO0->getType()->isIntegerTy() ? 2 : 3))
2866 return false;
2867
2868 Value *LV = BO0->getOperand(i_nocapture: 0);
2869 Value *C1 = BO0->getOperand(i_nocapture: 1);
2870 Value *C2 = BO->getOperand(i_nocapture: !LVInRHS);
2871
2872 assert(BO->isCommutative() && BO0->isCommutative() &&
2873 "Associativity implies commutativity");
2874 if (L.isLoopInvariant(V: LV) && !L.isLoopInvariant(V: C1))
2875 std::swap(a&: LV, b&: C1);
2876 if (L.isLoopInvariant(V: LV) || !L.isLoopInvariant(V: C1) || !L.isLoopInvariant(V: C2))
2877 return false;
2878
2879 auto *Preheader = L.getLoopPreheader();
2880 assert(Preheader && "Loop is not in simplify form?");
2881
2882 IRBuilder<> Builder(Preheader->getTerminator());
2883 auto *Inv = Builder.CreateBinOp(Opc: Opcode, LHS: C1, RHS: C2, Name: "invariant.op");
2884
2885 auto *NewBO = BinaryOperator::Create(
2886 Op: Opcode, S1: LV, S2: Inv, Name: BO->getName() + ".reass", InsertBefore: BO->getIterator());
2887 NewBO->setDebugLoc(DebugLoc::getDropped());
2888
2889 if (Opcode == Instruction::FAdd || Opcode == Instruction::FMul) {
2890 // Intersect FMF flags for FADD and FMUL.
2891 FastMathFlags Intersect = BO->getFastMathFlags() & BO0->getFastMathFlags();
2892 if (auto *I = dyn_cast<Instruction>(Val: Inv))
2893 I->setFastMathFlags(Intersect);
2894 NewBO->setFastMathFlags(Intersect);
2895 } else {
2896 OverflowTracking Flags;
2897 Flags.AllKnownNonNegative = false;
2898 Flags.AllKnownNonZero = false;
2899 Flags.mergeFlags(I&: *BO);
2900 Flags.mergeFlags(I&: *BO0);
2901 // If `Inv` was not constant-folded, a new Instruction has been created.
2902 if (auto *I = dyn_cast<Instruction>(Val: Inv))
2903 Flags.applyFlags(I&: *I);
2904 Flags.applyFlags(I&: *NewBO);
2905 }
2906
2907 BO->replaceAllUsesWith(V: NewBO);
2908 eraseInstruction(I&: *BO, SafetyInfo, MSSAU);
2909
2910 // (LV op C1) might not be erased if it has more uses than the one we just
2911 // replaced.
2912 if (BO0->use_empty()) {
2913 salvageDebugInfo(I&: *BO0);
2914 eraseInstruction(I&: *BO0, SafetyInfo, MSSAU);
2915 }
2916
2917 return true;
2918}
2919
2920/// Reassociate add/sub expressions of the form:
2921///
2922/// 1. "(LV + C1) - C2" ==> "LV + (C1 - C2)"
2923/// 2. "(LV - C1) - C2" ==> "LV - (C1 + C2)"
2924/// 3. "(LV - C1) + C2" ==> "LV + (C2 - C1)"
2925///
2926/// where LV is a loop variant, and C1 and C2 are loop invariants.
2927/// Sub is not associative, but these algebraic identities allow hoisting
2928/// invariant computations out of the loop.
2929static bool hoistSubAddAssociation(Instruction &I, Loop &L,
2930 ICFLoopSafetyInfo &SafetyInfo,
2931 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2932 DominatorTree *DT) {
2933 using namespace PatternMatch;
2934
2935 Instruction *BO;
2936 Value *LV, *C1, *C2;
2937 Instruction::BinaryOps InvOp, ResultOp;
2938
2939 // Try to match one of three reassociation patterns involving sub.
2940 //
2941 // 1. (LV + C1) - C2 ==> LV + (C1 - C2)
2942 // 2. (LV - C1) - C2 ==> LV - (C1 + C2)
2943 // 3. (LV - C1) + C2 ==> LV + (C2 - C1)
2944 // ^ ^
2945 // \ \___ InvOp
2946 // \
2947 // \____ ResultOp
2948 //
2949 if (match(V: &I,
2950 P: m_Sub(L: m_OneUse(SubPattern: m_Instruction(I&: BO, P: m_Add(L: m_Value(V&: LV), R: m_Value(V&: C1)))),
2951 R: m_Value(V&: C2)))) {
2952 // Case 1.
2953 //
2954 // Depending on which of the addition is invariant, we might need to swap
2955 // the arguments
2956 if (L.isLoopInvariant(V: LV) && !L.isLoopInvariant(V: C1))
2957 std::swap(a&: LV, b&: C1);
2958 InvOp = Instruction::Sub;
2959 ResultOp = Instruction::Add;
2960 } else if (match(V: &I, P: m_Sub(L: m_OneUse(SubPattern: m_Instruction(
2961 I&: BO, P: m_Sub(L: m_Value(V&: LV), R: m_Value(V&: C1)))),
2962 R: m_Value(V&: C2)))) {
2963 // Case 2.
2964 InvOp = Instruction::Add;
2965 ResultOp = Instruction::Sub;
2966 } else if (match(V: &I, P: m_c_Add(L: m_OneUse(SubPattern: m_Instruction(
2967 I&: BO, P: m_Sub(L: m_Value(V&: LV), R: m_Value(V&: C1)))),
2968 R: m_Value(V&: C2)))) {
2969 // Case 3.
2970 //
2971 // We use (C2 - C1) as the invariant as opposed to case 1, but instead of
2972 // adding a special case in invariant creation, we can just swap the
2973 // operands here.
2974 std::swap(a&: C1, b&: C2);
2975 InvOp = Instruction::Sub;
2976 ResultOp = Instruction::Add;
2977 } else {
2978 return false;
2979 }
2980
2981 if (L.isLoopInvariant(V: LV) || !L.isLoopInvariant(V: C1) || !L.isLoopInvariant(V: C2))
2982 return false;
2983
2984 auto *Preheader = L.getLoopPreheader();
2985 assert(Preheader && "Loop is not in simplify form?");
2986
2987 IRBuilder<> Builder(Preheader->getTerminator());
2988 auto *Inv = Builder.CreateBinOp(Opc: InvOp, LHS: C1, RHS: C2, Name: "invariant.op");
2989
2990 auto *NewBO = BinaryOperator::Create(Op: ResultOp, S1: LV, S2: Inv,
2991 Name: I.getName() + ".reass", InsertBefore: I.getIterator());
2992 NewBO->setDebugLoc(DebugLoc::getDropped());
2993
2994 // No overflow flags are set on the new instructions -- reassociation
2995 // involving sub does not preserve nsw/nuw in general.
2996
2997 I.replaceAllUsesWith(V: NewBO);
2998 eraseInstruction(I, SafetyInfo, MSSAU);
2999
3000 salvageDebugInfo(I&: *BO);
3001 eraseInstruction(I&: *BO, SafetyInfo, MSSAU);
3002
3003 return true;
3004}
3005
3006static bool hoistArithmetics(Instruction &I, Loop &L,
3007 ICFLoopSafetyInfo &SafetyInfo,
3008 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
3009 DominatorTree *DT) {
3010 // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
3011 // into (x < min(INV1, INV2)), and hoisting the invariant part of this
3012 // expression out of the loop.
3013 if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
3014 ++NumHoisted;
3015 ++NumMinMaxHoisted;
3016 return true;
3017 }
3018
3019 // Try to hoist GEPs by reassociation.
3020 if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
3021 ++NumHoisted;
3022 ++NumGEPsHoisted;
3023 return true;
3024 }
3025
3026 // Try to hoist add/sub's by reassociation.
3027 if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
3028 ++NumHoisted;
3029 ++NumAddSubHoisted;
3030 return true;
3031 }
3032
3033 bool IsInt = I.getType()->isIntOrIntVectorTy();
3034 if (hoistMulAddAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
3035 ++NumHoisted;
3036 if (IsInt)
3037 ++NumIntAssociationsHoisted;
3038 else
3039 ++NumFPAssociationsHoisted;
3040 return true;
3041 }
3042
3043 if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
3044 ++NumHoisted;
3045 ++NumBOAssociationsHoisted;
3046 return true;
3047 }
3048
3049 if (hoistSubAddAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
3050 ++NumHoisted;
3051 ++NumBOAssociationsHoisted;
3052 return true;
3053 }
3054
3055 return false;
3056}
3057
3058/// Little predicate that returns true if the specified basic block is in
3059/// a subloop of the current one, not the current one itself.
3060///
3061static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
3062 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
3063 return LI->getLoopFor(BB) != CurLoop;
3064}
3065