LoopRotationUtils.cpp source code [llvm_projects/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp]

1	//===----------------- LoopRotationUtils.cpp -----------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file provides utilities to convert a loop into a loop with bottom test.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/Transforms/Utils/LoopRotationUtils.h"
14	#include "llvm/ADT/Statistic.h"
15	#include "llvm/Analysis/AssumptionCache.h"
16	#include "llvm/Analysis/CodeMetrics.h"
17	#include "llvm/Analysis/DomTreeUpdater.h"
18	#include "llvm/Analysis/InstructionSimplify.h"
19	#include "llvm/Analysis/LoopInfo.h"
20	#include "llvm/Analysis/MemorySSA.h"
21	#include "llvm/Analysis/MemorySSAUpdater.h"
22	#include "llvm/Analysis/ScalarEvolution.h"
23	#include "llvm/Analysis/ValueTracking.h"
24	#include "llvm/IR/CFG.h"
25	#include "llvm/IR/DebugInfo.h"
26	#include "llvm/IR/Dominators.h"
27	#include "llvm/IR/IntrinsicInst.h"
28	#include "llvm/IR/MDBuilder.h"
29	#include "llvm/IR/ProfDataUtils.h"
30	#include "llvm/Support/CommandLine.h"
31	#include "llvm/Support/Debug.h"
32	#include "llvm/Support/raw_ostream.h"
33	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
34	#include "llvm/Transforms/Utils/Cloning.h"
35	#include "llvm/Transforms/Utils/Local.h"
36	#include "llvm/Transforms/Utils/SSAUpdater.h"
37	#include "llvm/Transforms/Utils/ValueMapper.h"
38	using namespace llvm;
39
40	#define DEBUG_TYPE "loop-rotate"
41
42	STATISTIC(NumNotRotatedDueToHeaderSize,
43	"Number of loops not rotated due to the header size");
44	STATISTIC(NumInstrsHoisted,
45	"Number of instructions hoisted into loop preheader");
46	STATISTIC(NumInstrsDuplicated,
47	"Number of instructions cloned into loop preheader");
48	STATISTIC(NumRotated, "Number of loops rotated");
49
50	static cl::opt<bool>
51	MultiRotate("loop-rotate-multi", cl::init(Val: false), cl::Hidden,
52	cl::desc ("Allow loop rotation multiple times in order to reach "
53	"a better latch exit"));
54
// Probability that a rotated loop has zero trip count / is never entered.
// Element 0 is the weight given to the "never entered" exit edge and element 1
// the weight the "entered" edge is scaled against (see updateBranchWeights).
static constexpr uint32_t ZeroTripCountWeights[] = {1, 127};
57
58	namespace {
59	/// A simple loop rotation transformation.
60	class LoopRotate {
61	const unsigned MaxHeaderSize;
62	LoopInfo *LI;
63	const TargetTransformInfo *TTI;
64	AssumptionCache *AC;
65	DominatorTree *DT;
66	ScalarEvolution *SE;
67	MemorySSAUpdater *MSSAU;
68	const SimplifyQuery &SQ;
69	bool RotationOnly;
70	bool IsUtilMode;
71	bool PrepareForLTO;
72
73	public:
74	LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
75	const TargetTransformInfo TTI, AssumptionCache AC,
76	DominatorTree DT, ScalarEvolution SE, MemorySSAUpdater *MSSAU,
77	const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
78	bool PrepareForLTO)
79	: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
80	MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
81	IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
82	bool processLoop(Loop *L);
83
84	private:
85	bool rotateLoop(Loop L, bool* SimplifiedLatch);
86	bool simplifyLoopLatch(Loop *L);
87	};
88	} // end anonymous namespace
89
90	/// Insert (K, V) pair into the ValueToValueMap, and verify the key did not
91	/// previously exist in the map, and the value was inserted.
92	static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value K, Value V) {
93	bool Inserted = VM.insert(KV: {K, V}).second;
94	assert(Inserted);
95	(void)Inserted;
96	}
97	/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
98	/// old header into the preheader. If there were uses of the values produced by
99	/// these instruction that were outside of the loop, we have to insert PHI nodes
100	/// to merge the two values. Do this now.
101	static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
102	BasicBlock *OrigPreheader,
103	ValueToValueMapTy &ValueMap,
104	ScalarEvolution *SE,
105	SmallVectorImpl<PHINode> InsertedPHIs) {
106	// Remove PHI node entries that are no longer live.
107	BasicBlock::iterator I, E = OrigHeader->end();
108	for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(Val&: I); ++I)
109	PN->removeIncomingValue(Idx: PN->getBasicBlockIndex(BB: OrigPreheader));
110
111	// Now fix up users of the instructions in OrigHeader, inserting PHI nodes
112	// as necessary.
113	SSAUpdater SSA(InsertedPHIs);
114	for (I = OrigHeader->begin(); I != E; ++I) {
115	Value OrigHeaderVal = &I;
116
117	// If there are no uses of the value (e.g. because it returns void), there
118	// is nothing to rewrite.
119	if (OrigHeaderVal->use_empty())
120	continue;
121
122	Value *OrigPreHeaderVal = ValueMap.lookup(Val: OrigHeaderVal);
123
124	// The value now exits in two versions: the initial value in the preheader
125	// and the loop "next" value in the original header.
126	SSA.Initialize(Ty: OrigHeaderVal->getType(), Name: OrigHeaderVal->getName());
127	// Force re-computation of OrigHeaderVal, as some users now need to use the
128	// new PHI node.
129	if (SE)
130	SE->forgetValue(V: OrigHeaderVal);
131	SSA.AddAvailableValue(BB: OrigHeader, V: OrigHeaderVal);
132	SSA.AddAvailableValue(BB: OrigPreheader, V: OrigPreHeaderVal);
133
134	// Visit each use of the OrigHeader instruction.
135	for (Use &U : llvm::make_early_inc_range(Range: OrigHeaderVal->uses())) {
136	// SSAUpdater can't handle a non-PHI use in the same block as an
137	// earlier def. We can easily handle those cases manually.
138	Instruction *UserInst = cast<Instruction>(Val: U.getUser());
139	if (!isa<PHINode>(Val: UserInst)) {
140	BasicBlock *UserBB = UserInst->getParent();
141
142	// The original users in the OrigHeader are already using the
143	// original definitions.
144	if (UserBB == OrigHeader)
145	continue;
146
147	// Users in the OrigPreHeader need to use the value to which the
148	// original definitions are mapped.
149	if (UserBB == OrigPreheader) {
150	U = OrigPreHeaderVal;
151	continue;
152	}
153	}
154
155	// Anything else can be handled by SSAUpdater.
156	SSA.RewriteUse(U);
157	}
158
159	// Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
160	// intrinsics.
161	SmallVector<DbgValueInst *, `1`> DbgValues;
162	SmallVector<DbgVariableRecord *, `1`> DbgVariableRecords;
163	llvm::findDbgValues(DbgValues, V: OrigHeaderVal, DbgVariableRecords: &DbgVariableRecords);
164	for (auto &DbgValue : DbgValues) {
165	// The original users in the OrigHeader are already using the original
166	// definitions.
167	BasicBlock *UserBB = DbgValue->getParent();
168	if (UserBB == OrigHeader)
169	continue;
170
171	// Users in the OrigPreHeader need to use the value to which the
172	// original definitions are mapped and anything else can be handled by
173	// the SSAUpdater. To avoid adding PHINodes, check if the value is
174	// available in UserBB, if not substitute undef.
175	Value *NewVal;
176	if (UserBB == OrigPreheader)
177	NewVal = OrigPreHeaderVal;
178	else if (SSA.HasValueForBlock(BB: UserBB))
179	NewVal = SSA.GetValueInMiddleOfBlock(BB: UserBB);
180	else
181	NewVal = UndefValue::get(T: OrigHeaderVal->getType());
182	DbgValue->replaceVariableLocationOp(OldValue: OrigHeaderVal, NewValue: NewVal);
183	}
184
185	// RemoveDIs: duplicate implementation for non-instruction debug-info
186	// storage in DbgVariableRecords.
187	for (DbgVariableRecord *DVR : DbgVariableRecords) {
188	// The original users in the OrigHeader are already using the original
189	// definitions.
190	BasicBlock *UserBB = DVR->getMarker()->getParent();
191	if (UserBB == OrigHeader)
192	continue;
193
194	// Users in the OrigPreHeader need to use the value to which the
195	// original definitions are mapped and anything else can be handled by
196	// the SSAUpdater. To avoid adding PHINodes, check if the value is
197	// available in UserBB, if not substitute undef.
198	Value *NewVal;
199	if (UserBB == OrigPreheader)
200	NewVal = OrigPreHeaderVal;
201	else if (SSA.HasValueForBlock(BB: UserBB))
202	NewVal = SSA.GetValueInMiddleOfBlock(BB: UserBB);
203	else
204	NewVal = UndefValue::get(T: OrigHeaderVal->getType());
205	DVR->replaceVariableLocationOp(OldValue: OrigHeaderVal, NewValue: NewVal);
206	}
207	}
208	}
209
210	// Assuming both header and latch are exiting, look for a phi which is only
211	// used outside the loop (via a LCSSA phi) in the exit from the header.
212	// This means that rotating the loop can remove the phi.
213	static bool profitableToRotateLoopExitingLatch(Loop *L) {
214	BasicBlock *Header = L->getHeader();
215	BranchInst *BI = dyn_cast<BranchInst>(Val: Header->getTerminator());
216	assert(BI && BI->isConditional() && "need header with conditional exit");
217	BasicBlock *HeaderExit = BI->getSuccessor(i: `0`);
218	if (L->contains(BB: HeaderExit))
219	HeaderExit = BI->getSuccessor(i: `1`);
220
221	for (auto &Phi : Header->phis()) {
222	// Look for uses of this phi in the loop/via exits other than the header.
223	if (llvm::any_of(Range: Phi.users(), P: [HeaderExit](const User *U) {
224	return cast<Instruction>(Val: U)->getParent() != HeaderExit;
225	}))
226	continue;
227	return true;
228	}
229	return false;
230	}
231
232	// Check that latch exit is deoptimizing (which means - very unlikely to happen)
233	// and there is another exit from the loop which is non-deoptimizing.
234	// If we rotate latch to that exit our loop has a better chance of being fully
235	// canonical.
236	//
237	// It can give false positives in some rare cases.
238	static bool canRotateDeoptimizingLatchExit(Loop *L) {
239	BasicBlock *Latch = L->getLoopLatch();
240	assert(Latch && "need latch");
241	BranchInst *BI = dyn_cast<BranchInst>(Val: Latch->getTerminator());
242	// Need normal exiting latch.
243	if (!BI \|\| !BI->isConditional())
244	return false;
245
246	BasicBlock *Exit = BI->getSuccessor(i: `1`);
247	if (L->contains(BB: Exit))
248	Exit = BI->getSuccessor(i: `0`);
249
250	// Latch exit is non-deoptimizing, no need to rotate.
251	if (!Exit->getPostdominatingDeoptimizeCall())
252	return false;
253
254	SmallVector<BasicBlock *, `4`> Exits;
255	L->getUniqueExitBlocks(ExitBlocks&: Exits);
256	if (!Exits.empty()) {
257	// There is at least one non-deoptimizing exit.
258	//
259	// Note, that BasicBlock::getPostdominatingDeoptimizeCall is not exact,
260	// as it can conservatively return false for deoptimizing exits with
261	// complex enough control flow down to deoptimize call.
262	//
263	// That means here we can report success for a case where
264	// all exits are deoptimizing but one of them has complex enough
265	// control flow (e.g. with loops).
266	//
267	// That should be a very rare case and false positives for this function
268	// have compile-time effect only.
269	return any_of(Range&: Exits, P: [](const BasicBlock *BB) {
270	return !BB->getPostdominatingDeoptimizeCall();
271	});
272	}
273	return false;
274	}
275
276	static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI,
277	bool HasConditionalPreHeader,
278	bool SuccsSwapped) {
279	MDNode *WeightMD = getBranchWeightMDNode(I: PreHeaderBI);
280	if (WeightMD == nullptr)
281	return;
282
283	// LoopBI should currently be a clone of PreHeaderBI with the same
284	// metadata. But we double check to make sure we don't have a degenerate case
285	// where instsimplify changed the instructions.
286	if (WeightMD != getBranchWeightMDNode(I: LoopBI))
287	return;
288
289	SmallVector<uint32_t, `2`> Weights;
290	extractFromBranchWeightMD32(ProfileData: WeightMD, Weights);
291	if (Weights.size() != `2`)
292	return;
293	uint32_t OrigLoopExitWeight = Weights [`0`];
294	uint32_t OrigLoopBackedgeWeight = Weights [`1`];
295
296	if (SuccsSwapped)
297	std::swap(a&: OrigLoopExitWeight, b&: OrigLoopBackedgeWeight);
298
299	// Update branch weights. Consider the following edge-counts:
300	//
301	// \| \|-------- \|
302	// V V \| V
303	// Br i1 ... \| Br i1 ...
304	// \| \| \| \| \|
305	// x\| y\| \| becomes: \| y0\| \|-----
306	// V V \| \| V V \|
307	// Exit Loop \| \| Loop \|
308	// \| \| \| Br i1 ... \|
309	// ----- \| \| \| \|
310	// x0\| x1\| y1 \| \|
311	// V V ----
312	// Exit
313	//
314	// The following must hold:
315	// - x == x0 + x1 # counts to "exit" must stay the same.
316	// - y0 == x - x0 == x1 # how often loop was entered at all.
317	// - y1 == y - y0 # How often loop was repeated (after first iter.).
318	//
319	// We cannot generally deduce how often we had a zero-trip count loop so we
320	// have to make a guess for how to distribute x among the new x0 and x1.
321
322	uint32_t ExitWeight0; // aka x0
323	uint32_t ExitWeight1; // aka x1
324	uint32_t EnterWeight; // aka y0
325	uint32_t LoopBackWeight; // aka y1
326	if (OrigLoopExitWeight > `0` && OrigLoopBackedgeWeight > `0`) {
327	ExitWeight0 = `0`;
328	if (HasConditionalPreHeader) {
329	// Here we cannot know how many 0-trip count loops we have, so we guess:
330	if (OrigLoopBackedgeWeight >= OrigLoopExitWeight) {
331	// If the loop count is bigger than the exit count then we set
332	// probabilities as if 0-trip count nearly never happens.
333	ExitWeight0 = ZeroTripCountWeights[`0`];
334	// Scale up counts if necessary so we can match `ZeroTripCountWeights`
335	// for the `ExitWeight0`:`ExitWeight1` (aka `x0`:`x1` ratio`) ratio.
336	while (OrigLoopExitWeight < ZeroTripCountWeights[`1`] + ExitWeight0) {
337	// ... but don't overflow.
338	uint32_t const HighBit = uint32_t{`1`} << (sizeof(uint32_t) * `8` - `1`);
339	if ((OrigLoopBackedgeWeight & HighBit) != `0` \|\|
340	(OrigLoopExitWeight & HighBit) != `0`)
341	break;
342	OrigLoopBackedgeWeight <<= `1`;
343	OrigLoopExitWeight <<= `1`;
344	}
345	} else {
346	// If there's a higher exit-count than backedge-count then we set
347	// probabilities as if there are only 0-trip and 1-trip cases.
348	ExitWeight0 = OrigLoopExitWeight - OrigLoopBackedgeWeight;
349	}
350	} else {
351	// Theoretically, if the loop body must be executed at least once, the
352	// backedge count must be not less than exit count. However the branch
353	// weight collected by sampling-based PGO may be not very accurate due to
354	// sampling. Therefore this workaround is required here to avoid underflow
355	// of unsigned in following update of branch weight.
356	if (OrigLoopExitWeight > OrigLoopBackedgeWeight)
357	OrigLoopBackedgeWeight = OrigLoopExitWeight;
358	}
359	assert(OrigLoopExitWeight >= ExitWeight0 && "Bad branch weight");
360	ExitWeight1 = OrigLoopExitWeight - ExitWeight0;
361	EnterWeight = ExitWeight1;
362	assert(OrigLoopBackedgeWeight >= EnterWeight && "Bad branch weight");
363	LoopBackWeight = OrigLoopBackedgeWeight - EnterWeight;
364	} else if (OrigLoopExitWeight == `0`) {
365	if (OrigLoopBackedgeWeight == `0`) {
366	// degenerate case... keep everything zero...
367	ExitWeight0 = `0`;
368	ExitWeight1 = `0`;
369	EnterWeight = `0`;
370	LoopBackWeight = `0`;
371	} else {
372	// Special case "LoopExitWeight == 0" weights which behaves like an
373	// endless where we don't want loop-enttry (y0) to be the same as
374	// loop-exit (x1).
375	ExitWeight0 = `0`;
376	ExitWeight1 = `0`;
377	EnterWeight = `1`;
378	LoopBackWeight = OrigLoopBackedgeWeight;
379	}
380	} else {
381	// loop is never entered.
382	assert(OrigLoopBackedgeWeight == `0` && "remaining case is backedge zero");
383	ExitWeight0 = `1`;
384	ExitWeight1 = `1`;
385	EnterWeight = `0`;
386	LoopBackWeight = `0`;
387	}
388
389	const uint32_t LoopBIWeights[] = {
390	SuccsSwapped ? LoopBackWeight : ExitWeight1,
391	SuccsSwapped ? ExitWeight1 : LoopBackWeight,
392	};
393	setBranchWeights(I&: LoopBI, Weights: LoopBIWeights, /IsExpected=/false);
394	if (HasConditionalPreHeader) {
395	const uint32_t PreHeaderBIWeights[] = {
396	SuccsSwapped ? EnterWeight : ExitWeight0,
397	SuccsSwapped ? ExitWeight0 : EnterWeight,
398	};
399	setBranchWeights(I&: PreHeaderBI, Weights: PreHeaderBIWeights, /IsExpected=/false);
400	}
401	}
402
403	/// Rotate loop LP. Return true if the loop is rotated.
404	///
405	/// \param SimplifiedLatch is true if the latch was just folded into the final
406	/// loop exit. In this case we may want to rotate even though the new latch is
407	/// now an exiting branch. This rotation would have happened had the latch not
408	/// been simplified. However, if SimplifiedLatch is false, then we avoid
409	/// rotating loops in which the latch exits to avoid excessive or endless
410	/// rotation. LoopRotate should be repeatable and converge to a canonical
411	/// form. This property is satisfied because simplifying the loop latch can only
412	/// happen once across multiple invocations of the LoopRotate pass.
413	///
414	/// If -loop-rotate-multi is enabled we can do multiple rotations in one go
415	/// so to reach a suitable (non-deoptimizing) exit.
416	bool LoopRotate::rotateLoop(Loop L, bool* SimplifiedLatch) {
417	// If the loop has only one block then there is not much to rotate.
418	if (L->getBlocks().size() == `1`)
419	return false;
420
421	bool Rotated = false;
422	do {
423	BasicBlock *OrigHeader = L->getHeader();
424	BasicBlock *OrigLatch = L->getLoopLatch();
425
426	BranchInst *BI = dyn_cast<BranchInst>(Val: OrigHeader->getTerminator());
427	if (!BI \|\| BI->isUnconditional())
428	return Rotated;
429
430	// If the loop header is not one of the loop exiting blocks then
431	// either this loop is already rotated or it is not
432	// suitable for loop rotation transformations.
433	if (!L->isLoopExiting(BB: OrigHeader))
434	return Rotated;
435
436	// If the loop latch already contains a branch that leaves the loop then the
437	// loop is already rotated.
438	if (!OrigLatch)
439	return Rotated;
440
441	// Rotate if either the loop latch does not* exit the loop, or if the loop*
442	// latch was just simplified. Or if we think it will be profitable.
443	if (L->isLoopExiting(BB: OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
444	!profitableToRotateLoopExitingLatch(L) &&
445	!canRotateDeoptimizingLatchExit(L))
446	return Rotated;
447
448	// Check size of original header and reject loop if it is very big or we can't
449	// duplicate blocks inside it.
450	{
451	SmallPtrSet<const Value *, `32`> EphValues;
452	CodeMetrics::collectEphemeralValues(L, AC, EphValues);
453
454	CodeMetrics Metrics;
455	Metrics.analyzeBasicBlock(BB: OrigHeader, TTI: *TTI, EphValues, PrepareForLTO);
456	if (Metrics.notDuplicatable) {
457	LLVM_DEBUG(
458	dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
459	<< " instructions: ";
460	L->dump());
461	return Rotated;
462	}
463	if (Metrics.Convergence != ConvergenceKind::None) {
464	LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
465	"instructions: ";
466	L->dump());
467	return Rotated;
468	}
469	if (!Metrics.NumInsts.isValid()) {
470	LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions"
471	" with invalid cost: ";
472	L->dump());
473	return Rotated;
474	}
475	if (Metrics.NumInsts > MaxHeaderSize) {
476	LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
477	<< Metrics.NumInsts
478	<< " instructions, which is more than the threshold ("
479	<< MaxHeaderSize << " instructions): ";
480	L->dump());
481	++NumNotRotatedDueToHeaderSize;
482	return Rotated;
483	}
484
485	// When preparing for LTO, avoid rotating loops with calls that could be
486	// inlined during the LTO stage.
487	if (PrepareForLTO && Metrics.NumInlineCandidates > `0`)
488	return Rotated;
489	}
490
491	// Now, this loop is suitable for rotation.
492	BasicBlock *OrigPreheader = L->getLoopPreheader();
493
494	// If the loop could not be converted to canonical form, it must have an
495	// indirectbr in it, just give up.
496	if (!OrigPreheader \|\| !L->hasDedicatedExits())
497	return Rotated;
498
499	// Anything ScalarEvolution may know about this loop or the PHI nodes
500	// in its header will soon be invalidated. We should also invalidate
501	// all outer loops because insertion and deletion of blocks that happens
502	// during the rotation may violate invariants related to backedge taken
503	// infos in them.
504	if (SE) {
505	SE->forgetTopmostLoop(L);
506	// We may hoist some instructions out of loop. In case if they were cached
507	// as "loop variant" or "loop computable", these caches must be dropped.
508	// We also may fold basic blocks, so cached block dispositions also need
509	// to be dropped.
510	SE->forgetBlockAndLoopDispositions();
511	}
512
513	LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
514	if (MSSAU && VerifyMemorySSA)
515	MSSAU->getMemorySSA()->verifyMemorySSA();
516
517	// Find new Loop header. NewHeader is a Header's one and only successor
518	// that is inside loop. Header's other successor is outside the
519	// loop. Otherwise loop is not suitable for rotation.
520	BasicBlock *Exit = BI->getSuccessor(i: `0`);
521	BasicBlock *NewHeader = BI->getSuccessor(i: `1`);
522	bool BISuccsSwapped = L->contains(BB: Exit);
523	if (BISuccsSwapped)
524	std::swap(a&: Exit, b&: NewHeader);
525	assert(NewHeader && "Unable to determine new loop header");
526	assert(L->contains(NewHeader) && !L->contains(Exit) &&
527	"Unable to determine loop header and exit blocks");
528
529	// This code assumes that the new header has exactly one predecessor.
530	// Remove any single-entry PHI nodes in it.
531	assert(NewHeader->getSinglePredecessor() &&
532	"New header doesn't have one pred!");
533	FoldSingleEntryPHINodes(BB: NewHeader);
534
535	// Begin by walking OrigHeader and populating ValueMap with an entry for
536	// each Instruction.
537	BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
538	ValueToValueMapTy ValueMap, ValueMapMSSA;
539
540	// For PHI nodes, the value available in OldPreHeader is just the
541	// incoming value from OldPreHeader.
542	for (; PHINode *PN = dyn_cast<PHINode>(Val&: I); ++I)
543	InsertNewValueIntoMap(VM&: ValueMap, K: PN,
544	V: PN->getIncomingValueForBlock(BB: OrigPreheader));
545
546	// For the rest of the instructions, either hoist to the OrigPreheader if
547	// possible or create a clone in the OldPreHeader if not.
548	Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
549
550	// Record all debug intrinsics preceding LoopEntryBranch to avoid
551	// duplication.
552	using DbgIntrinsicHash =
553	std::pair<std::pair<hash_code, DILocalVariable >, DIExpression >;
554	auto makeHash = [](auto *D) -> DbgIntrinsicHash {
555	auto VarLocOps = D->location_ops();
556	return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()),
557	D->getVariable()},
558	D->getExpression()};
559	};
560
561	SmallDenseSet<DbgIntrinsicHash, `8`> DbgIntrinsics;
562	for (Instruction &I : llvm::drop_begin(RangeOrContainer: llvm::reverse(C&: *OrigPreheader))) {
563	if (auto *DII = dyn_cast<DbgVariableIntrinsic>(Val: &I)) {
564	DbgIntrinsics.insert(V: makeHash (DII));
565	// Until RemoveDIs supports dbg.declares in DbgVariableRecord format,
566	// we'll need to collect DbgVariableRecords attached to any other debug
567	// intrinsics.
568	for (const DbgVariableRecord &DVR :
569	filterDbgVars(R: DII->getDbgRecordRange()))
570	DbgIntrinsics.insert(V: makeHash (&DVR));
571	} else {
572	break;
573	}
574	}
575
576	// Build DbgVariableRecord hashes for DbgVariableRecords attached to the
577	// terminator, which isn't considered in the loop above.
578	for (const DbgVariableRecord &DVR :
579	filterDbgVars(R: OrigPreheader->getTerminator()->getDbgRecordRange()))
580	DbgIntrinsics.insert(V: makeHash (&DVR));
581
582	// Remember the local noalias scope declarations in the header. After the
583	// rotation, they must be duplicated and the scope must be cloned. This
584	// avoids unwanted interaction across iterations.
585	SmallVector<NoAliasScopeDeclInst *, `6`> NoAliasDeclInstructions;
586	for (Instruction &I : *OrigHeader)
587	if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
588	NoAliasDeclInstructions.push_back(Elt: Decl);
589
590	Module *M = OrigHeader->getModule();
591
592	// Track the next DbgRecord to clone. If we have a sequence where an
593	// instruction is hoisted instead of being cloned:
594	// DbgRecord blah
595	// %foo = add i32 0, 0
596	// DbgRecord xyzzy
597	// %bar = call i32 @foobar()
598	// where %foo is hoisted, then the DbgRecord "blah" will be seen twice, once
599	// attached to %foo, then when %foo his hoisted it will "fall down" onto the
600	// function call:
601	// DbgRecord blah
602	// DbgRecord xyzzy
603	// %bar = call i32 @foobar()
604	// causing it to appear attached to the call too.
605	//
606	// To avoid this, cloneDebugInfoFrom takes an optional "start cloning from
607	// here" position to account for this behaviour. We point it at any
608	// DbgRecords on the next instruction, here labelled xyzzy, before we hoist
609	// %foo. Later, we only only clone DbgRecords from that position (xyzzy)
610	// onwards, which avoids cloning DbgRecord "blah" multiple times. (Stored as
611	// a range because it gives us a natural way of testing whether
612	// there were DbgRecords on the next instruction before we hoisted things).
613	iterator_range<DbgRecord::self_iterator> NextDbgInsts =
614	(I != E) ? I ->getDbgRecordRange() : DbgMarker::getEmptyDbgRecordRange();
615
616	while (I != E) {
617	Instruction Inst = &I ++;
618
619	// If the instruction's operands are invariant and it doesn't read or write
620	// memory, then it is safe to hoist. Doing this doesn't change the order of
621	// execution in the preheader, but does prevent the instruction from
622	// executing in each iteration of the loop. This means it is safe to hoist
623	// something that might trap, but isn't safe to hoist something that reads
624	// memory (without proving that the loop doesn't write).
625	if (L->hasLoopInvariantOperands(I: Inst) && !Inst->mayReadFromMemory() &&
626	!Inst->mayWriteToMemory() && !Inst->isTerminator() &&
627	!isa<DbgInfoIntrinsic>(Val: Inst) && !isa<AllocaInst>(Val: Inst) &&
628	// It is not safe to hoist the value of these instructions in
629	// coroutines, as the addresses of otherwise eligible variables (e.g.
630	// thread-local variables and errno) may change if the coroutine is
631	// resumed in a different thread.Therefore, we disable this
632	// optimization for correctness. However, this may block other correct
633	// optimizations.
634	// FIXME: This should be reverted once we have a better model for
635	// memory access in coroutines.
636	!Inst->getFunction()->isPresplitCoroutine()) {
637
638	if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat &&
639	!NextDbgInsts.empty()) {
640	auto DbgValueRange =
641	LoopEntryBranch->cloneDebugInfoFrom(From: Inst, FromHere: NextDbgInsts.begin());
642	RemapDbgRecordRange(M, Range: DbgValueRange, VM&: ValueMap,
643	Flags: RF_NoModuleLevelChanges \| RF_IgnoreMissingLocals);
644	// Erase anything we've seen before.
645	for (DbgVariableRecord &DVR :
646	make_early_inc_range(Range: filterDbgVars(R: DbgValueRange)))
647	if (DbgIntrinsics.count(V: makeHash (&DVR)))
648	DVR.eraseFromParent();
649	}
650
651	NextDbgInsts = I ->getDbgRecordRange();
652
653	Inst->moveBefore(MovePos: LoopEntryBranch);
654
655	++NumInstrsHoisted;
656	continue;
657	}
658
659	// Otherwise, create a duplicate of the instruction.
660	Instruction *C = Inst->clone();
661	C->insertBefore(InsertPos: LoopEntryBranch);
662
663	++NumInstrsDuplicated;
664
665	if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat &&
666	!NextDbgInsts.empty()) {
667	auto Range = C->cloneDebugInfoFrom(From: Inst, FromHere: NextDbgInsts.begin());
668	RemapDbgRecordRange(M, Range, VM&: ValueMap,
669	Flags: RF_NoModuleLevelChanges \| RF_IgnoreMissingLocals);
670	NextDbgInsts = DbgMarker::getEmptyDbgRecordRange();
671	// Erase anything we've seen before.
672	for (DbgVariableRecord &DVR :
673	make_early_inc_range(Range: filterDbgVars(R: Range)))
674	if (DbgIntrinsics.count(V: makeHash (&DVR)))
675	DVR.eraseFromParent();
676	}
677
678	// Eagerly remap the operands of the instruction.
679	RemapInstruction(I: C, VM&: ValueMap,
680	Flags: RF_NoModuleLevelChanges \| RF_IgnoreMissingLocals);
681
682	// Avoid inserting the same intrinsic twice.
683	if (auto *DII = dyn_cast<DbgVariableIntrinsic>(Val: C))
684	if (DbgIntrinsics.count(V: makeHash (DII))) {
685	C->eraseFromParent();
686	continue;
687	}
688
689	// With the operands remapped, see if the instruction constant folds or is
690	// otherwise simplifyable. This commonly occurs because the entry from PHI
691	// nodes allows icmps and other instructions to fold.
692	Value *V = simplifyInstruction(I: C, Q: SQ);
693	if (V && LI->replacementPreservesLCSSAForm(From: C, To: V)) {
694	// If so, then delete the temporary instruction and stick the folded value
695	// in the map.
696	InsertNewValueIntoMap(VM&: ValueMap, K: Inst, V);
697	if (!C->mayHaveSideEffects()) {
698	C->eraseFromParent();
699	C = nullptr;
700	}
701	} else {
702	InsertNewValueIntoMap(VM&: ValueMap, K: Inst, V: C);
703	}
704	if (C) {
705	// Otherwise, stick the new instruction into the new block!
706	C->setName(Inst->getName());
707
708	if (auto *II = dyn_cast<AssumeInst>(Val: C))
709	AC->registerAssumption(CI: II);
710	// MemorySSA cares whether the cloned instruction was inserted or not, and
711	// not whether it can be remapped to a simplified value.
712	if (MSSAU)
713	InsertNewValueIntoMap(VM&: ValueMapMSSA, K: Inst, V: C);
714	}
715	}
716
717	if (!NoAliasDeclInstructions.empty()) {
718	// There are noalias scope declarations:
719	// (general):
720	// Original: OrigPre { OrigHeader NewHeader ... Latch }
721	// after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader }
722	//
723	// with D: llvm.experimental.noalias.scope.decl,
724	// U: !noalias or !alias.scope depending on D
725	// ... { D U1 U2 } can transform into:
726	// (0) : ... { D U1 U2 } // no relevant rotation for this part
727	// (1) : ... D' { U1 U2 D } // D is part of OrigHeader
728	// (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader
729	//
730	// We now want to transform:
731	// (1) -> : ... D' { D U1 U2 D'' }
732	// (2) -> : ... D' U1' { D U2 D'' U1'' }
733	// D: original llvm.experimental.noalias.scope.decl
734	// D', U1': duplicate with replaced scopes
735	// D'', U1'': different duplicate with replaced scopes
736	// This ensures a safe fallback to 'may_alias' introduced by the rotate,
737	// as U1'' and U1' scopes will not be compatible wrt to the local restrict
738
739	// Clone the llvm.experimental.noalias.decl again for the NewHeader.
740	BasicBlock::iterator NewHeaderInsertionPoint =
741	NewHeader->getFirstNonPHIIt();
742	for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) {
743	LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:"
744	<< *NAD << "\n");
745	Instruction *NewNAD = NAD->clone();
746	NewNAD->insertBefore(BB&: *NewHeader, InsertPos: NewHeaderInsertionPoint);
747	}
748
749	// Scopes must now be duplicated, once for OrigHeader and once for
750	// OrigPreHeader'.
751	{
752	auto &Context = NewHeader->getContext();
753
754	SmallVector<MDNode *, `8`> NoAliasDeclScopes;
755	for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions)
756	NoAliasDeclScopes.push_back(Elt: NAD->getScopeList());
757
758	LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n");
759	cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, NewBlocks: {OrigHeader}, Context,
760	Ext: "h.rot");
761	LLVM_DEBUG(OrigHeader->dump());
762
763	// Keep the compile time impact low by only adapting the inserted block
764	// of instructions in the OrigPreHeader. This might result in slightly
765	// more aliasing between these instructions and those that were already
766	// present, but it will be much faster when the original PreHeader is
767	// large.
768	LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n");
769	auto *FirstDecl =
770	cast<Instruction>(Val&: ValueMap [*NoAliasDeclInstructions.begin()]);
771	auto *LastInst = &OrigPreheader->back();
772	cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, IStart: FirstDecl, IEnd: LastInst,
773	Context, Ext: "pre.rot");
774	LLVM_DEBUG(OrigPreheader->dump());
775
776	LLVM_DEBUG(dbgs() << " Updated NewHeader:\n");
777	LLVM_DEBUG(NewHeader->dump());
778	}
779	}
780
781	// Along with all the other instructions, we just cloned OrigHeader's
782	// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
783	// successors by duplicating their incoming values for OrigHeader.
784	for (BasicBlock *SuccBB : successors(BB: OrigHeader))
785	for (BasicBlock::iterator BI = SuccBB->begin();
786	PHINode *PN = dyn_cast<PHINode>(Val&: BI); ++BI)
787	PN->addIncoming(V: PN->getIncomingValueForBlock(BB: OrigHeader), BB: OrigPreheader);
788
789	// Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
790	// OrigPreHeader's old terminator (the original branch into the loop), and
791	// remove the corresponding incoming values from the PHI nodes in OrigHeader.
792	LoopEntryBranch->eraseFromParent();
793	OrigPreheader->flushTerminatorDbgRecords();
794
795	// Update MemorySSA before the rewrite call below changes the 1:1
796	// instruction:cloned_instruction_or_value mapping.
797	if (MSSAU) {
798	InsertNewValueIntoMap(VM&: ValueMapMSSA, K: OrigHeader, V: OrigPreheader);
799	MSSAU->updateForClonedBlockIntoPred(BB: OrigHeader, P1: OrigPreheader,
800	VM: ValueMapMSSA);
801	}
802
803	SmallVector<PHINode*, `2`> InsertedPHIs;
804	// If there were any uses of instructions in the duplicated block outside the
805	// loop, update them, inserting PHI nodes as required
806	RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, SE,
807	InsertedPHIs: &InsertedPHIs);
808
809	// Attach dbg.value intrinsics to the new phis if that phi uses a value that
810	// previously had debug metadata attached. This keeps the debug info
811	// up-to-date in the loop body.
812	if (!InsertedPHIs.empty())
813	insertDebugValuesForPHIs(BB: OrigHeader, InsertedPHIs);
814
815	// NewHeader is now the header of the loop.
816	L->moveToHeader(BB: NewHeader);
817	assert(L->getHeader() == NewHeader && "Latch block is our new header");
818
819	// Inform DT about changes to the CFG.
820	if (DT) {
821	// The OrigPreheader branches to the NewHeader and Exit now. Then, inform
822	// the DT about the removed edge to the OrigHeader (that got removed).
823	SmallVector<DominatorTree::UpdateType, `3`> Updates;
824	Updates.push_back(Elt: {DominatorTree::Insert, OrigPreheader, Exit});
825	Updates.push_back(Elt: {DominatorTree::Insert, OrigPreheader, NewHeader});
826	Updates.push_back(Elt: {DominatorTree::Delete, OrigPreheader, OrigHeader});
827
828	if (MSSAU) {
829	MSSAU->applyUpdates(Updates, DT&: DT, /UpdateDT=/UpdateDTFirst: true*);
830	if (VerifyMemorySSA)
831	MSSAU->getMemorySSA()->verifyMemorySSA();
832	} else {
833	DT->applyUpdates(Updates);
834	}
835	}
836
837	// At this point, we've finished our major CFG changes. As part of cloning
838	// the loop into the preheader we've simplified instructions and the
839	// duplicated conditional branch may now be branching on a constant. If it is
840	// branching on a constant and if that constant means that we enter the loop,
841	// then we fold away the cond branch to an uncond branch. This simplifies the
842	// loop in cases important for nested loops, and it also means we don't have
843	// to split as many edges.
844	BranchInst *PHBI = cast<BranchInst>(Val: OrigPreheader->getTerminator());
845	assert(PHBI->isConditional() && "Should be clone of BI condbr!");
846	const Value *Cond = PHBI->getCondition();
847	const bool HasConditionalPreHeader =
848	!isa<ConstantInt>(Val: Cond) \|\|
849	PHBI->getSuccessor(i: cast<ConstantInt>(Val: Cond)->isZero()) != NewHeader;
850
851	updateBranchWeights(PreHeaderBI&: PHBI, LoopBI&: BI, HasConditionalPreHeader, SuccsSwapped: BISuccsSwapped);
852
853	if (HasConditionalPreHeader) {
854	// The conditional branch can't be folded, handle the general case.
855	// Split edges as necessary to preserve LoopSimplify form.
856
857	// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
858	// thus is not a preheader anymore.
859	// Split the edge to form a real preheader.
860	BasicBlock *NewPH = SplitCriticalEdge(
861	Src: OrigPreheader, Dst: NewHeader,
862	Options: CriticalEdgeSplittingOptions (DT, LI, MSSAU).setPreserveLCSSA());
863	NewPH->setName(NewHeader->getName() + ".lr.ph");
864
865	// Preserve canonical loop form, which means that 'Exit' should have only
866	// one predecessor. Note that Exit could be an exit block for multiple
867	// nested loops, causing both of the edges to now be critical and need to
868	// be split.
869	SmallVector<BasicBlock *, `4`> ExitPreds(predecessors(BB: Exit));
870	bool SplitLatchEdge = false;
871	for (BasicBlock *ExitPred : ExitPreds) {
872	// We only need to split loop exit edges.
873	Loop *PredLoop = LI->getLoopFor(BB: ExitPred);
874	if (!PredLoop \|\| PredLoop->contains(BB: Exit) \|\|
875	isa<IndirectBrInst>(Val: ExitPred->getTerminator()))
876	continue;
877	SplitLatchEdge \|= L->getLoopLatch() == ExitPred;
878	BasicBlock *ExitSplit = SplitCriticalEdge(
879	Src: ExitPred, Dst: Exit,
880	Options: CriticalEdgeSplittingOptions (DT, LI, MSSAU).setPreserveLCSSA());
881	ExitSplit->moveBefore(MovePos: Exit);
882	}
883	assert(SplitLatchEdge &&
884	"Despite splitting all preds, failed to split latch exit?");
885	(void)SplitLatchEdge;
886	} else {
887	// We can fold the conditional branch in the preheader, this makes things
888	// simpler. The first step is to remove the extra edge to the Exit block.
889	Exit->removePredecessor(Pred: OrigPreheader, KeepOneInputPHIs: true /preserve LCSSA/);
890	BranchInst *NewBI = BranchInst::Create(IfTrue: NewHeader, InsertBefore: PHBI->getIterator());
891	NewBI->setDebugLoc(PHBI->getDebugLoc());
892	PHBI->eraseFromParent();
893
894	// With our CFG finalized, update DomTree if it is available.
895	if (DT) DT->deleteEdge(From: OrigPreheader, To: Exit);
896
897	// Update MSSA too, if available.
898	if (MSSAU)
899	MSSAU->removeEdge(From: OrigPreheader, To: Exit);
900	}
901
902	assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
903	assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
904
905	if (MSSAU && VerifyMemorySSA)
906	MSSAU->getMemorySSA()->verifyMemorySSA();
907
908	// Now that the CFG and DomTree are in a consistent state again, try to merge
909	// the OrigHeader block into OrigLatch. This will succeed if they are
910	// connected by an unconditional branch. This is just a cleanup so the
911	// emitted code isn't too gross in this common case.
912	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
913	BasicBlock *PredBB = OrigHeader->getUniquePredecessor();
914	bool DidMerge = MergeBlockIntoPredecessor(BB: OrigHeader, DTU: &DTU, LI, MSSAU);
915	if (DidMerge)
916	RemoveRedundantDbgInstrs(BB: PredBB);
917
918	if (MSSAU && VerifyMemorySSA)
919	MSSAU->getMemorySSA()->verifyMemorySSA();
920
921	LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
922
923	++NumRotated;
924
925	Rotated = true;
926	SimplifiedLatch = false;
927
928	// Check that new latch is a deoptimizing exit and then repeat rotation if possible.
929	// Deoptimizing latch exit is not a generally typical case, so we just loop over.
930	// TODO: if it becomes a performance bottleneck extend rotation algorithm
931	// to handle multiple rotations in one go.
932	} while (MultiRotate && canRotateDeoptimizingLatchExit(L));
933
934
935	return true;
936	}
937
938	/// Determine whether the instructions in this range may be safely and cheaply
939	/// speculated. This is not an important enough situation to develop complex
940	/// heuristics. We handle a single arithmetic instruction along with any type
941	/// conversions.
942	static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
943	BasicBlock::iterator End, Loop *L) {
944	bool seenIncrement = false;
945	bool MultiExitLoop = false;
946
947	if (!L->getExitingBlock())
948	MultiExitLoop = true;
949
950	for (BasicBlock::iterator I = Begin; I != End; ++I) {
951
952	if (!isSafeToSpeculativelyExecute(I: &*I))
953	return false;
954
955	if (isa<DbgInfoIntrinsic>(Val: I))
956	continue;
957
958	switch (I ->getOpcode()) {
959	default:
960	return false;
961	case Instruction::GetElementPtr:
962	// GEPs are cheap if all indices are constant.
963	if (!cast<GEPOperator>(Val&: I)->hasAllConstantIndices())
964	return false;
965	// fall-thru to increment case
966	[[fallthrough]];
967	case Instruction::Add:
968	case Instruction::Sub:
969	case Instruction::And:
970	case Instruction::Or:
971	case Instruction::Xor:
972	case Instruction::Shl:
973	case Instruction::LShr:
974	case Instruction::AShr: {
975	Value *IVOpnd =
976	!isa<Constant>(Val: I ->getOperand(i: `0`))
977	? I ->getOperand(i: `0`)
978	: !isa<Constant>(Val: I ->getOperand(i: `1`)) ? I ->getOperand(i: `1`) : nullptr;
979	if (!IVOpnd)
980	return false;
981
982	// If increment operand is used outside of the loop, this speculation
983	// could cause extra live range interference.
984	if (MultiExitLoop) {
985	for (User *UseI : IVOpnd->users()) {
986	auto *UserInst = cast<Instruction>(Val: UseI);
987	if (!L->contains(Inst: UserInst))
988	return false;
989	}
990	}
991
992	if (seenIncrement)
993	return false;
994	seenIncrement = true;
995	break;
996	}
997	case Instruction::Trunc:
998	case Instruction::ZExt:
999	case Instruction::SExt:
1000	// ignore type conversions
1001	break;
1002	}
1003	}
1004	return true;
1005	}
1006
1007	/// Fold the loop tail into the loop exit by speculating the loop tail
1008	/// instructions. Typically, this is a single post-increment. In the case of a
1009	/// simple 2-block loop, hoisting the increment can be much better than
1010	/// duplicating the entire loop header. In the case of loops with early exits,
1011	/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
1012	/// canonical form so downstream passes can handle it.
1013	///
1014	/// I don't believe this invalidates SCEV.
1015	bool LoopRotate::simplifyLoopLatch(Loop *L) {
1016	BasicBlock *Latch = L->getLoopLatch();
1017	if (!Latch \|\| Latch->hasAddressTaken())
1018	return false;
1019
1020	BranchInst *Jmp = dyn_cast<BranchInst>(Val: Latch->getTerminator());
1021	if (!Jmp \|\| !Jmp->isUnconditional())
1022	return false;
1023
1024	BasicBlock *LastExit = Latch->getSinglePredecessor();
1025	if (!LastExit \|\| !L->isLoopExiting(BB: LastExit))
1026	return false;
1027
1028	BranchInst *BI = dyn_cast<BranchInst>(Val: LastExit->getTerminator());
1029	if (!BI)
1030	return false;
1031
1032	if (!shouldSpeculateInstrs(Begin: Latch->begin(), End: Jmp->getIterator(), L))
1033	return false;
1034
1035	LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
1036	<< LastExit->getName() << "\n");
1037
1038	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
1039	MergeBlockIntoPredecessor(BB: Latch, DTU: &DTU, LI, MSSAU, MemDep: nullptr,
1040	/PredecessorWithTwoSuccessors=/true);
1041
1042	if (SE) {
1043	// Merging blocks may remove blocks reference in the block disposition cache. Clear the cache.
1044	SE->forgetBlockAndLoopDispositions();
1045	}
1046
1047	if (MSSAU && VerifyMemorySSA)
1048	MSSAU->getMemorySSA()->verifyMemorySSA();
1049
1050	return true;
1051	}
1052
1053	/// Rotate \c L, and return true if any modification was made.
1054	bool LoopRotate::processLoop(Loop *L) {
1055	// Save the loop metadata.
1056	MDNode *LoopMD = L->getLoopID();
1057
1058	bool SimplifiedLatch = false;
1059
1060	// Simplify the loop latch before attempting to rotate the header
1061	// upward. Rotation may not be needed if the loop tail can be folded into the
1062	// loop exit.
1063	if (!RotationOnly)
1064	SimplifiedLatch = simplifyLoopLatch(L);
1065
1066	bool MadeChange = rotateLoop(L, SimplifiedLatch);
1067	assert((!MadeChange \|\| L->isLoopExiting(L->getLoopLatch())) &&
1068	"Loop latch should be exiting after loop-rotate.");
1069
1070	// Restore the loop metadata.
1071	// NB! We presume LoopRotation DOESN'T ADD its own metadata.
1072	if ((MadeChange \|\| SimplifiedLatch) && LoopMD)
1073	L->setLoopID(LoopMD);
1074
1075	return MadeChange \|\| SimplifiedLatch;
1076	}
1077
1078
1079	/// The utility to convert a loop into a loop with bottom test.
1080	bool llvm::LoopRotation(Loop L, LoopInfo LI, const TargetTransformInfo *TTI,
1081	AssumptionCache AC, DominatorTree DT,
1082	ScalarEvolution SE, MemorySSAUpdater MSSAU,
1083	const SimplifyQuery &SQ, bool RotationOnly = true,
1084	unsigned Threshold = unsigned(-`1`),
1085	bool IsUtilMode = true, bool PrepareForLTO) {
1086	LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
1087	IsUtilMode, PrepareForLTO);
1088	return LR.processLoop(L);
1089	}
1090

Browse the source code of llvm_projects/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp