LoopStrengthReduce.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp]

1	//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This transformation analyzes and transforms the induction variables (and
10	// computations derived from them) into forms suitable for efficient execution
11	// on the target.
12	//
13	// This pass performs a strength reduction on array references inside loops that
14	// have as one or more of their components the loop induction variable, it
15	// rewrites expressions to take advantage of scaled-index addressing modes
16	// available on the target, and it performs a variety of other optimizations
17	// related to loop induction variables.
18	//
19	// Terminology note: this code has a lot of handling for "post-increment" or
20	// "post-inc" users. This is not talking about post-increment addressing modes;
21	// it is instead talking about code like this:
22	//
23	// %i = phi [ 0, %entry ], [ %i.next, %latch ]
24	// ...
25	// %i.next = add %i, 1
26	// %c = icmp eq %i.next, %n
27	//
28	// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
29	// it's useful to think about these as the same register, with some uses using
30	// the value of the register before the add and some using it after. In this
31	// example, the icmp is a post-increment user, since it uses %i.next, which is
32	// the value of the induction variable after the increment. The other common
33	// case of post-increment users is users outside the loop.
34	//
35	// TODO: More sophistication in the way Formulae are generated and filtered.
36	//
37	// TODO: Handle multiple loops at a time.
38	//
39	// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
40	// of a GlobalValue?
41	//
42	// TODO: When truncation is free, truncate ICmp users' operands to make it a
43	// smaller encoding (on x86 at least).
44	//
45	// TODO: When a negated register is used by an add (such as in a list of
46	// multiple base registers, or as the increment expression in an addrec),
47	// we may not actually need both reg and (-1 * reg) in registers; the
48	// negation can be implemented by using a sub instead of an add. The
49	// lack of support for taking this into consideration when making
50	// register pressure decisions is partly worked around by the "Special"
51	// use kind.
52	//
53	//===----------------------------------------------------------------------===//
54
55	#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
56	#include "llvm/ADT/APInt.h"
57	#include "llvm/ADT/DenseMap.h"
58	#include "llvm/ADT/DenseSet.h"
59	#include "llvm/ADT/PointerIntPair.h"
60	#include "llvm/ADT/STLExtras.h"
61	#include "llvm/ADT/SetVector.h"
62	#include "llvm/ADT/SmallBitVector.h"
63	#include "llvm/ADT/SmallPtrSet.h"
64	#include "llvm/ADT/SmallSet.h"
65	#include "llvm/ADT/SmallVector.h"
66	#include "llvm/ADT/Statistic.h"
67	#include "llvm/ADT/iterator_range.h"
68	#include "llvm/Analysis/AssumptionCache.h"
69	#include "llvm/Analysis/DomTreeUpdater.h"
70	#include "llvm/Analysis/IVUsers.h"
71	#include "llvm/Analysis/LoopAnalysisManager.h"
72	#include "llvm/Analysis/LoopInfo.h"
73	#include "llvm/Analysis/LoopPass.h"
74	#include "llvm/Analysis/MemorySSA.h"
75	#include "llvm/Analysis/MemorySSAUpdater.h"
76	#include "llvm/Analysis/ScalarEvolution.h"
77	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
78	#include "llvm/Analysis/ScalarEvolutionNormalization.h"
79	#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
80	#include "llvm/Analysis/TargetLibraryInfo.h"
81	#include "llvm/Analysis/TargetTransformInfo.h"
82	#include "llvm/Analysis/ValueTracking.h"
83	#include "llvm/BinaryFormat/Dwarf.h"
84	#include "llvm/IR/BasicBlock.h"
85	#include "llvm/IR/Constant.h"
86	#include "llvm/IR/Constants.h"
87	#include "llvm/IR/DebugInfoMetadata.h"
88	#include "llvm/IR/DerivedTypes.h"
89	#include "llvm/IR/Dominators.h"
90	#include "llvm/IR/GlobalValue.h"
91	#include "llvm/IR/IRBuilder.h"
92	#include "llvm/IR/InstrTypes.h"
93	#include "llvm/IR/Instruction.h"
94	#include "llvm/IR/Instructions.h"
95	#include "llvm/IR/IntrinsicInst.h"
96	#include "llvm/IR/Module.h"
97	#include "llvm/IR/Operator.h"
98	#include "llvm/IR/Type.h"
99	#include "llvm/IR/Use.h"
100	#include "llvm/IR/User.h"
101	#include "llvm/IR/Value.h"
102	#include "llvm/IR/ValueHandle.h"
103	#include "llvm/InitializePasses.h"
104	#include "llvm/Pass.h"
105	#include "llvm/Support/Casting.h"
106	#include "llvm/Support/CommandLine.h"
107	#include "llvm/Support/Compiler.h"
108	#include "llvm/Support/Debug.h"
109	#include "llvm/Support/ErrorHandling.h"
110	#include "llvm/Support/MathExtras.h"
111	#include "llvm/Support/raw_ostream.h"
112	#include "llvm/Transforms/Scalar.h"
113	#include "llvm/Transforms/Utils.h"
114	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
115	#include "llvm/Transforms/Utils/Local.h"
116	#include "llvm/Transforms/Utils/LoopUtils.h"
117	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
118	#include <algorithm>
119	#include <cassert>
120	#include <cstddef>
121	#include <cstdint>
122	#include <iterator>
123	#include <limits>
124	#include <map>
125	#include <numeric>
126	#include <optional>
127	#include <utility>
128
129	using namespace llvm;
130	using namespace SCEVPatternMatch;
131
132	#define DEBUG_TYPE "loop-reduce"
133
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

/// Limit the size of expression that SCEV-based salvaging will attempt to
/// translate into a DIExpression.
/// Choose a maximum size such that debuginfo is not excessively increased and
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;
145
146	// Cleanup congruent phis after LSR phi expansion.
147	static cl::opt<bool> EnablePhiElim(
148	"enable-lsr-phielim", cl::Hidden, cl::init(Val: true),
149	cl::desc ("Enable LSR phi elimination"));
150
151	// The flag adds instruction count to solutions cost comparison.
152	static cl::opt<bool> InsnsCost(
153	"lsr-insns-cost", cl::Hidden, cl::init(Val: true),
154	cl::desc ("Add instruction count to a LSR cost model"));
155
156	// Flag to choose how to narrow complex lsr solution
157	static cl::opt<bool> LSRExpNarrow(
158	"lsr-exp-narrow", cl::Hidden, cl::init(Val: false),
159	cl::desc ("Narrow LSR complex solution using"
160	" expectation of registers number"));
161
162	// Flag to narrow search space by filtering non-optimal formulae with
163	// the same ScaledReg and Scale.
164	static cl::opt<bool> FilterSameScaledReg(
165	"lsr-filter-same-scaled-reg", cl::Hidden, cl::init(Val: true),
166	cl::desc ("Narrow LSR search space by filtering non-optimal formulae"
167	" with the same ScaledReg and Scale"));
168
169	static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
170	"lsr-preferred-addressing-mode", cl::Hidden, cl::init(Val: TTI::AMK_None),
171	cl::desc ("A flag that overrides the target's preferred addressing mode."),
172	cl::values(
173	clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"),
174	clEnumValN(TTI::AMK_PreIndexed, "preindexed",
175	"Prefer pre-indexed addressing mode"),
176	clEnumValN(TTI::AMK_PostIndexed, "postindexed",
177	"Prefer post-indexed addressing mode"),
178	clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")));
179
180	static cl::opt<unsigned> ComplexityLimit(
181	"lsr-complexity-limit", cl::Hidden,
182	cl::init(Val: std::numeric_limits<uint16_t>::max()),
183	cl::desc ("LSR search space complexity limit"));
184
185	static cl::opt<unsigned> SetupCostDepthLimit(
186	"lsr-setupcost-depth-limit", cl::Hidden, cl::init(Val: `7`),
187	cl::desc ("The limit on recursion depth for LSRs setup cost"));
188
189	static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
190	"lsr-drop-solution", cl::Hidden,
191	cl::desc ("Attempt to drop solution if it is less profitable"));
192
193	static cl::opt<bool> EnableVScaleImmediates(
194	"lsr-enable-vscale-immediates", cl::Hidden, cl::init(Val: true),
195	cl::desc ("Enable analysis of vscale-relative immediates in LSR"));
196
197	static cl::opt<bool> DropScaledForVScale(
198	"lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(Val: true),
199	cl::desc ("Avoid using scaled registers with vscale-relative addressing"));
200
201	#ifndef NDEBUG
202	// Stress test IV chain generation.
203	static cl::opt<bool> StressIVChain(
204	"stress-ivchain", cl::Hidden, cl::init(false),
205	cl::desc("Stress test LSR IV chains"));
206	#else
207	static bool StressIVChain = false;
208	#endif
209
210	namespace {
211
212	struct MemAccessTy {
213	/// Used in situations where the accessed memory type is unknown.
214	static const unsigned UnknownAddressSpace =
215	std::numeric_limits<unsigned>::max();
216
217	Type MemTy = nullptr*;
218	unsigned AddrSpace = UnknownAddressSpace;
219
220	MemAccessTy() = default;
221	MemAccessTy(Type Ty, unsigned* AS) : MemTy(Ty), AddrSpace(AS) {}
222
223	bool operator==(MemAccessTy Other) const {
224	return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
225	}
226
227	bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
228
229	static MemAccessTy getUnknown(LLVMContext &Ctx,
230	unsigned AS = UnknownAddressSpace) {
231	return MemAccessTy (Type::getVoidTy(C&: Ctx), AS);
232	}
233
234	Type getType() { return* MemTy; }
235	};
236
237	/// This class holds data which is used to order reuse candidates.
238	class RegSortData {
239	public:
240	/// This represents the set of LSRUse indices which reference
241	/// a particular register.
242	SmallBitVector UsedByIndices;
243
244	void print(raw_ostream &OS) const;
245	void dump() const;
246	};
247
248	// An offset from an address that is either scalable or fixed. Used for
249	// per-target optimizations of addressing modes.
250	class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
251	constexpr Immediate(ScalarTy MinVal, bool Scalable)
252	: FixedOrScalableQuantity (MinVal, Scalable) {}
253
254	constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
255	: FixedOrScalableQuantity (V) {}
256
257	public:
258	constexpr Immediate() = delete;
259
260	static constexpr Immediate getFixed(ScalarTy MinVal) {
261	return {MinVal, false};
262	}
263	static constexpr Immediate getScalable(ScalarTy MinVal) {
264	return {MinVal, true};
265	}
266	static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
267	return {MinVal, Scalable};
268	}
269	static constexpr Immediate getZero() { return {`0`, false}; }
270	static constexpr Immediate getFixedMin() {
271	return {std::numeric_limits<int64_t>::min(), false};
272	}
273	static constexpr Immediate getFixedMax() {
274	return {std::numeric_limits<int64_t>::max(), false};
275	}
276	static constexpr Immediate getScalableMin() {
277	return {std::numeric_limits<int64_t>::min(), true};
278	}
279	static constexpr Immediate getScalableMax() {
280	return {std::numeric_limits<int64_t>::max(), true};
281	}
282
283	constexpr bool isLessThanZero() const { return Quantity < `0`; }
284
285	constexpr bool isGreaterThanZero() const { return Quantity > `0`; }
286
287	constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
288	return isZero() \|\| Imm.isZero() \|\| Imm.Scalable == Scalable;
289	}
290
291	constexpr bool isMin() const {
292	return Quantity == std::numeric_limits<ScalarTy>::min();
293	}
294
295	constexpr bool isMax() const {
296	return Quantity == std::numeric_limits<ScalarTy>::max();
297	}
298
299	// Arithmetic 'operators' that cast to unsigned types first.
300	constexpr Immediate addUnsigned(const Immediate &RHS) const {
301	assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
302	ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
303	return {Value, Scalable \|\| RHS.isScalable()};
304	}
305
306	constexpr Immediate subUnsigned(const Immediate &RHS) const {
307	assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
308	ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
309	return {Value, Scalable \|\| RHS.isScalable()};
310	}
311
312	// Scale the quantity by a constant without caring about runtime scalability.
313	constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
314	ScalarTy Value = (uint64_t)Quantity * RHS;
315	return {Value, Scalable};
316	}
317
318	// Helpers for generating SCEVs with vscale terms where needed.
319	const SCEV getSCEV(ScalarEvolution &SE, Type Ty) const {
320	const SCEV *S = SE.getConstant(Ty, V: Quantity);
321	if (Scalable)
322	S = SE.getMulExpr(LHS: S, RHS: SE.getVScale(Ty: S->getType()));
323	return S;
324	}
325
326	const SCEV getNegativeSCEV(ScalarEvolution &SE, Type Ty) const {
327	const SCEV *NegS = SE.getConstant(Ty, V: -(uint64_t)Quantity);
328	if (Scalable)
329	NegS = SE.getMulExpr(LHS: NegS, RHS: SE.getVScale(Ty: NegS->getType()));
330	return NegS;
331	}
332
333	const SCEV getUnknownSCEV(ScalarEvolution &SE, Type Ty) const {
334	// TODO: Avoid implicit trunc?
335	// See https://github.com/llvm/llvm-project/issues/112510.
336	const SCEV *SU = SE.getUnknown(
337	V: ConstantInt::getSigned(Ty, V: Quantity, /ImplicitTrunc=/true));
338	if (Scalable)
339	SU = SE.getMulExpr(LHS: SU, RHS: SE.getVScale(Ty: SU->getType()));
340	return SU;
341	}
342	};
343
344	// This is needed for the Compare type of std::map when Immediate is used
345	// as a key. We don't need it to be fully correct against any value of vscale,
346	// just to make sure that vscale-related terms in the map are considered against
347	// each other rather than being mixed up and potentially missing opportunities.
348	struct KeyOrderTargetImmediate {
349	bool operator()(const Immediate &LHS, const Immediate &RHS) const {
350	if (LHS.isScalable() && !RHS.isScalable())
351	return false;
352	if (!LHS.isScalable() && RHS.isScalable())
353	return true;
354	return LHS.getKnownMinValue() < RHS.getKnownMinValue();
355	}
356	};
357
358	// This would be nicer if we could be generic instead of directly using size_t,
359	// but there doesn't seem to be a type trait for is_orderable or
360	// is_lessthan_comparable or similar.
361	struct KeyOrderSizeTAndImmediate {
362	bool operator()(const std::pair<size_t, Immediate> &LHS,
363	const std::pair<size_t, Immediate> &RHS) const {
364	size_t LSize = LHS.first;
365	size_t RSize = RHS.first;
366	if (LSize != RSize)
367	return LSize < RSize;
368	return KeyOrderTargetImmediate ()(LHS.second, RHS.second);
369	}
370	};
371	} // end anonymous namespace
372
373	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
374	void RegSortData::print(raw_ostream &OS) const {
375	OS << "[NumUses=" << UsedByIndices.count() << `']'`;
376	}
377
378	LLVM_DUMP_METHOD void RegSortData::dump() const {
379	print(errs()); errs() << `'\n'`;
380	}
381	#endif
382
383	namespace {
384
385	/// Map register candidates to information about how they are used.
386	class RegUseTracker {
387	using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
388
389	RegUsesTy RegUsesMap;
390	SmallVector<const SCEV *, `16`> RegSequence;
391
392	public:
393	void countRegister(const SCEV *Reg, size_t LUIdx);
394	void dropRegister(const SCEV *Reg, size_t LUIdx);
395	void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
396
397	bool isRegUsedByUsesOtherThan(const SCEV Reg, size_t LUIdx) const*;
398
399	const SmallBitVector &getUsedByIndices(const SCEV Reg) const*;
400
401	void clear();
402
403	using iterator = SmallVectorImpl<const SCEV *>::iterator;
404	using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;
405
406	iterator begin() { return RegSequence.begin(); }
407	iterator end() { return RegSequence.end(); }
408	const_iterator begin() const { return RegSequence.begin(); }
409	const_iterator end() const { return RegSequence.end(); }
410	};
411
412	} // end anonymous namespace
413
414	void
415	RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
416	std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Key: Reg);
417	RegSortData &RSD = Pair.first ->second;
418	if (Pair.second)
419	RegSequence.push_back(Elt: Reg);
420	RSD.UsedByIndices.resize(N: std::max(a: RSD.UsedByIndices.size(), b: LUIdx + `1`));
421	RSD.UsedByIndices.set(LUIdx);
422	}
423
424	void
425	RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
426	RegUsesTy::iterator It = RegUsesMap.find(Val: Reg);
427	assert(It != RegUsesMap.end());
428	RegSortData &RSD = It ->second;
429	assert(RSD.UsedByIndices.size() > LUIdx);
430	RSD.UsedByIndices.reset(Idx: LUIdx);
431	}
432
433	void
434	RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
435	assert(LUIdx <= LastLUIdx);
436
437	// Update RegUses. The data structure is not optimized for this purpose;
438	// we must iterate through it and update each of the bit vectors.
439	for (auto &Pair : RegUsesMap) {
440	SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
441	if (LUIdx < UsedByIndices.size())
442	UsedByIndices [LUIdx] =
443	LastLUIdx < UsedByIndices.size() ? UsedByIndices [LastLUIdx] : false;
444	UsedByIndices.resize(N: std::min(a: UsedByIndices.size(), b: LastLUIdx));
445	}
446	}
447
448	bool
449	RegUseTracker::isRegUsedByUsesOtherThan(const SCEV Reg, size_t LUIdx) const* {
450	RegUsesTy::const_iterator I = RegUsesMap.find(Val: Reg);
451	if (I == RegUsesMap.end())
452	return false;
453	const SmallBitVector &UsedByIndices = I ->second.UsedByIndices;
454	int i = UsedByIndices.find_first();
455	if (i == -`1`) return false;
456	if ((size_t)i != LUIdx) return true;
457	return UsedByIndices.find_next(Prev: i) != -`1`;
458	}
459
460	const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV Reg) const* {
461	RegUsesTy::const_iterator I = RegUsesMap.find(Val: Reg);
462	assert(I != RegUsesMap.end() && "Unknown register!");
463	return I ->second.UsedByIndices;
464	}
465
466	void RegUseTracker::clear() {
467	RegUsesMap.clear();
468	RegSequence.clear();
469	}
470
471	namespace {
472
473	/// This class holds information that describes a formula for computing
474	/// satisfying a use. It may include broken-out immediates and scaled registers.
475	struct Formula {
476	/// Global base address used for complex addressing.
477	GlobalValue BaseGV = nullptr*;
478
479	/// Base offset for complex addressing.
480	Immediate BaseOffset = Immediate::getZero();
481
482	/// Whether any complex addressing has a base register.
483	bool HasBaseReg = false;
484
485	/// The scale of any complex addressing.
486	int64_t Scale = `0`;
487
488	/// The list of "base" registers for this use. When this is non-empty. The
489	/// canonical representation of a formula is
490	/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
491	/// 2. ScaledReg != NULL implies Scale != 1 \|\| !BaseRegs.empty().
492	/// 3. The reg containing recurrent expr related with currect loop in the
493	/// formula should be put in the ScaledReg.
494	/// #1 enforces that the scaled register is always used when at least two
495	/// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 reg2.*
496	/// #2 enforces that 1 reg is reg.*
497	/// #3 ensures invariant regs with respect to current loop can be combined
498	/// together in LSR codegen.
499	/// This invariant can be temporarily broken while building a formula.
500	/// However, every formula inserted into the LSRInstance must be in canonical
501	/// form.
502	SmallVector<const SCEV *, `4`> BaseRegs;
503
504	/// The 'scaled' register for this use. This should be non-null when Scale is
505	/// not zero.
506	const SCEV ScaledReg = nullptr*;
507
508	/// An additional constant offset which added near the use. This requires a
509	/// temporary register, but the offset itself can live in an add immediate
510	/// field rather than a register.
511	Immediate UnfoldedOffset = Immediate::getZero();
512
513	Formula() = default;
514
515	void initialMatch(const SCEV S, Loop L, ScalarEvolution &SE);
516
517	bool isCanonical(const Loop &L) const;
518
519	void canonicalize(const Loop &L);
520
521	bool unscale();
522
523	bool hasZeroEnd() const;
524
525	bool countsDownToZero() const;
526
527	size_t getNumRegs() const;
528	Type getType() const*;
529
530	void deleteBaseReg(const SCEV *&S);
531
532	bool referencesReg(const SCEV S) const*;
533	bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
534	const RegUseTracker &RegUses) const;
535
536	void print(raw_ostream &OS) const;
537	void dump() const;
538	};
539
540	} // end anonymous namespace
541
542	/// Recursion helper for initialMatch.
543	static void DoInitialMatch(const SCEV S, Loop L,
544	SmallVectorImpl<SCEVUse> &Good,
545	SmallVectorImpl<SCEVUse> &Bad, ScalarEvolution &SE) {
546	// Collect expressions which properly dominate the loop header.
547	if (SE.properlyDominates(S, BB: L->getHeader())) {
548	Good.push_back(Elt: S);
549	return;
550	}
551
552	// Look at add operands.
553	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
554	for (const SCEV *S : Add->operands())
555	DoInitialMatch(S, L, Good, Bad, SE);
556	return;
557	}
558
559	// Look at addrec operands.
560	const SCEV Start, Step;
561	const Loop *ARLoop;
562	if (match(S,
563	P: m_scev_AffineAddRec(Op0: m_SCEV(V&: Start), Op1: m_SCEV(V&: Step), L: m_Loop(L&: ARLoop))) &&
564	!Start->isZero()) {
565	DoInitialMatch(S: Start, L, Good, Bad, SE);
566	DoInitialMatch(S: SE.getAddRecExpr(Start: SE.getConstant(Ty: S->getType(), V: `0`), Step,
567	// FIXME: AR->getNoWrapFlags()
568	L: ARLoop, Flags: SCEV::FlagAnyWrap),
569	L, Good, Bad, SE);
570	return;
571	}
572
573	// Handle a multiplication by -1 (negation) if it didn't fold.
574	if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Val: S))
575	if (Mul->getOperand(i: `0`)->isAllOnesValue()) {
576	SmallVector<SCEVUse, `4`> Ops(drop_begin(RangeOrContainer: Mul->operands()));
577	const SCEV *NewMul = SE.getMulExpr(Ops);
578
579	SmallVector<SCEVUse, `4`> MyGood;
580	SmallVector<SCEVUse, `4`> MyBad;
581	DoInitialMatch(S: NewMul, L, Good&: MyGood, Bad&: MyBad, SE);
582	const SCEV *NegOne = SE.getSCEV(V: ConstantInt::getAllOnesValue(
583	Ty: SE.getEffectiveSCEVType(Ty: NewMul->getType())));
584	for (const SCEV *S : MyGood)
585	Good.push_back(Elt: SE.getMulExpr(LHS: NegOne, RHS: S));
586	for (const SCEV *S : MyBad)
587	Bad.push_back(Elt: SE.getMulExpr(LHS: NegOne, RHS: S));
588	return;
589	}
590
591	// Ok, we can't do anything interesting. Just stuff the whole thing into a
592	// register and hope for the best.
593	Bad.push_back(Elt: S);
594	}
595
596	/// Incorporate loop-variant parts of S into this Formula, attempting to keep
597	/// all loop-invariant and loop-computable values in a single base register.
598	void Formula::initialMatch(const SCEV S, Loop L, ScalarEvolution &SE) {
599	SmallVector<SCEVUse, `4`> Good;
600	SmallVector<SCEVUse, `4`> Bad;
601	DoInitialMatch(S, L, Good, Bad, SE);
602	if (!Good.empty()) {
603	const SCEV *Sum = SE.getAddExpr(Ops&: Good);
604	if (!Sum->isZero())
605	BaseRegs.push_back(Elt: Sum);
606	HasBaseReg = true;
607	}
608	if (!Bad.empty()) {
609	const SCEV *Sum = SE.getAddExpr(Ops&: Bad);
610	if (!Sum->isZero())
611	BaseRegs.push_back(Elt: Sum);
612	HasBaseReg = true;
613	}
614	canonicalize(L: *L);
615	}
616
617	static bool containsAddRecDependentOnLoop(const SCEV S, const* Loop &L) {
618	return SCEVExprContains(Root: S, Pred: [&L](const SCEV *S) {
619	return isa<SCEVAddRecExpr>(Val: S) && (cast<SCEVAddRecExpr>(Val: S)->getLoop() == &L);
620	});
621	}
622
623	/// Check whether or not this formula satisfies the canonical
624	/// representation.
625	/// \see Formula::BaseRegs.
626	bool Formula::isCanonical(const Loop &L) const {
627	assert((Scale == `0` \|\| ScaledReg) &&
628	"ScaledReg must be non-null if Scale is non-zero");
629
630	if (!ScaledReg)
631	return BaseRegs.size() <= `1`;
632
633	if (Scale != `1`)
634	return true;
635
636	if (Scale == `1` && BaseRegs.empty())
637	return false;
638
639	if (containsAddRecDependentOnLoop(S: ScaledReg, L))
640	return true;
641
642	// If ScaledReg is not a recurrent expr, or it is but its loop is not current
643	// loop, meanwhile BaseRegs contains a recurrent expr reg related with current
644	// loop, we want to swap the reg in BaseRegs with ScaledReg.
645	return none_of(Range: BaseRegs, P: [&L](const SCEV *S) {
646	return containsAddRecDependentOnLoop(S, L);
647	});
648	}
649
650	/// Helper method to morph a formula into its canonical representation.
651	/// \see Formula::BaseRegs.
652	/// Every formula having more than one base register, must use the ScaledReg
653	/// field. Otherwise, we would have to do special cases everywhere in LSR
654	/// to treat reg1 + reg2 + ... the same way as reg1 + 1reg2 + ...*
655	/// On the other hand, 1reg should be canonicalized into reg.*
656	void Formula::canonicalize(const Loop &L) {
657	if (isCanonical(L))
658	return;
659
660	if (BaseRegs.empty()) {
661	// No base reg? Use scale reg with scale = 1 as such.
662	assert(ScaledReg && "Expected 1*reg => reg");
663	assert(Scale == `1` && "Expected 1*reg => reg");
664	BaseRegs.push_back(Elt: ScaledReg);
665	Scale = `0`;
666	ScaledReg = nullptr;
667	return;
668	}
669
670	// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
671	if (!ScaledReg) {
672	ScaledReg = BaseRegs.pop_back_val();
673	Scale = `1`;
674	}
675
676	// If ScaledReg is an invariant with respect to L, find the reg from
677	// BaseRegs containing the recurrent expr related with Loop L. Swap the
678	// reg with ScaledReg.
679	if (!containsAddRecDependentOnLoop(S: ScaledReg, L)) {
680	auto I = find_if(Range&: BaseRegs, P: [&L](const SCEV *S) {
681	return containsAddRecDependentOnLoop(S, L);
682	});
683	if (I != BaseRegs.end())
684	std::swap(a&: ScaledReg, b&: *I);
685	}
686	assert(isCanonical(L) && "Failed to canonicalize?");
687	}
688
689	/// Get rid of the scale in the formula.
690	/// In other words, this method morphes reg1 + 1reg2 into reg1 + reg2.*
691	/// \return true if it was possible to get rid of the scale, false otherwise.
692	/// \note After this operation the formula may not be in the canonical form.
693	bool Formula::unscale() {
694	if (Scale != `1`)
695	return false;
696	Scale = `0`;
697	BaseRegs.push_back(Elt: ScaledReg);
698	ScaledReg = nullptr;
699	return true;
700	}
701
702	bool Formula::hasZeroEnd() const {
703	if (UnfoldedOffset \|\| BaseOffset)
704	return false;
705	if (BaseRegs.size() != `1` \|\| ScaledReg)
706	return false;
707	return true;
708	}
709
710	bool Formula::countsDownToZero() const {
711	if (!hasZeroEnd())
712	return false;
713	assert(BaseRegs.size() == `1` && "hasZeroEnd should mean one BaseReg");
714	const APInt *StepInt;
715	if (!match(S: BaseRegs [`0`], P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_scev_APInt(C&: StepInt))))
716	return false;
717	return StepInt->isNegative();
718	}
719
720	/// Return the total number of register operands used by this formula. This does
721	/// not include register uses implied by non-constant addrec strides.
722	size_t Formula::getNumRegs() const {
723	return !!ScaledReg + BaseRegs.size();
724	}
725
726	/// Return the type of this formula, if it has one, or null otherwise. This type
727	/// is meaningless except for the bit size.
728	Type Formula::getType() const* {
729	return !BaseRegs.empty() ? BaseRegs.front()->getType() :
730	ScaledReg ? ScaledReg->getType() :
731	BaseGV ? BaseGV->getType() :
732	nullptr;
733	}
734
735	/// Delete the given base reg from the BaseRegs list.
736	void Formula::deleteBaseReg(const SCEV *&S) {
737	if (&S != &BaseRegs.back())
738	std::swap(a&: S, b&: BaseRegs.back());
739	BaseRegs.pop_back();
740	}
741
742	/// Test if this formula references the given register.
743	bool Formula::referencesReg(const SCEV S) const* {
744	return S == ScaledReg \|\| is_contained(Range: BaseRegs, Element: S);
745	}
746
747	/// Test whether this formula uses registers which are used by uses other than
748	/// the use with the given index.
749	bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
750	const RegUseTracker &RegUses) const {
751	if (ScaledReg)
752	if (RegUses.isRegUsedByUsesOtherThan(Reg: ScaledReg, LUIdx))
753	return true;
754	for (const SCEV *BaseReg : BaseRegs)
755	if (RegUses.isRegUsedByUsesOtherThan(Reg: BaseReg, LUIdx))
756	return true;
757	return false;
758	}
759
760	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
761	void Formula::print(raw_ostream &OS) const {
762	ListSeparator Plus(" + ");
763	if (BaseGV) {
764	OS << Plus;
765	BaseGV->printAsOperand(OS, /PrintType=/false);
766	}
767	if (BaseOffset.isNonZero())
768	OS << Plus << BaseOffset;
769
770	for (const SCEV *BaseReg : BaseRegs)
771	OS << Plus << "reg(" << *BaseReg << `')'`;
772
773	if (HasBaseReg && BaseRegs.empty())
774	OS << Plus << "error: HasBaseReg";
775	else if (!HasBaseReg && !BaseRegs.empty())
776	OS << Plus << "error: !HasBaseReg";
777
778	if (Scale != `0`) {
779	OS << Plus << Scale << "*reg(";
780	if (ScaledReg)
781	OS << *ScaledReg;
782	else
783	OS << "<unknown>";
784	OS << `')'`;
785	}
786	if (UnfoldedOffset.isNonZero())
787	OS << Plus << "imm(" << UnfoldedOffset << `')'`;
788	}
789
790	LLVM_DUMP_METHOD void Formula::dump() const {
791	print(errs()); errs() << `'\n'`;
792	}
793	#endif
794
795	/// Return true if the given addrec can be sign-extended without changing its
796	/// value.
797	static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
798	Type *WideTy =
799	IntegerType::get(C&: SE.getContext(), NumBits: SE.getTypeSizeInBits(Ty: AR->getType()) + `1`);
800	return isa<SCEVAddRecExpr>(Val: SE.getSignExtendExpr(Op: AR, Ty: WideTy));
801	}
802
803	/// Return true if the given add can be sign-extended without changing its
804	/// value.
805	static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
806	Type *WideTy =
807	IntegerType::get(C&: SE.getContext(), NumBits: SE.getTypeSizeInBits(Ty: A->getType()) + `1`);
808	return isa<SCEVAddExpr>(Val: SE.getSignExtendExpr(Op: A, Ty: WideTy));
809	}
810
811	/// Return true if the given mul can be sign-extended without changing its
812	/// value.
813	static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
814	Type *WideTy =
815	IntegerType::get(C&: SE.getContext(),
816	NumBits: SE.getTypeSizeInBits(Ty: M->getType()) * M->getNumOperands());
817	return isa<SCEVMulExpr>(Val: SE.getSignExtendExpr(Op: M, Ty: WideTy));
818	}
819
820	/// Return an expression for LHS /s RHS, if it can be determined and if the
821	/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
822	/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that
823	/// the multiplication may overflow, which is useful when the result will be
824	/// used in a context where the most significant bits are ignored.
825	static const SCEV getExactSDiv(const* SCEV LHS, const* SCEV *RHS,
826	ScalarEvolution &SE,
827	bool IgnoreSignificantBits = false) {
828	// Handle the trivial case, which works for any SCEV type.
829	if (LHS == RHS)
830	return SE.getConstant(Ty: LHS->getType(), V: `1`);
831
832	// Handle a few RHS special cases.
833	const SCEVConstant *RC = dyn_cast<SCEVConstant>(Val: RHS);
834	if (RC) {
835	const APInt &RA = RC->getAPInt();
836	// Handle x /s -1 as x -1, to give ScalarEvolution a chance to do*
837	// some folding.
838	if (RA.isAllOnes()) {
839	if (LHS->getType()->isPointerTy())
840	return nullptr;
841	return SE.getMulExpr(LHS, RHS: RC);
842	}
843	// Handle x /s 1 as x.
844	if (RA == `1`)
845	return LHS;
846	}
847
848	// Check for a division of a constant by a constant.
849	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: LHS)) {
850	if (!RC)
851	return nullptr;
852	const APInt &LA = C->getAPInt();
853	const APInt &RA = RC->getAPInt();
854	if (LA.srem(RHS: RA) != `0`)
855	return nullptr;
856	return SE.getConstant(Val: LA.sdiv(RHS: RA));
857	}
858
859	// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
860	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: LHS)) {
861	if ((IgnoreSignificantBits \|\| isAddRecSExtable(AR, SE)) && AR->isAffine()) {
862	const SCEV *Step = getExactSDiv(LHS: AR->getStepRecurrence(SE), RHS, SE,
863	IgnoreSignificantBits);
864	if (!Step) return nullptr;
865	const SCEV *Start = getExactSDiv(LHS: AR->getStart(), RHS, SE,
866	IgnoreSignificantBits);
867	if (!Start) return nullptr;
868	// FlagNW is independent of the start value, step direction, and is
869	// preserved with smaller magnitude steps.
870	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
871	return SE.getAddRecExpr(Start, Step, L: AR->getLoop(), Flags: SCEV::FlagAnyWrap);
872	}
873	return nullptr;
874	}
875
876	// Distribute the sdiv over add operands, if the add doesn't overflow.
877	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: LHS)) {
878	if (IgnoreSignificantBits \|\| isAddSExtable(A: Add, SE)) {
879	SmallVector<SCEVUse, `8`> Ops;
880	for (const SCEV *S : Add->operands()) {
881	const SCEV *Op = getExactSDiv(LHS: S, RHS, SE, IgnoreSignificantBits);
882	if (!Op) return nullptr;
883	Ops.push_back(Elt: Op);
884	}
885	return SE.getAddExpr(Ops);
886	}
887	return nullptr;
888	}
889
890	// Check for a multiply operand that we can pull RHS out of.
891	if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Val: LHS)) {
892	if (IgnoreSignificantBits \|\| isMulSExtable(M: Mul, SE)) {
893	// Handle special case C1XY /s C2XY.
894	if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(Val: RHS)) {
895	if (IgnoreSignificantBits \|\| isMulSExtable(M: MulRHS, SE)) {
896	const SCEVConstant *LC = dyn_cast<SCEVConstant>(Val: Mul->getOperand(i: `0`));
897	const SCEVConstant *RC =
898	dyn_cast<SCEVConstant>(Val: MulRHS->getOperand(i: `0`));
899	if (LC && RC) {
900	SmallVector<const SCEV *, `4`> LOps(drop_begin(RangeOrContainer: Mul->operands()));
901	SmallVector<const SCEV *, `4`> ROps(drop_begin(RangeOrContainer: MulRHS->operands()));
902	if (LOps == ROps)
903	return getExactSDiv(LHS: LC, RHS: RC, SE, IgnoreSignificantBits);
904	}
905	}
906	}
907
908	SmallVector<SCEVUse, `4`> Ops;
909	bool Found = false;
910	for (const SCEV *S : Mul->operands()) {
911	if (!Found)
912	if (const SCEV *Q = getExactSDiv(LHS: S, RHS, SE,
913	IgnoreSignificantBits)) {
914	S = Q;
915	Found = true;
916	}
917	Ops.push_back(Elt: S);
918	}
919	return Found ? SE.getMulExpr(Ops) : nullptr;
920	}
921	return nullptr;
922	}
923
924	// Otherwise we don't know.
925	return nullptr;
926	}
927
928	/// If S involves the addition of a constant integer value, return that integer
929	/// value, and mutate S to point to a new SCEV with that value excluded.
930	static Immediate ExtractImmediate(SCEVUse &S, ScalarEvolution &SE) {
931	const APInt *C;
932	if (match(U: S, P: m_scev_APInt(C))) {
933	if (C->getSignificantBits() <= `64`) {
934	S = SE.getConstant(Ty: S ->getType(), V: `0`);
935	return Immediate::getFixed(MinVal: C->getSExtValue());
936	}
937	} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val&: S)) {
938	SmallVector<SCEVUse, `8`> NewOps(Add->operands());
939	Immediate Result = ExtractImmediate(S&: NewOps.front(), SE);
940	if (Result.isNonZero())
941	S = SE.getAddExpr(Ops&: NewOps);
942	return Result;
943	} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val&: S)) {
944	SmallVector<SCEVUse, `8`> NewOps(AR->operands());
945	Immediate Result = ExtractImmediate(S&: NewOps.front(), SE);
946	if (Result.isNonZero())
947	S = SE.getAddRecExpr(Operands&: NewOps, L: AR->getLoop(),
948	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
949	Flags: SCEV::FlagAnyWrap);
950	return Result;
951	} else if (EnableVScaleImmediates &&
952	match(U: S, P: m_scev_Mul(Op0: m_scev_APInt(C), Op1: m_SCEVVScale()))) {
953	S = SE.getConstant(Ty: S ->getType(), V: `0`);
954	return Immediate::getScalable(MinVal: C->getSExtValue());
955	}
956	return Immediate::getZero();
957	}
958
959	/// If S involves the addition of a GlobalValue address, return that symbol, and
960	/// mutate S to point to a new SCEV with that value excluded.
961	static GlobalValue *ExtractSymbol(SCEVUse &S, ScalarEvolution &SE) {
962	if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val&: S)) {
963	if (GlobalValue *GV = dyn_cast<GlobalValue>(Val: U->getValue())) {
964	S = SE.getConstant(Ty: GV->getType(), V: `0`);
965	return GV;
966	}
967	} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val&: S)) {
968	SmallVector<SCEVUse, `8`> NewOps(Add->operands());
969	GlobalValue *Result = ExtractSymbol(S&: NewOps.back(), SE);
970	if (Result)
971	S = SE.getAddExpr(Ops&: NewOps);
972	return Result;
973	} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val&: S)) {
974	SmallVector<SCEVUse, `8`> NewOps(AR->operands());
975	GlobalValue *Result = ExtractSymbol(S&: NewOps.front(), SE);
976	if (Result)
977	S = SE.getAddRecExpr(Operands&: NewOps, L: AR->getLoop(),
978	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
979	Flags: SCEV::FlagAnyWrap);
980	return Result;
981	}
982	return nullptr;
983	}
984
985	/// Returns true if the specified instruction is using the specified value as an
986	/// address.
987	static bool isAddressUse(const TargetTransformInfo &TTI,
988	Instruction Inst, Value OperandVal) {
989	bool isAddress = isa<LoadInst>(Val: Inst);
990	if (StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
991	if (SI->getPointerOperand() == OperandVal)
992	isAddress = true;
993	} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) {
994	// Addressing modes can also be folded into prefetches and a variety
995	// of intrinsics.
996	switch (II->getIntrinsicID()) {
997	case Intrinsic::memset:
998	case Intrinsic::prefetch:
999	case Intrinsic::masked_load:
1000	if (II->getArgOperand(i: `0`) == OperandVal)
1001	isAddress = true;
1002	break;
1003	case Intrinsic::masked_store:
1004	if (II->getArgOperand(i: `1`) == OperandVal)
1005	isAddress = true;
1006	break;
1007	case Intrinsic::memmove:
1008	case Intrinsic::memcpy:
1009	if (II->getArgOperand(i: `0`) == OperandVal \|\|
1010	II->getArgOperand(i: `1`) == OperandVal)
1011	isAddress = true;
1012	break;
1013	default: {
1014	MemIntrinsicInfo IntrInfo;
1015	if (TTI.getTgtMemIntrinsic(Inst: II, Info&: IntrInfo)) {
1016	if (IntrInfo.PtrVal == OperandVal)
1017	isAddress = true;
1018	}
1019	}
1020	}
1021	} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: Inst)) {
1022	if (RMW->getPointerOperand() == OperandVal)
1023	isAddress = true;
1024	} else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst)) {
1025	if (CmpX->getPointerOperand() == OperandVal)
1026	isAddress = true;
1027	}
1028	return isAddress;
1029	}
1030
1031	/// Return the type of the memory being accessed.
1032	static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
1033	Instruction Inst, Value OperandVal) {
1034	MemAccessTy AccessTy = MemAccessTy::getUnknown(Ctx&: Inst->getContext());
1035
1036	// First get the type of memory being accessed.
1037	if (Type *Ty = Inst->getAccessType())
1038	AccessTy.MemTy = Ty;
1039
1040	// Then get the pointer address space.
1041	if (const StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
1042	AccessTy.AddrSpace = SI->getPointerAddressSpace();
1043	} else if (const LoadInst *LI = dyn_cast<LoadInst>(Val: Inst)) {
1044	AccessTy.AddrSpace = LI->getPointerAddressSpace();
1045	} else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: Inst)) {
1046	AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1047	} else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst)) {
1048	AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1049	} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) {
1050	switch (II->getIntrinsicID()) {
1051	case Intrinsic::prefetch:
1052	case Intrinsic::memset:
1053	AccessTy.AddrSpace = II->getArgOperand(i: `0`)->getType()->getPointerAddressSpace();
1054	AccessTy.MemTy = OperandVal->getType();
1055	break;
1056	case Intrinsic::memmove:
1057	case Intrinsic::memcpy:
1058	AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
1059	AccessTy.MemTy = OperandVal->getType();
1060	break;
1061	case Intrinsic::masked_load:
1062	AccessTy.AddrSpace =
1063	II->getArgOperand(i: `0`)->getType()->getPointerAddressSpace();
1064	break;
1065	case Intrinsic::masked_store:
1066	AccessTy.AddrSpace =
1067	II->getArgOperand(i: `1`)->getType()->getPointerAddressSpace();
1068	break;
1069	default: {
1070	MemIntrinsicInfo IntrInfo;
1071	if (TTI.getTgtMemIntrinsic(Inst: II, Info&: IntrInfo) && IntrInfo.PtrVal) {
1072	AccessTy.AddrSpace
1073	= IntrInfo.PtrVal->getType()->getPointerAddressSpace();
1074	}
1075
1076	break;
1077	}
1078	}
1079	}
1080
1081	return AccessTy;
1082	}
1083
1084	/// Return true if this AddRec is already a phi in its loop.
1085	static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
1086	for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
1087	if (SE.isSCEVable(Ty: PN.getType()) &&
1088	(SE.getEffectiveSCEVType(Ty: PN.getType()) ==
1089	SE.getEffectiveSCEVType(Ty: AR->getType())) &&
1090	SE.getSCEV(V: &PN) == AR)
1091	return true;
1092	}
1093	return false;
1094	}
1095
1096	/// Check if expanding this expression is likely to incur significant cost. This
1097	/// is tricky because SCEV doesn't track which expressions are actually computed
1098	/// by the current IR.
1099	///
1100	/// We currently allow expansion of IV increments that involve adds,
1101	/// multiplication by constants, and AddRecs from existing phis.
1102	///
1103	/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
1104	/// obvious multiple of the UDivExpr.
1105	static bool isHighCostExpansion(const SCEV *S,
1106	SmallPtrSetImpl<const SCEV*> &Processed,
1107	ScalarEvolution &SE) {
1108	// Zero/One operand expressions
1109	switch (S->getSCEVType()) {
1110	case scUnknown:
1111	case scConstant:
1112	case scVScale:
1113	return false;
1114	case scTruncate:
1115	return isHighCostExpansion(S: cast<SCEVTruncateExpr>(Val: S)->getOperand(),
1116	Processed, SE);
1117	case scZeroExtend:
1118	return isHighCostExpansion(S: cast<SCEVZeroExtendExpr>(Val: S)->getOperand(),
1119	Processed, SE);
1120	case scSignExtend:
1121	return isHighCostExpansion(S: cast<SCEVSignExtendExpr>(Val: S)->getOperand(),
1122	Processed, SE);
1123	default:
1124	break;
1125	}
1126
1127	if (!Processed.insert(Ptr: S).second)
1128	return false;
1129
1130	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
1131	for (const SCEV *S : Add->operands()) {
1132	if (isHighCostExpansion(S, Processed, SE))
1133	return true;
1134	}
1135	return false;
1136	}
1137
1138	const SCEV Op0, Op1;
1139	if (match(S, P: m_scev_Mul(Op0: m_SCEV(V&: Op0), Op1: m_SCEV(V&: Op1)))) {
1140	// Multiplication by a constant is ok
1141	if (isa<SCEVConstant>(Val: Op0))
1142	return isHighCostExpansion(S: Op1, Processed, SE);
1143
1144	// If we have the value of one operand, check if an existing
1145	// multiplication already generates this expression.
1146	if (const auto *U = dyn_cast<SCEVUnknown>(Val: Op1)) {
1147	Value *UVal = U->getValue();
1148	for (User *UR : UVal->users()) {
1149	// If U is a constant, it may be used by a ConstantExpr.
1150	Instruction *UI = dyn_cast<Instruction>(Val: UR);
1151	if (UI && UI->getOpcode() == Instruction::Mul &&
1152	SE.isSCEVable(Ty: UI->getType())) {
1153	return SE.getSCEV(V: UI) == S;
1154	}
1155	}
1156	}
1157	}
1158
1159	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
1160	if (isExistingPhi(AR, SE))
1161	return false;
1162	}
1163
1164	// Fow now, consider any other type of expression (div/mul/min/max) high cost.
1165	return true;
1166	}
1167
namespace {

// Declared early so that prototypes appearing before the class definition can
// take an LSRUse by reference.
class LSRUse;

} // end anonymous namespace
1173
1174	/// Check if the addressing mode defined by \p F is completely
1175	/// folded in \p LU at isel time.
1176	/// This includes address-mode folding and special icmp tricks.
1177	/// This function returns true if \p LU can accommodate what \p F
1178	/// defines and up to 1 base + 1 scaled + offset.
1179	/// In other words, if \p F has several base registers, this function may
1180	/// still return true. Therefore, users still need to account for
1181	/// additional base registers and/or unfolded offsets to derive an
1182	/// accurate cost model.
1183	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1184	const LSRUse &LU, const Formula &F);
1185
1186	// Get the cost of the scaling factor used in F for LU.
1187	static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
1188	const LSRUse &LU, const Formula &F,
1189	const Loop &L);
1190
1191	namespace {
1192
1193	/// This class is used to measure and compare candidate formulae.
1194	class Cost {
1195	const Loop L = nullptr*;
1196	ScalarEvolution SE = nullptr*;
1197	const TargetTransformInfo TTI = nullptr*;
1198	TargetTransformInfo::LSRCost C;
1199	TTI::AddressingModeKind AMK = TTI::AMK_None;
1200
1201	public:
1202	Cost() = delete;
1203	Cost(const Loop L, ScalarEvolution &SE, const* TargetTransformInfo &TTI,
1204	TTI::AddressingModeKind AMK) :
1205	L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
1206	C.Insns = `0`;
1207	C.NumRegs = `0`;
1208	C.AddRecCost = `0`;
1209	C.NumIVMuls = `0`;
1210	C.NumBaseAdds = `0`;
1211	C.ImmCost = `0`;
1212	C.SetupCost = `0`;
1213	C.ScaleCost = `0`;
1214	}
1215
1216	bool isLess(const Cost &Other) const;
1217
1218	void Lose();
1219
1220	#ifndef NDEBUG
1221	// Once any of the metrics loses, they must all remain losers.
1222	bool isValid() {
1223	return ((C.Insns \| C.NumRegs \| C.AddRecCost \| C.NumIVMuls \| C.NumBaseAdds
1224	\| C.ImmCost \| C.SetupCost \| C.ScaleCost) != ~`0u`)
1225	\|\| ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
1226	& C.ImmCost & C.SetupCost & C.ScaleCost) == ~`0u`);
1227	}
1228	#endif
1229
1230	bool isLoser() {
1231	assert(isValid() && "invalid cost");
1232	return C.NumRegs == ~`0u`;
1233	}
1234
1235	void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
1236	const DenseSet<const SCEV > &VisitedRegs, const* LSRUse &LU,
1237	bool HardwareLoopProfitable,
1238	SmallPtrSetImpl<const SCEV > LoserRegs = nullptr);
1239
1240	void print(raw_ostream &OS) const;
1241	void dump() const;
1242
1243	private:
1244	void RateRegister(const Formula &F, const SCEV *Reg,
1245	SmallPtrSetImpl<const SCEV > &Regs, const* LSRUse &LU,
1246	bool HardwareLoopProfitable);
1247	void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1248	SmallPtrSetImpl<const SCEV *> &Regs,
1249	const LSRUse &LU, bool HardwareLoopProfitable,
1250	SmallPtrSetImpl<const SCEV > LoserRegs);
1251	};
1252
1253	/// An operand value in an instruction which is to be replaced with some
1254	/// equivalent, possibly strength-reduced, replacement.
1255	struct LSRFixup {
1256	/// The instruction which will be updated.
1257	Instruction UserInst = nullptr*;
1258
1259	/// The operand of the instruction which will be replaced. The operand may be
1260	/// used more than once; every instance will be replaced.
1261	Value OperandValToReplace = nullptr*;
1262
1263	/// If this user is to use the post-incremented value of an induction
1264	/// variable, this set is non-empty and holds the loops associated with the
1265	/// induction variable.
1266	PostIncLoopSet PostIncLoops;
1267
1268	/// A constant offset to be added to the LSRUse expression. This allows
1269	/// multiple fixups to share the same LSRUse with different offsets, for
1270	/// example in an unrolled loop.
1271	Immediate Offset = Immediate::getZero();
1272
1273	LSRFixup() = default;
1274
1275	bool isUseFullyOutsideLoop(const Loop L) const*;
1276
1277	void print(raw_ostream &OS) const;
1278	void dump() const;
1279	};
1280
1281	/// This class holds the state that LSR keeps for each use in IVUsers, as well
1282	/// as uses invented by LSR itself. It includes information about what kinds of
1283	/// things can be folded into the user, information about the user itself, and
1284	/// information about how the use may be satisfied. TODO: Represent multiple
1285	/// users of the same expression in common?
1286	class LSRUse {
1287	DenseSet<SmallVector<const SCEV *, `4`>> Uniquifier;
1288
1289	public:
1290	/// An enum for a kind of use, indicating what types of scaled and immediate
1291	/// operands it might support.
1292	enum KindType {
1293	Basic, ///< A normal use, with no folding.
1294	Special, ///< A special case of basic, allowing -1 scales.
1295	Address, ///< An address use; folding according to TargetLowering
1296	ICmpZero ///< An equality icmp with both operands folded into one.
1297	// TODO: Add a generic icmp too?
1298	};
1299
1300	using SCEVUseKindPair = PointerIntPair<const SCEV *, `2`, KindType>;
1301
1302	KindType Kind;
1303	MemAccessTy AccessTy;
1304
1305	/// The list of operands which are to be replaced.
1306	SmallVector<LSRFixup, `8`> Fixups;
1307
1308	/// Keep track of the min and max offsets of the fixups.
1309	Immediate MinOffset = Immediate::getFixedMax();
1310	Immediate MaxOffset = Immediate::getFixedMin();
1311
1312	/// This records whether all of the fixups using this LSRUse are outside of
1313	/// the loop, in which case some special-case heuristics may be used.
1314	bool AllFixupsOutsideLoop = true;
1315
1316	/// This records whether all of the fixups using this LSRUse are unconditional
1317	/// within the loop, meaning they will be executed on every path to the loop
1318	/// latch. This includes fixups before early exits.
1319	bool AllFixupsUnconditional = true;
1320
1321	/// RigidFormula is set to true to guarantee that this use will be associated
1322	/// with a single formula--the one that initially matched. Some SCEV
1323	/// expressions cannot be expanded. This allows LSR to consider the registers
1324	/// used by those expressions without the need to expand them later after
1325	/// changing the formula.
1326	bool RigidFormula = false;
1327
1328	/// A list of ways to build a value that can satisfy this user. After the
1329	/// list is populated, one of these is selected heuristically and used to
1330	/// formulate a replacement for OperandValToReplace in UserInst.
1331	SmallVector<Formula, `12`> Formulae;
1332
1333	/// The set of register candidates used by all formulae in this LSRUse.
1334	SmallPtrSet<const SCEV *, `4`> Regs;
1335
1336	LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy (AT) {}
1337
1338	LSRFixup &getNewFixup() {
1339	Fixups.push_back(Elt: LSRFixup ());
1340	return Fixups.back();
1341	}
1342
1343	void pushFixup(LSRFixup &f) {
1344	Fixups.push_back(Elt: f);
1345	if (Immediate::isKnownGT(LHS: f.Offset, RHS: MaxOffset))
1346	MaxOffset = f.Offset;
1347	if (Immediate::isKnownLT(LHS: f.Offset, RHS: MinOffset))
1348	MinOffset = f.Offset;
1349	}
1350
1351	bool HasFormulaWithSameRegs(const Formula &F) const;
1352	float getNotSelectedProbability(const SCEV Reg) const*;
1353	bool InsertFormula(const Formula &F, const Loop &L);
1354	void DeleteFormula(Formula &F);
1355	void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1356
1357	void print(raw_ostream &OS) const;
1358	void dump() const;
1359	};
1360
1361	} // end anonymous namespace
1362
1363	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1364	LSRUse::KindType Kind, MemAccessTy AccessTy,
1365	GlobalValue *BaseGV, Immediate BaseOffset,
1366	bool HasBaseReg, int64_t Scale,
1367	Instruction Fixup = nullptr*);
1368
1369	static unsigned getSetupCost(const SCEV Reg, unsigned* Depth) {
1370	if (isa<SCEVUnknown>(Val: Reg) \|\| isa<SCEVConstant>(Val: Reg))
1371	return `1`;
1372	if (Depth == `0`)
1373	return `0`;
1374	if (const auto *S = dyn_cast<SCEVAddRecExpr>(Val: Reg))
1375	return getSetupCost(Reg: S->getStart(), Depth: Depth - `1`);
1376	if (auto S = dyn_cast<SCEVIntegralCastExpr>(Val: Reg))
1377	return getSetupCost(Reg: S->getOperand(), Depth: Depth - `1`);
1378	if (auto S = dyn_cast<SCEVNAryExpr>(Val: Reg))
1379	return std::accumulate(first: S->operands().begin(), last: S->operands().end(), init: `0`,
1380	binary_op: [&](unsigned i, const SCEV *Reg) {
1381	return i + getSetupCost(Reg, Depth: Depth - `1`);
1382	});
1383	if (auto S = dyn_cast<SCEVUDivExpr>(Val: Reg))
1384	return getSetupCost(Reg: S->getLHS(), Depth: Depth - `1`) +
1385	getSetupCost(Reg: S->getRHS(), Depth: Depth - `1`);
1386	return `0`;
1387	}
1388
1389	/// Tally up interesting quantities from the given register.
1390	void Cost::RateRegister(const Formula &F, const SCEV *Reg,
1391	SmallPtrSetImpl<const SCEV > &Regs, const* LSRUse &LU,
1392	bool HardwareLoopProfitable) {
1393	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Reg)) {
1394	// If this is an addrec for another loop, it should be an invariant
1395	// with respect to L since L is the innermost loop (at least
1396	// for now LSR only handles innermost loops).
1397	if (AR->getLoop() != L) {
1398	// If the AddRec exists, consider it's register free and leave it alone.
1399	if (isExistingPhi(AR, SE&: *SE) && !(AMK & TTI::AMK_PostIndexed))
1400	return;
1401
1402	// It is bad to allow LSR for current loop to add induction variables
1403	// for its sibling loops.
1404	if (!AR->getLoop()->contains(L)) {
1405	Lose();
1406	return;
1407	}
1408
1409	// Otherwise, it will be an invariant with respect to Loop L.
1410	++C.NumRegs;
1411	return;
1412	}
1413
1414	unsigned LoopCost = `1`;
1415	if (TTI->isIndexedLoadLegal(Mode: TTI->MIM_PostInc, Ty: AR->getType()) \|\|
1416	TTI->isIndexedStoreLegal(Mode: TTI->MIM_PostInc, Ty: AR->getType())) {
1417	const SCEV *Start;
1418	const APInt *Step;
1419	if (match(S: AR, P: m_scev_AffineAddRec(Op0: m_SCEV(V&: Start), Op1: m_scev_APInt(C&: Step)))) {
1420	// If the step size matches the base offset, we could use pre-indexed
1421	// addressing.
1422	bool CanPreIndex = (AMK & TTI::AMK_PreIndexed) &&
1423	F.BaseOffset.isFixed() &&
1424	*Step == F.BaseOffset.getFixedValue();
1425	bool CanPostIndex = (AMK & TTI::AMK_PostIndexed) &&
1426	!isa<SCEVConstant>(Val: Start) &&
1427	SE->isLoopInvariant(S: Start, L);
1428	// We can only pre or post index when the load/store is unconditional.
1429	if ((CanPreIndex \|\| CanPostIndex) && LU.AllFixupsUnconditional)
1430	LoopCost = `0`;
1431	}
1432	}
1433
1434	// If the loop counts down to zero and we'll be using a hardware loop then
1435	// the addrec will be combined into the hardware loop instruction.
1436	if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
1437	HardwareLoopProfitable)
1438	LoopCost = `0`;
1439	C.AddRecCost += LoopCost;
1440
1441	// Add the step value register, if it needs one.
1442	// TODO: The non-affine case isn't precisely modeled here.
1443	if (!AR->isAffine() \|\| !isa<SCEVConstant>(Val: AR->getOperand(i: `1`))) {
1444	if (!Regs.count(Ptr: AR->getOperand(i: `1`))) {
1445	RateRegister(F, Reg: AR->getOperand(i: `1`), Regs, LU, HardwareLoopProfitable);
1446	if (isLoser())
1447	return;
1448	}
1449	}
1450	}
1451	++C.NumRegs;
1452
1453	// Rough heuristic; favor registers which don't require extra setup
1454	// instructions in the preheader.
1455	C.SetupCost += getSetupCost(Reg, Depth: SetupCostDepthLimit);
1456	// Ensure we don't, even with the recusion limit, produce invalid costs.
1457	C.SetupCost = std::min<unsigned>(a: C.SetupCost, b: `1` << `16`);
1458
1459	C.NumIVMuls += isa<SCEVMulExpr>(Val: Reg) &&
1460	SE->hasComputableLoopEvolution(S: Reg, L);
1461	}
1462
1463	/// Record this register in the set. If we haven't seen it before, rate
1464	/// it. Optional LoserRegs provides a way to declare any formula that refers to
1465	/// one of those regs an instant loser.
1466	void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1467	SmallPtrSetImpl<const SCEV *> &Regs,
1468	const LSRUse &LU, bool HardwareLoopProfitable,
1469	SmallPtrSetImpl<const SCEV > LoserRegs) {
1470	if (LoserRegs && LoserRegs->count(Ptr: Reg)) {
1471	Lose();
1472	return;
1473	}
1474	if (Regs.insert(Ptr: Reg).second) {
1475	RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);
1476	if (LoserRegs && isLoser())
1477	LoserRegs->insert(Ptr: Reg);
1478	}
1479	}
1480
1481	void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
1482	const DenseSet<const SCEV *> &VisitedRegs,
1483	const LSRUse &LU, bool HardwareLoopProfitable,
1484	SmallPtrSetImpl<const SCEV > LoserRegs) {
1485	if (isLoser())
1486	return;
1487	assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
1488	// Tally up the registers.
1489	unsigned PrevAddRecCost = C.AddRecCost;
1490	unsigned PrevNumRegs = C.NumRegs;
1491	unsigned PrevNumBaseAdds = C.NumBaseAdds;
1492	if (const SCEV *ScaledReg = F.ScaledReg) {
1493	if (VisitedRegs.count(V: ScaledReg)) {
1494	Lose();
1495	return;
1496	}
1497	RatePrimaryRegister(F, Reg: ScaledReg, Regs, LU, HardwareLoopProfitable,
1498	LoserRegs);
1499	if (isLoser())
1500	return;
1501	}
1502	for (const SCEV *BaseReg : F.BaseRegs) {
1503	if (VisitedRegs.count(V: BaseReg)) {
1504	Lose();
1505	return;
1506	}
1507	RatePrimaryRegister(F, Reg: BaseReg, Regs, LU, HardwareLoopProfitable,
1508	LoserRegs);
1509	if (isLoser())
1510	return;
1511	}
1512
1513	// Determine how many (unfolded) adds we'll need inside the loop.
1514	size_t NumBaseParts = F.getNumRegs();
1515	if (NumBaseParts > `1`)
1516	// Do not count the base and a possible second register if the target
1517	// allows to fold 2 registers.
1518	C.NumBaseAdds +=
1519	NumBaseParts - (`1` + (F.Scale && isAMCompletelyFolded(TTI: *TTI, LU, F)));
1520	C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());
1521
1522	// Accumulate non-free scaling amounts.
1523	C.ScaleCost += getScalingFactorCost(TTI: TTI, LU, F, L: L).getValue();
1524
1525	// Tally up the non-zero immediates.
1526	for (const LSRFixup &Fixup : LU.Fixups) {
1527	if (Fixup.Offset.isCompatibleImmediate(Imm: F.BaseOffset)) {
1528	Immediate Offset = Fixup.Offset.addUnsigned(RHS: F.BaseOffset);
1529	if (F.BaseGV)
1530	C.ImmCost += `64`; // Handle symbolic values conservatively.
1531	// TODO: This should probably be the pointer size.
1532	else if (Offset.isNonZero())
1533	C.ImmCost +=
1534	APInt (`64`, Offset.getKnownMinValue(), true).getSignificantBits();
1535
1536	// Check with target if this offset with this instruction is
1537	// specifically not supported.
1538	if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
1539	!isAMCompletelyFolded(TTI: *TTI, Kind: LSRUse::Address, AccessTy: LU.AccessTy, BaseGV: F.BaseGV,
1540	BaseOffset: Offset, HasBaseReg: F.HasBaseReg, Scale: F.Scale, Fixup: Fixup.UserInst))
1541	C.NumBaseAdds++;
1542	} else {
1543	// Incompatible immediate type, increase cost to avoid using
1544	C.ImmCost += `2048`;
1545	}
1546	}
1547
1548	// If we don't count instruction cost exit here.
1549	if (!InsnsCost) {
1550	assert(isValid() && "invalid cost");
1551	return;
1552	}
1553
1554	// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
1555	// additional instruction (at least fill).
1556	// TODO: Need distinguish register class?
1557	unsigned TTIRegNum = TTI->getNumberOfRegisters(
1558	ClassID: TTI->getRegisterClassForType(Vector: false, Ty: F.getType())) - `1`;
1559	if (C.NumRegs > TTIRegNum) {
1560	// Cost already exceeded TTIRegNum, then only newly added register can add
1561	// new instructions.
1562	if (PrevNumRegs > TTIRegNum)
1563	C.Insns += (C.NumRegs - PrevNumRegs);
1564	else
1565	C.Insns += (C.NumRegs - TTIRegNum);
1566	}
1567
1568	// If ICmpZero formula ends with not 0, it could not be replaced by
1569	// just add or sub. We'll need to compare final result of AddRec.
1570	// That means we'll need an additional instruction. But if the target can
1571	// macro-fuse a compare with a branch, don't count this extra instruction.
1572	// For -10 + {0, +, 1}:
1573	// i = i + 1;
1574	// cmp i, 10
1575	//
1576	// For {-10, +, 1}:
1577	// i = i + 1;
1578	if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
1579	!TTI->canMacroFuseCmp())
1580	C.Insns++;
1581	// Each new AddRec adds 1 instruction to calculation.
1582	C.Insns += (C.AddRecCost - PrevAddRecCost);
1583
1584	// BaseAdds adds instructions for unfolded registers.
1585	if (LU.Kind != LSRUse::ICmpZero)
1586	C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
1587	assert(isValid() && "invalid cost");
1588	}
1589
1590	/// Set this cost to a losing value.
1591	void Cost::Lose() {
1592	C.Insns = std::numeric_limits<unsigned>::max();
1593	C.NumRegs = std::numeric_limits<unsigned>::max();
1594	C.AddRecCost = std::numeric_limits<unsigned>::max();
1595	C.NumIVMuls = std::numeric_limits<unsigned>::max();
1596	C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1597	C.ImmCost = std::numeric_limits<unsigned>::max();
1598	C.SetupCost = std::numeric_limits<unsigned>::max();
1599	C.ScaleCost = std::numeric_limits<unsigned>::max();
1600	}
1601
1602	/// Choose the lower cost.
1603	bool Cost::isLess(const Cost &Other) const {
1604	if (InsnsCost.getNumOccurrences() > `0` && InsnsCost &&
1605	C.Insns != Other.C.Insns)
1606	return C.Insns < Other.C.Insns;
1607	return TTI->isLSRCostLess(C1: C, C2: Other.C);
1608	}
1609
1610	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1611	void Cost::print(raw_ostream &OS) const {
1612	if (InsnsCost)
1613	OS << C.Insns << " instruction" << (C.Insns == `1` ? " " : "s ");
1614	OS << C.NumRegs << " reg" << (C.NumRegs == `1` ? "" : "s");
1615	if (C.AddRecCost != `0`)
1616	OS << ", with addrec cost " << C.AddRecCost;
1617	if (C.NumIVMuls != `0`)
1618	OS << ", plus " << C.NumIVMuls << " IV mul"
1619	<< (C.NumIVMuls == `1` ? "" : "s");
1620	if (C.NumBaseAdds != `0`)
1621	OS << ", plus " << C.NumBaseAdds << " base add"
1622	<< (C.NumBaseAdds == `1` ? "" : "s");
1623	if (C.ScaleCost != `0`)
1624	OS << ", plus " << C.ScaleCost << " scale cost";
1625	if (C.ImmCost != `0`)
1626	OS << ", plus " << C.ImmCost << " imm cost";
1627	if (C.SetupCost != `0`)
1628	OS << ", plus " << C.SetupCost << " setup cost";
1629	}
1630
1631	LLVM_DUMP_METHOD void Cost::dump() const {
1632	print(errs()); errs() << `'\n'`;
1633	}
1634	#endif
1635
1636	/// Test whether this fixup always uses its value outside of the given loop.
1637	bool LSRFixup::isUseFullyOutsideLoop(const Loop L) const* {
1638	// PHI nodes use their value in their incoming blocks.
1639	if (const PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) {
1640	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i)
1641	if (PN->getIncomingValue(i) == OperandValToReplace &&
1642	L->contains(BB: PN->getIncomingBlock(i)))
1643	return false;
1644	return true;
1645	}
1646
1647	return !L->contains(Inst: UserInst);
1648	}
1649
1650	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1651	void LSRFixup::print(raw_ostream &OS) const {
1652	OS << "UserInst=";
1653	// Store is common and interesting enough to be worth special-casing.
1654	if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1655	OS << "store ";
1656	Store->getOperand(`0`)->printAsOperand(OS, /PrintType=/false);
1657	} else if (UserInst->getType()->isVoidTy())
1658	OS << UserInst->getOpcodeName();
1659	else
1660	UserInst->printAsOperand(OS, /PrintType=/false);
1661
1662	OS << ", OperandValToReplace=";
1663	OperandValToReplace->printAsOperand(OS, /PrintType=/false);
1664
1665	for (const Loop *PIL : PostIncLoops) {
1666	OS << ", PostIncLoop=";
1667	PIL->getHeader()->printAsOperand(OS, /PrintType=/false);
1668	}
1669
1670	if (Offset.isNonZero())
1671	OS << ", Offset=" << Offset;
1672	}
1673
1674	LLVM_DUMP_METHOD void LSRFixup::dump() const {
1675	print(errs()); errs() << `'\n'`;
1676	}
1677	#endif
1678
1679	/// Test whether this use as a formula which has the same registers as the given
1680	/// formula.
1681	bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1682	SmallVector<const SCEV *, `4`> Key = F.BaseRegs;
1683	if (F.ScaledReg) Key.push_back(Elt: F.ScaledReg);
1684	// Unstable sort by host order ok, because this is only used for uniquifying.
1685	llvm::sort(C&: Key);
1686	return Uniquifier.count(V: Key);
1687	}
1688
1689	/// The function returns a probability of selecting formula without Reg.
1690	float LSRUse::getNotSelectedProbability(const SCEV Reg) const* {
1691	unsigned FNum = `0`;
1692	for (const Formula &F : Formulae)
1693	if (F.referencesReg(S: Reg))
1694	FNum++;
1695	return ((float)(Formulae.size() - FNum)) / Formulae.size();
1696	}
1697
1698	/// If the given formula has not yet been inserted, add it to the list, and
1699	/// return true. Return false otherwise. The formula must be in canonical form.
1700	bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
1701	assert(F.isCanonical(L) && "Invalid canonical representation");
1702
1703	if (!Formulae.empty() && RigidFormula)
1704	return false;
1705
1706	SmallVector<const SCEV *, `4`> Key = F.BaseRegs;
1707	if (F.ScaledReg) Key.push_back(Elt: F.ScaledReg);
1708	// Unstable sort by host order ok, because this is only used for uniquifying.
1709	llvm::sort(C&: Key);
1710
1711	if (!Uniquifier.insert(V: Key).second)
1712	return false;
1713
1714	// Using a register to hold the value of 0 is not profitable.
1715	assert((!F.ScaledReg \|\| !F.ScaledReg->isZero()) &&
1716	"Zero allocated in a scaled register!");
1717	#ifndef NDEBUG
1718	for (const SCEV *BaseReg : F.BaseRegs)
1719	assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1720	#endif
1721
1722	// Add the formula to the list.
1723	Formulae.push_back(Elt: F);
1724
1725	// Record registers now being used by this use.
1726	Regs.insert_range(R: F.BaseRegs);
1727	if (F.ScaledReg)
1728	Regs.insert(Ptr: F.ScaledReg);
1729
1730	return true;
1731	}
1732
1733	/// Remove the given formula from this use's list.
1734	void LSRUse::DeleteFormula(Formula &F) {
1735	if (&F != &Formulae.back())
1736	std::swap(a&: F, b&: Formulae.back());
1737	Formulae.pop_back();
1738	}
1739
1740	/// Recompute the Regs field, and update RegUses.
1741	void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1742	// Now that we've filtered out some formulae, recompute the Regs set.
1743	SmallPtrSet<const SCEV *, `4`> OldRegs = std::move(Regs);
1744	Regs.clear();
1745	for (const Formula &F : Formulae) {
1746	if (F.ScaledReg) Regs.insert(Ptr: F.ScaledReg);
1747	Regs.insert_range(R: F.BaseRegs);
1748	}
1749
1750	// Update the RegTracker.
1751	for (const SCEV *S : OldRegs)
1752	if (!Regs.count(Ptr: S))
1753	RegUses.dropRegister(Reg: S, LUIdx);
1754	}
1755
1756	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1757	void LSRUse::print(raw_ostream &OS) const {
1758	OS << "LSR Use: Kind=";
1759	switch (Kind) {
1760	case Basic: OS << "Basic"; break;
1761	case Special: OS << "Special"; break;
1762	case ICmpZero: OS << "ICmpZero"; break;
1763	case Address:
1764	OS << "Address of ";
1765	if (AccessTy.MemTy->isPointerTy())
1766	OS << "pointer"; // the full pointer type could be really verbose
1767	else {
1768	OS << *AccessTy.MemTy;
1769	}
1770
1771	OS << " in addrspace(" << AccessTy.AddrSpace << `')'`;
1772	}
1773
1774	OS << ", Offsets={";
1775	bool NeedComma = false;
1776	for (const LSRFixup &Fixup : Fixups) {
1777	if (NeedComma) OS << `','`;
1778	OS << Fixup.Offset;
1779	NeedComma = true;
1780	}
1781	OS << `'}'`;
1782
1783	if (AllFixupsOutsideLoop)
1784	OS << ", all-fixups-outside-loop";
1785
1786	if (AllFixupsUnconditional)
1787	OS << ", all-fixups-unconditional";
1788	}
1789
1790	LLVM_DUMP_METHOD void LSRUse::dump() const {
1791	print(errs()); errs() << `'\n'`;
1792	}
1793	#endif
1794
1795	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1796	LSRUse::KindType Kind, MemAccessTy AccessTy,
1797	GlobalValue *BaseGV, Immediate BaseOffset,
1798	bool HasBaseReg, int64_t Scale,
1799	Instruction Fixup /* = nullptr /) {
1800	switch (Kind) {
1801	case LSRUse::Address: {
1802	int64_t FixedOffset =
1803	BaseOffset.isScalable() ? `0` : BaseOffset.getFixedValue();
1804	int64_t ScalableOffset =
1805	BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : `0`;
1806	return TTI.isLegalAddressingMode(Ty: AccessTy.MemTy, BaseGV, BaseOffset: FixedOffset,
1807	HasBaseReg, Scale, AddrSpace: AccessTy.AddrSpace,
1808	I: Fixup, ScalableOffset);
1809	}
1810	case LSRUse::ICmpZero:
1811	// There's not even a target hook for querying whether it would be legal to
1812	// fold a GV into an ICmp.
1813	if (BaseGV)
1814	return false;
1815
1816	// ICmp only has two operands; don't allow more than two non-trivial parts.
1817	if (Scale != `0` && HasBaseReg && BaseOffset.isNonZero())
1818	return false;
1819
1820	// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1821	// putting the scaled register in the other operand of the icmp.
1822	if (Scale != `0` && Scale != -`1`)
1823	return false;
1824
1825	// If we have low-level target information, ask the target if it can fold an
1826	// integer immediate on an icmp.
1827	if (BaseOffset.isNonZero()) {
1828	// We don't have an interface to query whether the target supports
1829	// icmpzero against scalable quantities yet.
1830	if (BaseOffset.isScalable())
1831	return false;
1832
1833	// We have one of:
1834	// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1835	// ICmpZero -1ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset*
1836	// Offs is the ICmp immediate.
1837	if (Scale == `0`)
1838	// The cast does the right thing with
1839	// std::numeric_limits<int64_t>::min().
1840	BaseOffset = BaseOffset.getFixed(MinVal: -(uint64_t)BaseOffset.getFixedValue());
1841	return TTI.isLegalICmpImmediate(Imm: BaseOffset.getFixedValue());
1842	}
1843
1844	// ICmpZero BaseReg + -1ScaleReg => ICmp BaseReg, ScaleReg*
1845	return true;
1846
1847	case LSRUse::Basic:
1848	// Only handle single-register values.
1849	return !BaseGV && Scale == `0` && BaseOffset.isZero();
1850
1851	case LSRUse::Special:
1852	// Special case Basic to handle -1 scales.
1853	return !BaseGV && (Scale == `0` \|\| Scale == -`1`) && BaseOffset.isZero();
1854	}
1855
1856	llvm_unreachable("Invalid LSRUse Kind!");
1857	}
1858
1859	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1860	Immediate MinOffset, Immediate MaxOffset,
1861	LSRUse::KindType Kind, MemAccessTy AccessTy,
1862	GlobalValue *BaseGV, Immediate BaseOffset,
1863	bool HasBaseReg, int64_t Scale) {
1864	if (BaseOffset.isNonZero() &&
1865	(BaseOffset.isScalable() != MinOffset.isScalable() \|\|
1866	BaseOffset.isScalable() != MaxOffset.isScalable()))
1867	return false;
1868	// Check for overflow.
1869	int64_t Base = BaseOffset.getKnownMinValue();
1870	int64_t Min = MinOffset.getKnownMinValue();
1871	int64_t Max = MaxOffset.getKnownMinValue();
1872	if (((int64_t)((uint64_t)Base + Min) > Base) != (Min > `0`))
1873	return false;
1874	MinOffset = Immediate::get(MinVal: (uint64_t)Base + Min, Scalable: MinOffset.isScalable());
1875	if (((int64_t)((uint64_t)Base + Max) > Base) != (Max > `0`))
1876	return false;
1877	MaxOffset = Immediate::get(MinVal: (uint64_t)Base + Max, Scalable: MaxOffset.isScalable());
1878
1879	return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset: MinOffset,
1880	HasBaseReg, Scale) &&
1881	isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset: MaxOffset,
1882	HasBaseReg, Scale);
1883	}
1884
1885	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1886	Immediate MinOffset, Immediate MaxOffset,
1887	LSRUse::KindType Kind, MemAccessTy AccessTy,
1888	const Formula &F, const Loop &L) {
1889	// For the purpose of isAMCompletelyFolded either having a canonical formula
1890	// or a scale not equal to zero is correct.
1891	// Problems may arise from non canonical formulae having a scale == 0.
1892	// Strictly speaking it would best to just rely on canonical formulae.
1893	// However, when we generate the scaled formulae, we first check that the
1894	// scaling factor is profitable before computing the actual ScaledReg for
1895	// compile time sake.
1896	assert((F.isCanonical(L) \|\| F.Scale != `0`));
1897	return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1898	BaseGV: F.BaseGV, BaseOffset: F.BaseOffset, HasBaseReg: F.HasBaseReg, Scale: F.Scale);
1899	}
1900
1901	/// Test whether we know how to expand the current formula.
1902	static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
1903	Immediate MaxOffset, LSRUse::KindType Kind,
1904	MemAccessTy AccessTy, GlobalValue *BaseGV,
1905	Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
1906	// We know how to expand completely foldable formulae.
1907	return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1908	BaseOffset, HasBaseReg, Scale) \|\|
1909	// Or formulae that use a base register produced by a sum of base
1910	// registers.
1911	(Scale == `1` &&
1912	isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1913	BaseGV, BaseOffset, HasBaseReg: true, Scale: `0`));
1914	}
1915
1916	static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
1917	Immediate MaxOffset, LSRUse::KindType Kind,
1918	MemAccessTy AccessTy, const Formula &F) {
1919	return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV: F.BaseGV,
1920	BaseOffset: F.BaseOffset, HasBaseReg: F.HasBaseReg, Scale: F.Scale);
1921	}
1922
1923	static bool isLegalAddImmediate(const TargetTransformInfo &TTI,
1924	Immediate Offset) {
1925	if (Offset.isScalable())
1926	return TTI.isLegalAddScalableImmediate(Imm: Offset.getKnownMinValue());
1927
1928	return TTI.isLegalAddImmediate(Imm: Offset.getFixedValue());
1929	}
1930
1931	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1932	const LSRUse &LU, const Formula &F) {
1933	// Target may want to look at the user instructions.
1934	if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
1935	for (const LSRFixup &Fixup : LU.Fixups)
1936	if (!isAMCompletelyFolded(TTI, Kind: LSRUse::Address, AccessTy: LU.AccessTy, BaseGV: F.BaseGV,
1937	BaseOffset: (F.BaseOffset + Fixup.Offset), HasBaseReg: F.HasBaseReg,
1938	Scale: F.Scale, Fixup: Fixup.UserInst))
1939	return false;
1940	return true;
1941	}
1942
1943	return isAMCompletelyFolded(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
1944	AccessTy: LU.AccessTy, BaseGV: F.BaseGV, BaseOffset: F.BaseOffset, HasBaseReg: F.HasBaseReg,
1945	Scale: F.Scale);
1946	}
1947
1948	static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
1949	const LSRUse &LU, const Formula &F,
1950	const Loop &L) {
1951	if (!F.Scale)
1952	return `0`;
1953
1954	// If the use is not completely folded in that instruction, we will have to
1955	// pay an extra cost only for scale != 1.
1956	if (!isAMCompletelyFolded(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
1957	AccessTy: LU.AccessTy, F, L))
1958	return F.Scale != `1`;
1959
1960	switch (LU.Kind) {
1961	case LSRUse::Address: {
1962	// Check the scaling factor cost with both the min and max offsets.
1963	int64_t ScalableMin = `0`, ScalableMax = `0`, FixedMin = `0`, FixedMax = `0`;
1964	if (F.BaseOffset.isScalable()) {
1965	ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
1966	ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1967	} else {
1968	FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
1969	FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
1970	}
1971	InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(
1972	Ty: LU.AccessTy.MemTy, BaseGV: F.BaseGV, BaseOffset: StackOffset::get(Fixed: FixedMin, Scalable: ScalableMin),
1973	HasBaseReg: F.HasBaseReg, Scale: F.Scale, AddrSpace: LU.AccessTy.AddrSpace);
1974	InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(
1975	Ty: LU.AccessTy.MemTy, BaseGV: F.BaseGV, BaseOffset: StackOffset::get(Fixed: FixedMax, Scalable: ScalableMax),
1976	HasBaseReg: F.HasBaseReg, Scale: F.Scale, AddrSpace: LU.AccessTy.AddrSpace);
1977
1978	assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
1979	"Legal addressing mode has an illegal cost!");
1980	return std::max(a: ScaleCostMinOffset, b: ScaleCostMaxOffset);
1981	}
1982	case LSRUse::ICmpZero:
1983	case LSRUse::Basic:
1984	case LSRUse::Special:
1985	// The use is completely folded, i.e., everything is folded into the
1986	// instruction.
1987	return `0`;
1988	}
1989
1990	llvm_unreachable("Invalid LSRUse Kind!");
1991	}
1992
1993	static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1994	LSRUse::KindType Kind, MemAccessTy AccessTy,
1995	GlobalValue *BaseGV, Immediate BaseOffset,
1996	bool HasBaseReg) {
1997	// Fast-path: zero is always foldable.
1998	if (BaseOffset.isZero() && !BaseGV)
1999	return true;
2000
2001	// Conservatively, create an address with an immediate and a
2002	// base and a scale.
2003	int64_t Scale = Kind == LSRUse::ICmpZero ? -`1` : `1`;
2004
2005	// Canonicalize a scale of 1 to a base register if the formula doesn't
2006	// already have a base register.
2007	if (!HasBaseReg && Scale == `1`) {
2008	Scale = `0`;
2009	HasBaseReg = true;
2010	}
2011
2012	// FIXME: Try with + without a scale? Maybe based on TTI?
2013	// I think basereg + scaledreg + immediateoffset isn't a good 'conservative'
2014	// default for many architectures, not just AArch64 SVE. More investigation
2015	// needed later to determine if this should be used more widely than just
2016	// on scalable types.
2017	if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2018	AccessTy.MemTy && AccessTy.MemTy->isScalableTy() && DropScaledForVScale)
2019	Scale = `0`;
2020
2021	return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
2022	HasBaseReg, Scale);
2023	}
2024
2025	static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
2026	ScalarEvolution &SE, Immediate MinOffset,
2027	Immediate MaxOffset, LSRUse::KindType Kind,
2028	MemAccessTy AccessTy, const SCEV *S,
2029	bool HasBaseReg) {
2030	// Fast-path: zero is always foldable.
2031	if (S->isZero()) return true;
2032
2033	// Conservatively, create an address with an immediate and a
2034	// base and a scale.
2035	SCEVUse SCopy = S;
2036	Immediate BaseOffset = ExtractImmediate(S&: SCopy, SE);
2037	GlobalValue *BaseGV = ExtractSymbol(S&: SCopy, SE);
2038
2039	// If there's anything else involved, it's not foldable.
2040	if (!SCopy ->isZero())
2041	return false;
2042
2043	// Fast-path: zero is always foldable.
2044	if (BaseOffset.isZero() && !BaseGV)
2045	return true;
2046
2047	if (BaseOffset.isScalable())
2048	return false;
2049
2050	// Conservatively, create an address with an immediate and a
2051	// base and a scale.
2052	int64_t Scale = Kind == LSRUse::ICmpZero ? -`1` : `1`;
2053
2054	return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
2055	BaseOffset, HasBaseReg, Scale);
2056	}
2057
2058	namespace {
2059
2060	/// An individual increment in a Chain of IV increments. Relate an IV user to
2061	/// an expression that computes the IV it uses from the IV used by the previous
2062	/// link in the Chain.
2063	///
2064	/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
2065	/// original IVOperand. The head of the chain's IVOperand is only valid during
2066	/// chain collection, before LSR replaces IV users. During chain generation,
2067	/// IncExpr can be used to find the new IVOperand that computes the same
2068	/// expression.
2069	struct IVInc {
2070	Instruction *UserInst;
2071	Value* IVOperand;
2072	const SCEV *IncExpr;
2073
2074	IVInc(Instruction U, Value O, const SCEV *E)
2075	: UserInst(U), IVOperand(O), IncExpr(E) {}
2076	};
2077
2078	// The list of IV increments in program order. We typically add the head of a
2079	// chain without finding subsequent links.
2080	struct IVChain {
2081	SmallVector<IVInc, `1`> Incs;
2082	const SCEV ExprBase = nullptr*;
2083
2084	IVChain() = default;
2085	IVChain(const IVInc &Head, const SCEV *Base)
2086	: Incs (`1`, Head), ExprBase(Base) {}
2087
2088	using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
2089
2090	// Return the first increment in the chain.
2091	const_iterator begin() const {
2092	assert(!Incs.empty());
2093	return std::next(x: Incs.begin());
2094	}
2095	const_iterator end() const {
2096	return Incs.end();
2097	}
2098
2099	// Returns true if this chain contains any increments.
2100	bool hasIncs() const { return Incs.size() >= `2`; }
2101
2102	// Add an IVInc to the end of this chain.
2103	void add(const IVInc &X) { Incs.push_back(Elt: X); }
2104
2105	// Returns the last UserInst in the chain.
2106	Instruction tailUserInst() const* { return Incs.back().UserInst; }
2107
2108	// Returns true if IncExpr can be profitably added to this chain.
2109	bool isProfitableIncrement(const SCEV *OperExpr,
2110	const SCEV *IncExpr,
2111	ScalarEvolution&);
2112	};
2113
2114	/// Helper for CollectChains to track multiple IV increment uses. Distinguish
2115	/// between FarUsers that definitely cross IV increments and NearUsers that may
2116	/// be used between IV increments.
2117	struct ChainUsers {
2118	SmallPtrSet<Instruction*, `4`> FarUsers;
2119	SmallPtrSet<Instruction*, `4`> NearUsers;
2120	};
2121
2122	/// This class holds state for the main loop strength reduction logic.
2123	class LSRInstance {
2124	IVUsers &IU;
2125	ScalarEvolution &SE;
2126	DominatorTree &DT;
2127	LoopInfo &LI;
2128	AssumptionCache &AC;
2129	TargetLibraryInfo &TLI;
2130	const TargetTransformInfo &TTI;
2131	Loop *const L;
2132	MemorySSAUpdater *MSSAU;
2133	TTI::AddressingModeKind AMK;
2134	mutable SCEVExpander Rewriter;
2135	bool Changed = false;
2136	bool HardwareLoopProfitable = false;
2137
2138	/// This is the insert position that the current loop's induction variable
2139	/// increment should be placed. In simple loops, this is the latch block's
2140	/// terminator. But in more complicated cases, this is a position which will
2141	/// dominate all the in-loop post-increment users.
2142	Instruction IVIncInsertPos = nullptr*;
2143
2144	/// Interesting factors between use strides.
2145	///
2146	/// We explicitly use a SetVector which contains a SmallSet, instead of the
2147	/// default, a SmallDenseSet, because we need to use the full range of
2148	/// int64_ts, and there's currently no good way of doing that with
2149	/// SmallDenseSet.
2150	SetVector<int64_t, SmallVector<int64_t, `8`>, SmallSet<int64_t, `8`>> Factors;
2151
2152	/// The cost of the current SCEV, the best solution by LSR will be dropped if
2153	/// the solution is not profitable.
2154	Cost BaselineCost;
2155
2156	/// Interesting use types, to facilitate truncation reuse.
2157	SmallSetVector<Type *, `4`> Types;
2158
2159	/// The list of interesting uses.
2160	mutable SmallVector<LSRUse, `16`> Uses;
2161
2162	/// Track which uses use which register candidates.
2163	RegUseTracker RegUses;
2164
2165	// Limit the number of chains to avoid quadratic behavior. We don't expect to
2166	// have more than a few IV increment chains in a loop. Missing a Chain falls
2167	// back to normal LSR behavior for those uses.
2168	static const unsigned MaxChains = `8`;
2169
2170	/// IV users can form a chain of IV increments.
2171	SmallVector<IVChain, MaxChains> IVChainVec;
2172
2173	/// IV users that belong to profitable IVChains.
2174	SmallPtrSet<Use*, MaxChains> IVIncSet;
2175
2176	/// Induction variables that were generated and inserted by the SCEV Expander.
2177	SmallVector<llvm::WeakVH, `2`> ScalarEvolutionIVs;
2178
2179	// Inserting instructions in the loop and using them as PHI's input could
2180	// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
2181	// corresponding incoming block is not loop exiting). So collect all such
2182	// instructions to form LCSSA for them later.
2183	SmallSetVector<Instruction *, `4`> InsertedNonLCSSAInsts;
2184
2185	void OptimizeShadowIV();
2186	bool FindIVUserForCond(Instruction Cond, IVStrideUse &CondUse);
2187	Instruction OptimizeMax(ICmpInst Cond, IVStrideUse *&CondUse);
2188	void OptimizeLoopTermCond();
2189
2190	void ChainInstruction(Instruction UserInst, Instruction IVOper,
2191	SmallVectorImpl<ChainUsers> &ChainUsersVec);
2192	void FinalizeChain(IVChain &Chain);
2193	void CollectChains();
2194	void GenerateIVChain(const IVChain &Chain,
2195	SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2196
2197	void CollectInterestingTypesAndFactors();
2198	void CollectFixupsAndInitialFormulae();
2199
2200	// Support for sharing of LSRUses between LSRFixups.
2201	using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2202	UseMapTy UseMap;
2203
2204	bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
2205	LSRUse::KindType Kind, MemAccessTy AccessTy);
2206
2207	std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
2208	MemAccessTy AccessTy);
2209
2210	void DeleteUse(LSRUse &LU, size_t LUIdx);
2211
2212	LSRUse FindUseWithSimilarFormula(const* Formula &F, const LSRUse &OrigLU);
2213
2214	void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2215	void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2216	void CountRegisters(const Formula &F, size_t LUIdx);
2217	bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
2218	bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;
2219
2220	void CollectLoopInvariantFixupsAndFormulae();
2221
2222	void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
2223	unsigned Depth = `0`);
2224
2225	void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
2226	const Formula &Base, unsigned Depth,
2227	size_t Idx, bool IsScaledReg = false);
2228	void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
2229	void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2230	const Formula &Base, size_t Idx,
2231	bool IsScaledReg = false);
2232	void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2233	void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2234	const Formula &Base,
2235	const SmallVectorImpl<Immediate> &Worklist,
2236	size_t Idx, bool IsScaledReg = false);
2237	void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2238	void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2239	void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2240	void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
2241	void GenerateCrossUseConstantOffsets();
2242	void GenerateAllReuseFormulae();
2243
2244	void FilterOutUndesirableDedicatedRegisters();
2245
2246	size_t EstimateSearchSpaceComplexity() const;
2247	void NarrowSearchSpaceByDetectingSupersets();
2248	void NarrowSearchSpaceByCollapsingUnrolledCode();
2249	void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2250	void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2251	void NarrowSearchSpaceByFilterPostInc();
2252	void NarrowSearchSpaceByDeletingCostlyFormulas();
2253	void NarrowSearchSpaceByPickingWinnerRegs();
2254	void NarrowSearchSpaceUsingHeuristics();
2255
2256	void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2257	Cost &SolutionCost,
2258	SmallVectorImpl<const Formula *> &Workspace,
2259	const Cost &CurCost,
2260	const SmallPtrSet<const SCEV *, `16`> &CurRegs,
2261	DenseSet<const SCEV > &VisitedRegs) const*;
2262	void Solve(SmallVectorImpl<const Formula > &Solution) const*;
2263
2264	BasicBlock::iterator
2265	HoistInsertPosition(BasicBlock::iterator IP,
2266	const SmallVectorImpl<Instruction > &Inputs) const*;
2267	BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
2268	const LSRFixup &LF,
2269	const LSRUse &LU) const;
2270
2271	Value Expand(const* LSRUse &LU, const LSRFixup &LF, const Formula &F,
2272	BasicBlock::iterator IP,
2273	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2274	void RewriteForPHI(PHINode PN, const* LSRUse &LU, const LSRFixup &LF,
2275	const Formula &F,
2276	SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2277	void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2278	SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2279	void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
2280
2281	public:
2282	LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2283	LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
2284	TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2285
2286	bool getChanged() const { return Changed; }
2287	const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
2288	return ScalarEvolutionIVs;
2289	}
2290
2291	void print_factors_and_types(raw_ostream &OS) const;
2292	void print_fixups(raw_ostream &OS) const;
2293	void print_uses(raw_ostream &OS) const;
2294	void print(raw_ostream &OS) const;
2295	void dump() const;
2296	};
2297
2298	} // end anonymous namespace
2299
2300	/// If IV is used in a int-to-float cast inside the loop then try to eliminate
2301	/// the cast operation.
2302	void LSRInstance::OptimizeShadowIV() {
2303	const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2304	if (isa<SCEVCouldNotCompute>(Val: BackedgeTakenCount))
2305	return;
2306
2307	for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
2308	UI != E; / empty /) {
2309	IVUsers::const_iterator CandidateUI = UI;
2310	++UI;
2311	Instruction *ShadowUse = CandidateUI ->getUser();
2312	Type DestTy = nullptr*;
2313	bool IsSigned = false;
2314
2315	/ If shadow use is a int->float cast then insert a second IV*
2316	to eliminate this cast.
2317
2318	for (unsigned i = 0; i < n; ++i)
2319	foo((double)i);
2320
2321	is transformed into
2322
2323	double d = 0.0;
2324	for (unsigned i = 0; i < n; ++i, ++d)
2325	foo(d);
2326	*/
2327	if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(Val: CandidateUI ->getUser())) {
2328	IsSigned = false;
2329	DestTy = UCast->getDestTy();
2330	}
2331	else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(Val: CandidateUI ->getUser())) {
2332	IsSigned = true;
2333	DestTy = SCast->getDestTy();
2334	}
2335	if (!DestTy) continue;
2336
2337	// If target does not support DestTy natively then do not apply
2338	// this transformation.
2339	if (!TTI.isTypeLegal(Ty: DestTy)) continue;
2340
2341	PHINode *PH = dyn_cast<PHINode>(Val: ShadowUse->getOperand(i: `0`));
2342	if (!PH) continue;
2343	if (PH->getNumIncomingValues() != `2`) continue;
2344
2345	// If the calculation in integers overflows, the result in FP type will
2346	// differ. So we only can do this transformation if we are guaranteed to not
2347	// deal with overflowing values
2348	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: PH));
2349	if (!AR) continue;
2350	if (IsSigned && !AR->hasNoSignedWrap()) continue;
2351	if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
2352
2353	Type *SrcTy = PH->getType();
2354	int Mantissa = DestTy->getFPMantissaWidth();
2355	if (Mantissa == -`1`) continue;
2356	if ((int)SE.getTypeSizeInBits(Ty: SrcTy) > Mantissa)
2357	continue;
2358
2359	unsigned Entry, Latch;
2360	if (PH->getIncomingBlock(i: `0`) == L->getLoopPreheader()) {
2361	Entry = `0`;
2362	Latch = `1`;
2363	} else {
2364	Entry = `1`;
2365	Latch = `0`;
2366	}
2367
2368	ConstantInt *Init = dyn_cast<ConstantInt>(Val: PH->getIncomingValue(i: Entry));
2369	if (!Init) continue;
2370	Constant *NewInit = ConstantFP::get(Ty: DestTy, V: IsSigned ?
2371	(double)Init->getSExtValue() :
2372	(double)Init->getZExtValue());
2373
2374	BinaryOperator *Incr =
2375	dyn_cast<BinaryOperator>(Val: PH->getIncomingValue(i: Latch));
2376	if (!Incr) continue;
2377	if (Incr->getOpcode() != Instruction::Add
2378	&& Incr->getOpcode() != Instruction::Sub)
2379	continue;
2380
2381	/ Initialize new IV, double d = 0.0 in above example. /
2382	ConstantInt C = nullptr*;
2383	if (Incr->getOperand(i_nocapture: `0`) == PH)
2384	C = dyn_cast<ConstantInt>(Val: Incr->getOperand(i_nocapture: `1`));
2385	else if (Incr->getOperand(i_nocapture: `1`) == PH)
2386	C = dyn_cast<ConstantInt>(Val: Incr->getOperand(i_nocapture: `0`));
2387	else
2388	continue;
2389
2390	if (!C) continue;
2391
2392	// Ignore negative constants, as the code below doesn't handle them
2393	// correctly. TODO: Remove this restriction.
2394	if (!C->getValue().isStrictlyPositive())
2395	continue;
2396
2397	/ Add new PHINode. /
2398	PHINode *NewPH = PHINode::Create(Ty: DestTy, NumReservedValues: `2`, NameStr: "IV.S.", InsertBefore: PH->getIterator());
2399	NewPH->setDebugLoc(PH->getDebugLoc());
2400
2401	/ create new increment. '++d' in above example. /
2402	Constant *CFP = ConstantFP::get(Ty: DestTy, V: C->getZExtValue());
2403	BinaryOperator *NewIncr = BinaryOperator::Create(
2404	Op: Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
2405	: Instruction::FSub,
2406	S1: NewPH, S2: CFP, Name: "IV.S.next.", InsertBefore: Incr->getIterator());
2407	NewIncr->setDebugLoc(Incr->getDebugLoc());
2408
2409	NewPH->addIncoming(V: NewInit, BB: PH->getIncomingBlock(i: Entry));
2410	NewPH->addIncoming(V: NewIncr, BB: PH->getIncomingBlock(i: Latch));
2411
2412	/ Remove cast operation /
2413	ShadowUse->replaceAllUsesWith(V: NewPH);
2414	ShadowUse->eraseFromParent();
2415	Changed = true;
2416	break;
2417	}
2418	}
2419
2420	/// If Cond has an operand that is an expression of an IV, set the IV user and
2421	/// stride information and return true, otherwise return false.
2422	bool LSRInstance::FindIVUserForCond(Instruction Cond, IVStrideUse &CondUse) {
2423	for (IVStrideUse &U : IU)
2424	if (U.getUser() == Cond) {
2425	// NOTE: we could handle setcc instructions with multiple uses here, but
2426	// InstCombine does it as well for simple uses, it's not clear that it
2427	// occurs enough in real life to handle.
2428	CondUse = &U;
2429	return true;
2430	}
2431	return false;
2432	}
2433
2434	/// Rewrite the loop's terminating condition if it uses a max computation.
2435	///
2436	/// This is a narrow solution to a specific, but acute, problem. For loops
2437	/// like this:
2438	///
2439	/// i = 0;
2440	/// do {
2441	/// p[i] = 0.0;
2442	/// } while (++i < n);
2443	///
2444	/// the trip count isn't just 'n', because 'n' might not be positive. And
2445	/// unfortunately this can come up even for loops where the user didn't use
2446	/// a C do-while loop. For example, seemingly well-behaved top-test loops
2447	/// will commonly be lowered like this:
2448	///
2449	/// if (n > 0) {
2450	/// i = 0;
2451	/// do {
2452	/// p[i] = 0.0;
2453	/// } while (++i < n);
2454	/// }
2455	///
2456	/// and then it's possible for subsequent optimization to obscure the if
2457	/// test in such a way that indvars can't find it.
2458	///
2459	/// When indvars can't find the if test in loops like this, it creates a
2460	/// max expression, which allows it to give the loop a canonical
2461	/// induction variable:
2462	///
2463	/// i = 0;
2464	/// max = n < 1 ? 1 : n;
2465	/// do {
2466	/// p[i] = 0.0;
2467	/// } while (++i != max);
2468	///
2469	/// Canonical induction variables are necessary because the loop passes
2470	/// are designed around them. The most obvious example of this is the
2471	/// LoopInfo analysis, which doesn't remember trip count values. It
2472	/// expects to be able to rediscover the trip count each time it is
2473	/// needed, and it does this using a simple analysis that only succeeds if
2474	/// the loop has a canonical induction variable.
2475	///
2476	/// However, when it comes time to generate code, the maximum operation
2477	/// can be quite costly, especially if it's inside of an outer loop.
2478	///
2479	/// This function solves this problem by detecting this type of loop and
2480	/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
2481	/// the instructions for the maximum computation.
2482	Instruction LSRInstance::OptimizeMax(ICmpInst Cond, IVStrideUse *&CondUse) {
2483	// Check that the loop matches the pattern we're looking for.
2484	if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
2485	Cond->getPredicate() != CmpInst::ICMP_NE)
2486	return Cond;
2487
2488	SelectInst *Sel = dyn_cast<SelectInst>(Val: Cond->getOperand(i_nocapture: `1`));
2489	if (!Sel \|\| !Sel->hasOneUse()) return Cond;
2490
2491	const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2492	if (isa<SCEVCouldNotCompute>(Val: BackedgeTakenCount))
2493	return Cond;
2494	const SCEV *One = SE.getConstant(Ty: BackedgeTakenCount->getType(), V: `1`);
2495
2496	// Add one to the backedge-taken count to get the trip count.
2497	const SCEV *IterationCount = SE.getAddExpr(LHS: One, RHS: BackedgeTakenCount);
2498	if (IterationCount != SE.getSCEV(V: Sel)) return Cond;
2499
2500	// Check for a max calculation that matches the pattern. There's no check
2501	// for ICMP_ULE here because the comparison would be with zero, which
2502	// isn't interesting.
2503	CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
2504	const SCEVNAryExpr Max = nullptr*;
2505	if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(Val: BackedgeTakenCount)) {
2506	Pred = ICmpInst::ICMP_SLE;
2507	Max = S;
2508	} else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(Val: IterationCount)) {
2509	Pred = ICmpInst::ICMP_SLT;
2510	Max = S;
2511	} else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(Val: IterationCount)) {
2512	Pred = ICmpInst::ICMP_ULT;
2513	Max = U;
2514	} else {
2515	// No match; bail.
2516	return Cond;
2517	}
2518
2519	// To handle a max with more than two operands, this optimization would
2520	// require additional checking and setup.
2521	if (Max->getNumOperands() != `2`)
2522	return Cond;
2523
2524	const SCEV *MaxLHS = Max->getOperand(i: `0`);
2525	const SCEV *MaxRHS = Max->getOperand(i: `1`);
2526
2527	// ScalarEvolution canonicalizes constants to the left. For < and >, look
2528	// for a comparison with 1. For <= and >=, a comparison with zero.
2529	if (!MaxLHS \|\|
2530	(ICmpInst::isTrueWhenEqual(predicate: Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2531	return Cond;
2532
2533	// Check the relevant induction variable for conformance to
2534	// the pattern.
2535	const SCEV *IV = SE.getSCEV(V: Cond->getOperand(i_nocapture: `0`));
2536	if (!match(S: IV,
2537	P: m_scev_AffineAddRec(Op0: m_scev_SpecificInt(V: `1`), Op1: m_scev_SpecificInt(V: `1`))))
2538	return Cond;
2539
2540	assert(cast<SCEVAddRecExpr>(IV)->getLoop() == L &&
2541	"Loop condition operand is an addrec in a different loop!");
2542
2543	// Check the right operand of the select, and remember it, as it will
2544	// be used in the new comparison instruction.
2545	Value NewRHS = nullptr*;
2546	if (ICmpInst::isTrueWhenEqual(predicate: Pred)) {
2547	// Look for n+1, and grab n.
2548	if (AddOperator *BO = dyn_cast<AddOperator>(Val: Sel->getOperand(i_nocapture: `1`)))
2549	if (ConstantInt *BO1 = dyn_cast<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`)))
2550	if (BO1->isOne() && SE.getSCEV(V: BO->getOperand(i_nocapture: `0`)) == MaxRHS)
2551	NewRHS = BO->getOperand(i_nocapture: `0`);
2552	if (AddOperator *BO = dyn_cast<AddOperator>(Val: Sel->getOperand(i_nocapture: `2`)))
2553	if (ConstantInt *BO1 = dyn_cast<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`)))
2554	if (BO1->isOne() && SE.getSCEV(V: BO->getOperand(i_nocapture: `0`)) == MaxRHS)
2555	NewRHS = BO->getOperand(i_nocapture: `0`);
2556	if (!NewRHS)
2557	return Cond;
2558	} else if (SE.getSCEV(V: Sel->getOperand(i_nocapture: `1`)) == MaxRHS)
2559	NewRHS = Sel->getOperand(i_nocapture: `1`);
2560	else if (SE.getSCEV(V: Sel->getOperand(i_nocapture: `2`)) == MaxRHS)
2561	NewRHS = Sel->getOperand(i_nocapture: `2`);
2562	else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Val: MaxRHS))
2563	NewRHS = SU->getValue();
2564	else
2565	// Max doesn't match expected pattern.
2566	return Cond;
2567
2568	// Determine the new comparison opcode. It may be signed or unsigned,
2569	// and the original comparison may be either equality or inequality.
2570	if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2571	Pred = CmpInst::getInversePredicate(pred: Pred);
2572
2573	// Ok, everything looks ok to change the condition into an SLT or SGE and
2574	// delete the max calculation.
2575	ICmpInst NewCond = new* ICmpInst (Cond->getIterator(), Pred,
2576	Cond->getOperand(i_nocapture: `0`), NewRHS, "scmp");
2577
2578	// Delete the max calculation instructions.
2579	NewCond->setDebugLoc(Cond->getDebugLoc());
2580	Cond->replaceAllUsesWith(V: NewCond);
2581	CondUse->setUser(NewCond);
2582	Instruction *Cmp = cast<Instruction>(Val: Sel->getOperand(i_nocapture: `0`));
2583	Cond->eraseFromParent();
2584	Sel->eraseFromParent();
2585	if (Cmp->use_empty()) {
2586	salvageDebugInfo(I&: *Cmp);
2587	Cmp->eraseFromParent();
2588	}
2589	return NewCond;
2590	}
2591
2592	/// Change loop terminating condition to use the postinc iv when possible.
2593	void
2594	LSRInstance::OptimizeLoopTermCond() {
2595	SmallPtrSet<Instruction *, `4`> PostIncs;
2596
2597	// We need a different set of heuristics for rotated and non-rotated loops.
2598	// If a loop is rotated then the latch is also the backedge, so inserting
2599	// post-inc expressions just before the latch is ideal. To reduce live ranges
2600	// it also makes sense to rewrite terminating conditions to use post-inc
2601	// expressions.
2602	//
2603	// If the loop is not rotated then the latch is not a backedge; the latch
2604	// check is done in the loop head. Adding post-inc expressions before the
2605	// latch will cause overlapping live-ranges of pre-inc and post-inc expressions
2606	// in the loop body. In this case we do not* want to use post-inc expressions*
2607	// in the latch check, and we want to insert post-inc expressions before
2608	// the backedge.
2609	BasicBlock *LatchBlock = L->getLoopLatch();
2610	SmallVector<BasicBlock*, `8`> ExitingBlocks;
2611	L->getExitingBlocks(ExitingBlocks);
2612	if (!llvm::is_contained(Range&: ExitingBlocks, Element: LatchBlock)) {
2613	// The backedge doesn't exit the loop; treat this as a head-tested loop.
2614	IVIncInsertPos = LatchBlock->getTerminator();
2615	return;
2616	}
2617
2618	// Otherwise treat this as a rotated loop.
2619	for (BasicBlock *ExitingBlock : ExitingBlocks) {
2620	// Get the terminating condition for the loop if possible. If we
2621	// can, we want to change it to use a post-incremented version of its
2622	// induction variable, to allow coalescing the live ranges for the IV into
2623	// one register value.
2624
2625	CondBrInst *TermBr = dyn_cast<CondBrInst>(Val: ExitingBlock->getTerminator());
2626	if (!TermBr)
2627	continue;
2628
2629	Instruction *Cond = dyn_cast<Instruction>(Val: TermBr->getCondition());
2630	// If the argument to TermBr is an extractelement, then the source of that
2631	// instruction is what's generated the condition.
2632	auto *Extract = dyn_cast_or_null<ExtractElementInst>(Val: Cond);
2633	if (Extract)
2634	Cond = dyn_cast<Instruction>(Val: Extract->getVectorOperand());
2635	// FIXME: We could do more here, like handling logical operations where one
2636	// side is a cmp that uses an induction variable.
2637	if (!Cond)
2638	continue;
2639
2640	// Search IVUsesByStride to find Cond's IVUse if there is one.
2641	IVStrideUse CondUse = nullptr*;
2642	if (!FindIVUserForCond(Cond, CondUse))
2643	continue;
2644
2645	// If the trip count is computed in terms of a max (due to ScalarEvolution
2646	// being unable to find a sufficient guard, for example), change the loop
2647	// comparison to use SLT or ULT instead of NE.
2648	// One consequence of doing this now is that it disrupts the count-down
2649	// optimization. That's not always a bad thing though, because in such
2650	// cases it may still be worthwhile to avoid a max.
2651	if (auto *Cmp = dyn_cast<ICmpInst>(Val: Cond))
2652	Cond = OptimizeMax(Cond: Cmp, CondUse);
2653
2654	// If this exiting block dominates the latch block, it may also use
2655	// the post-inc value if it won't be shared with other uses.
2656	// Check for dominance.
2657	if (!DT.dominates(A: ExitingBlock, B: LatchBlock))
2658	continue;
2659
2660	// Conservatively avoid trying to use the post-inc value in non-latch
2661	// exits if there may be pre-inc users in intervening blocks.
2662	if (LatchBlock != ExitingBlock)
2663	for (const IVStrideUse &UI : IU)
2664	// Test if the use is reachable from the exiting block. This dominator
2665	// query is a conservative approximation of reachability.
2666	if (&UI != CondUse &&
2667	!DT.properlyDominates(A: UI.getUser()->getParent(), B: ExitingBlock)) {
2668	// Conservatively assume there may be reuse if the quotient of their
2669	// strides could be a legal scale.
2670	const SCEV A = IU.getStride(IU: CondUse, L);
2671	const SCEV *B = IU.getStride(IU: UI, L);
2672	if (!A \|\| !B) continue;
2673	if (SE.getTypeSizeInBits(Ty: A->getType()) !=
2674	SE.getTypeSizeInBits(Ty: B->getType())) {
2675	if (SE.getTypeSizeInBits(Ty: A->getType()) >
2676	SE.getTypeSizeInBits(Ty: B->getType()))
2677	B = SE.getSignExtendExpr(Op: B, Ty: A->getType());
2678	else
2679	A = SE.getSignExtendExpr(Op: A, Ty: B->getType());
2680	}
2681	if (const SCEVConstant *D =
2682	dyn_cast_or_null<SCEVConstant>(Val: getExactSDiv(LHS: B, RHS: A, SE))) {
2683	const ConstantInt *C = D->getValue();
2684	// Stride of one or negative one can have reuse with non-addresses.
2685	if (C->isOne() \|\| C->isMinusOne())
2686	goto decline_post_inc;
2687	// Avoid weird situations.
2688	if (C->getValue().getSignificantBits() >= `64` \|\|
2689	C->getValue().isMinSignedValue())
2690	goto decline_post_inc;
2691	// Check for possible scaled-address reuse.
2692	if (isAddressUse(TTI, Inst: UI.getUser(), OperandVal: UI.getOperandValToReplace())) {
2693	MemAccessTy AccessTy =
2694	getAccessType(TTI, Inst: UI.getUser(), OperandVal: UI.getOperandValToReplace());
2695	int64_t Scale = C->getSExtValue();
2696	if (TTI.isLegalAddressingMode(Ty: AccessTy.MemTy, /BaseGV=/nullptr,
2697	/BaseOffset=/`0`,
2698	/HasBaseReg=/true, Scale,
2699	AddrSpace: AccessTy.AddrSpace))
2700	goto decline_post_inc;
2701	Scale = -Scale;
2702	if (TTI.isLegalAddressingMode(Ty: AccessTy.MemTy, /BaseGV=/nullptr,
2703	/BaseOffset=/`0`,
2704	/HasBaseReg=/true, Scale,
2705	AddrSpace: AccessTy.AddrSpace))
2706	goto decline_post_inc;
2707	}
2708	}
2709	}
2710
2711	LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
2712	<< *Cond << `'\n'`);
2713
2714	// It's possible for the setcc instruction to be anywhere in the loop, and
2715	// possible for it to have multiple users. If it is not immediately before
2716	// the exiting block branch, move it.
2717	if (isa_and_nonnull<CmpInst>(Val: Cond) && Cond->getNextNode() != TermBr &&
2718	!Extract) {
2719	if (Cond->hasOneUse()) {
2720	Cond->moveBefore(InsertPos: TermBr->getIterator());
2721	} else {
2722	// Clone the terminating condition and insert into the loopend.
2723	Instruction *OldCond = Cond;
2724	Cond = Cond->clone();
2725	Cond->setName(L->getHeader()->getName() + ".termcond");
2726	Cond->insertInto(ParentBB: ExitingBlock, It: TermBr->getIterator());
2727
2728	// Clone the IVUse, as the old use still exists!
2729	CondUse = &IU.AddUser(User: Cond, Operand: CondUse->getOperandValToReplace());
2730	TermBr->replaceUsesOfWith(From: OldCond, To: Cond);
2731	}
2732	}
2733
2734	// If we get to here, we know that we can transform the setcc instruction to
2735	// use the post-incremented version of the IV, allowing us to coalesce the
2736	// live ranges for the IV correctly.
2737	CondUse->transformToPostInc(L);
2738	Changed = true;
2739
2740	PostIncs.insert(Ptr: Cond);
2741	decline_post_inc:;
2742	}
2743
2744	// Determine an insertion point for the loop induction variable increment. It
2745	// must dominate all the post-inc comparisons we just set up, and it must
2746	// dominate the loop latch edge.
2747	IVIncInsertPos = L->getLoopLatch()->getTerminator();
2748	for (Instruction *Inst : PostIncs)
2749	IVIncInsertPos = DT.findNearestCommonDominator(I1: IVIncInsertPos, I2: Inst);
2750	}
2751
2752	/// Determine if the given use can accommodate a fixup at the given offset and
2753	/// other details. If so, update the use and return true.
2754	bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2755	bool HasBaseReg, LSRUse::KindType Kind,
2756	MemAccessTy AccessTy) {
2757	Immediate NewMinOffset = LU.MinOffset;
2758	Immediate NewMaxOffset = LU.MaxOffset;
2759	MemAccessTy NewAccessTy = AccessTy;
2760
2761	// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2762	// something conservative, however this can pessimize in the case that one of
2763	// the uses will have all its uses outside the loop, for example.
2764	if (LU.Kind != Kind)
2765	return false;
2766
2767	// Check for a mismatched access type, and fall back conservatively as needed.
2768	// TODO: Be less conservative when the type is similar and can use the same
2769	// addressing modes.
2770	if (Kind == LSRUse::Address) {
2771	if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2772	NewAccessTy = MemAccessTy::getUnknown(Ctx&: AccessTy.MemTy->getContext(),
2773	AS: AccessTy.AddrSpace);
2774	}
2775	}
2776
2777	// Conservatively assume HasBaseReg is true for now.
2778	if (Immediate::isKnownLT(LHS: NewOffset, RHS: LU.MinOffset)) {
2779	if (!isAlwaysFoldable(TTI, Kind, AccessTy: NewAccessTy, /BaseGV=/nullptr,
2780	BaseOffset: LU.MaxOffset - NewOffset, HasBaseReg))
2781	return false;
2782	NewMinOffset = NewOffset;
2783	} else if (Immediate::isKnownGT(LHS: NewOffset, RHS: LU.MaxOffset)) {
2784	if (!isAlwaysFoldable(TTI, Kind, AccessTy: NewAccessTy, /BaseGV=/nullptr,
2785	BaseOffset: NewOffset - LU.MinOffset, HasBaseReg))
2786	return false;
2787	NewMaxOffset = NewOffset;
2788	}
2789
2790	// FIXME: We should be able to handle some level of scalable offset support
2791	// for 'void', but in order to get basic support up and running this is
2792	// being left out.
2793	if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
2794	(NewMinOffset.isScalable() \|\| NewMaxOffset.isScalable()))
2795	return false;
2796
2797	// Update the use.
2798	LU.MinOffset = NewMinOffset;
2799	LU.MaxOffset = NewMaxOffset;
2800	LU.AccessTy = NewAccessTy;
2801	return true;
2802	}
2803
2804	/// Return an LSRUse index and an offset value for a fixup which needs the given
2805	/// expression, with the given kind and optional access type. Either reuse an
2806	/// existing use or create a new one, as needed.
2807	std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
2808	LSRUse::KindType Kind,
2809	MemAccessTy AccessTy) {
2810	const SCEV *Copy = Expr;
2811	SCEVUse ExprUse = Expr;
2812	Immediate Offset = ExtractImmediate(S&: ExprUse, SE);
2813	Expr = ExprUse;
2814
2815	// Basic uses can't accept any offset, for example.
2816	if (!isAlwaysFoldable(TTI, Kind, AccessTy, /BaseGV=/ nullptr,
2817	BaseOffset: Offset, /HasBaseReg=/ true)) {
2818	Expr = Copy;
2819	Offset = Immediate::getFixed(MinVal: `0`);
2820	}
2821
2822	std::pair<UseMapTy::iterator, bool> P =
2823	UseMap.try_emplace(Key: LSRUse::SCEVUseKindPair (Expr, Kind));
2824	if (!P.second) {
2825	// A use already existed with this base.
2826	size_t LUIdx = P.first ->second;
2827	LSRUse &LU = Uses [LUIdx];
2828	if (reconcileNewOffset(LU, NewOffset: Offset, /HasBaseReg=/true, Kind, AccessTy))
2829	// Reuse this use.
2830	return std::make_pair(x&: LUIdx, y&: Offset);
2831	}
2832
2833	// Create a new use.
2834	size_t LUIdx = Uses.size();
2835	P.first ->second = LUIdx;
2836	Uses.push_back(Elt: LSRUse (Kind, AccessTy));
2837	LSRUse &LU = Uses [LUIdx];
2838
2839	LU.MinOffset = Offset;
2840	LU.MaxOffset = Offset;
2841	return std::make_pair(x&: LUIdx, y&: Offset);
2842	}
2843
2844	/// Delete the given use from the Uses list.
2845	void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2846	if (&LU != &Uses.back())
2847	std::swap(a&: LU, b&: Uses.back());
2848	Uses.pop_back();
2849
2850	// Update RegUses.
2851	RegUses.swapAndDropUse(LUIdx, LastLUIdx: Uses.size());
2852	}
2853
2854	/// Look for a use distinct from OrigLU which is has a formula that has the same
2855	/// registers as the given formula.
2856	LSRUse *
2857	LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2858	const LSRUse &OrigLU) {
2859	// Search all uses for the formula. This could be more clever.
2860	for (LSRUse &LU : Uses) {
2861	// Check whether this use is close enough to OrigLU, to see whether it's
2862	// worthwhile looking through its formulae.
2863	// Ignore ICmpZero uses because they may contain formulae generated by
2864	// GenerateICmpZeroScales, in which case adding fixup offsets may
2865	// be invalid.
2866	if (&LU != &OrigLU && LU.Kind != LSRUse::ICmpZero &&
2867	LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2868	LU.HasFormulaWithSameRegs(F: OrigF)) {
2869	// Scan through this use's formulae.
2870	for (const Formula &F : LU.Formulae) {
2871	// Check to see if this formula has the same registers and symbols
2872	// as OrigF.
2873	if (F.BaseRegs == OrigF.BaseRegs &&
2874	F.ScaledReg == OrigF.ScaledReg &&
2875	F.BaseGV == OrigF.BaseGV &&
2876	F.Scale == OrigF.Scale &&
2877	F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2878	if (F.BaseOffset.isZero())
2879	return &LU;
2880	// This is the formula where all the registers and symbols matched;
2881	// there aren't going to be any others. Since we declined it, we
2882	// can skip the rest of the formulae and proceed to the next LSRUse.
2883	break;
2884	}
2885	}
2886	}
2887	}
2888
2889	// Nothing looked good.
2890	return nullptr;
2891	}
2892
2893	void LSRInstance::CollectInterestingTypesAndFactors() {
2894	SmallSetVector<const SCEV *, `4`> Strides;
2895
2896	// Collect interesting types and strides.
2897	SmallVector<const SCEV *, `4`> Worklist;
2898	for (const IVStrideUse &U : IU) {
2899	const SCEV *Expr = IU.getExpr(IU: U);
2900	if (!Expr)
2901	continue;
2902
2903	// Collect interesting types.
2904	Types.insert(X: SE.getEffectiveSCEVType(Ty: Expr->getType()));
2905
2906	// Add strides for mentioned loops.
2907	Worklist.push_back(Elt: Expr);
2908	do {
2909	const SCEV *S = Worklist.pop_back_val();
2910	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2911	if (AR->getLoop() == L)
2912	Strides.insert(X: AR->getStepRecurrence(SE));
2913	Worklist.push_back(Elt: AR->getStart());
2914	} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
2915	append_range(C&: Worklist, R: Add->operands());
2916	}
2917	} while (!Worklist.empty());
2918	}
2919
2920	// Compute interesting factors from the set of interesting strides.
2921	for (SmallSetVector<const SCEV *, `4`>::const_iterator
2922	I = Strides.begin(), E = Strides.end(); I != E; ++I)
2923	for (SmallSetVector<const SCEV *, `4`>::const_iterator NewStrideIter =
2924	std::next(x: I); NewStrideIter != E; ++NewStrideIter) {
2925	const SCEV OldStride = I;
2926	const SCEV NewStride = NewStrideIter;
2927
2928	if (SE.getTypeSizeInBits(Ty: OldStride->getType()) !=
2929	SE.getTypeSizeInBits(Ty: NewStride->getType())) {
2930	if (SE.getTypeSizeInBits(Ty: OldStride->getType()) >
2931	SE.getTypeSizeInBits(Ty: NewStride->getType()))
2932	NewStride = SE.getSignExtendExpr(Op: NewStride, Ty: OldStride->getType());
2933	else
2934	OldStride = SE.getSignExtendExpr(Op: OldStride, Ty: NewStride->getType());
2935	}
2936	if (const SCEVConstant *Factor =
2937	dyn_cast_or_null<SCEVConstant>(Val: getExactSDiv(LHS: NewStride, RHS: OldStride,
2938	SE, IgnoreSignificantBits: true))) {
2939	if (Factor->getAPInt().getSignificantBits() <= `64` && !Factor->isZero())
2940	Factors.insert(X: Factor->getAPInt().getSExtValue());
2941	} else if (const SCEVConstant *Factor =
2942	dyn_cast_or_null<SCEVConstant>(Val: getExactSDiv(LHS: OldStride,
2943	RHS: NewStride,
2944	SE, IgnoreSignificantBits: true))) {
2945	if (Factor->getAPInt().getSignificantBits() <= `64` && !Factor->isZero())
2946	Factors.insert(X: Factor->getAPInt().getSExtValue());
2947	}
2948	}
2949
2950	// If all uses use the same type, don't bother looking for truncation-based
2951	// reuse.
2952	if (Types.size() == `1`)
2953	Types.clear();
2954
2955	LLVM_DEBUG(print_factors_and_types(dbgs()));
2956	}
2957
2958	/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
2959	/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
2960	/// IVStrideUses, we could partially skip this.
2961	static User::op_iterator
2962	findIVOperand(User::op_iterator OI, User::op_iterator OE,
2963	Loop *L, ScalarEvolution &SE) {
2964	for(; OI != OE; ++OI) {
2965	if (Instruction Oper = dyn_cast<Instruction>(Val&: OI)) {
2966	if (!SE.isSCEVable(Ty: Oper->getType()))
2967	continue;
2968
2969	if (const SCEVAddRecExpr *AR =
2970	dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: Oper))) {
2971	if (AR->getLoop() == L)
2972	break;
2973	}
2974	}
2975	}
2976	return OI;
2977	}
2978
2979	/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
2980	/// a convenient helper.
2981	static Value getWideOperand(Value Oper) {
2982	if (TruncInst *Trunc = dyn_cast<TruncInst>(Val: Oper))
2983	return Trunc->getOperand(i_nocapture: `0`);
2984	return Oper;
2985	}
2986
2987	/// Return an approximation of this SCEV expression's "base", or NULL for any
2988	/// constant. Returning the expression itself is conservative. Returning a
2989	/// deeper subexpression is more precise and valid as long as it isn't less
2990	/// complex than another subexpression. For expressions involving multiple
2991	/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
2992	/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
2993	/// IVInc==b-a.
2994	///
2995	/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2996	/// SCEVUnknown, we simply return the rightmost SCEV operand.
2997	static const SCEV getExprBase(const* SCEV *S) {
2998	switch (S->getSCEVType()) {
2999	default: // including scUnknown.
3000	return S;
3001	case scConstant:
3002	case scVScale:
3003	return nullptr;
3004	case scTruncate:
3005	return getExprBase(S: cast<SCEVTruncateExpr>(Val: S)->getOperand());
3006	case scZeroExtend:
3007	return getExprBase(S: cast<SCEVZeroExtendExpr>(Val: S)->getOperand());
3008	case scSignExtend:
3009	return getExprBase(S: cast<SCEVSignExtendExpr>(Val: S)->getOperand());
3010	case scAddExpr: {
3011	// Skip over scaled operands (scMulExpr) to follow add operands as long as
3012	// there's nothing more complex.
3013	// FIXME: not sure if we want to recognize negation.
3014	const SCEVAddExpr *Add = cast<SCEVAddExpr>(Val: S);
3015	for (const SCEV *SubExpr : reverse(C: Add->operands())) {
3016	if (SubExpr->getSCEVType() == scAddExpr)
3017	return getExprBase(S: SubExpr);
3018
3019	if (SubExpr->getSCEVType() != scMulExpr)
3020	return SubExpr;
3021	}
3022	return S; // all operands are scaled, be conservative.
3023	}
3024	case scAddRecExpr:
3025	return getExprBase(S: cast<SCEVAddRecExpr>(Val: S)->getStart());
3026	}
3027	llvm_unreachable("Unknown SCEV kind!");
3028	}
3029
3030	/// Return true if the chain increment is profitable to expand into a loop
3031	/// invariant value, which may require its own register. A profitable chain
3032	/// increment will be an offset relative to the same base. We allow such offsets
3033	/// to potentially be used as chain increment as long as it's not obviously
3034	/// expensive to expand using real instructions.
3035	bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
3036	const SCEV *IncExpr,
3037	ScalarEvolution &SE) {
3038	// Aggressively form chains when -stress-ivchain.
3039	if (StressIVChain)
3040	return true;
3041
3042	// Do not replace a constant offset from IV head with a nonconstant IV
3043	// increment.
3044	if (!isa<SCEVConstant>(Val: IncExpr)) {
3045	const SCEV *HeadExpr = SE.getSCEV(V: getWideOperand(Oper: Incs [`0`].IVOperand));
3046	if (isa<SCEVConstant>(Val: SE.getMinusSCEV(LHS: OperExpr, RHS: HeadExpr)))
3047	return false;
3048	}
3049
3050	SmallPtrSet<const SCEV*, `8`> Processed;
3051	return !isHighCostExpansion(S: IncExpr, Processed, SE);
3052	}
3053
3054	/// Return true if the number of registers needed for the chain is estimated to
3055	/// be less than the number required for the individual IV users. First prohibit
3056	/// any IV users that keep the IV live across increments (the Users set should
3057	/// be empty). Next count the number and type of increments in the chain.
3058	///
3059	/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
3060	/// effectively use postinc addressing modes. Only consider it profitable it the
3061	/// increments can be computed in fewer registers when chained.
3062	///
3063	/// TODO: Consider IVInc free if it's already used in another chains.
3064	static bool isProfitableChain(IVChain &Chain,
3065	SmallPtrSetImpl<Instruction *> &Users,
3066	ScalarEvolution &SE,
3067	const TargetTransformInfo &TTI) {
3068	if (StressIVChain)
3069	return true;
3070
3071	if (!Chain.hasIncs())
3072	return false;
3073
3074	if (!Users.empty()) {
3075	LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[`0`].UserInst << " users:\n";
3076	for (Instruction *Inst
3077	: Users) { dbgs() << " " << *Inst << "\n"; });
3078	return false;
3079	}
3080	assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
3081
3082	// The chain itself may require a register, so initialize cost to 1.
3083	int cost = `1`;
3084
3085	// A complete chain likely eliminates the need for keeping the original IV in
3086	// a register. LSR does not currently know how to form a complete chain unless
3087	// the header phi already exists.
3088	if (isa<PHINode>(Val: Chain.tailUserInst())
3089	&& SE.getSCEV(V: Chain.tailUserInst()) == Chain.Incs [`0`].IncExpr) {
3090	--cost;
3091	}
3092	const SCEV LastIncExpr = nullptr*;
3093	unsigned NumConstIncrements = `0`;
3094	unsigned NumVarIncrements = `0`;
3095	unsigned NumReusedIncrements = `0`;
3096
3097	if (TTI.isProfitableLSRChainElement(I: Chain.Incs [`0`].UserInst))
3098	return true;
3099
3100	for (const IVInc &Inc : Chain) {
3101	if (TTI.isProfitableLSRChainElement(I: Inc.UserInst))
3102	return true;
3103	if (Inc.IncExpr->isZero())
3104	continue;
3105
3106	// Incrementing by zero or some constant is neutral. We assume constants can
3107	// be folded into an addressing mode or an add's immediate operand.
3108	if (isa<SCEVConstant>(Val: Inc.IncExpr)) {
3109	++NumConstIncrements;
3110	continue;
3111	}
3112
3113	if (Inc.IncExpr == LastIncExpr)
3114	++NumReusedIncrements;
3115	else
3116	++NumVarIncrements;
3117
3118	LastIncExpr = Inc.IncExpr;
3119	}
3120	// An IV chain with a single increment is handled by LSR's postinc
3121	// uses. However, a chain with multiple increments requires keeping the IV's
3122	// value live longer than it needs to be if chained.
3123	if (NumConstIncrements > `1`)
3124	--cost;
3125
3126	// Materializing increment expressions in the preheader that didn't exist in
3127	// the original code may cost a register. For example, sign-extended array
3128	// indices can produce ridiculous increments like this:
3129	// IV + ((sext i32 (2 %s) to i64) + (-1 * (sext i32 %s to i64)))*
3130	cost += NumVarIncrements;
3131
3132	// Reusing variable increments likely saves a register to hold the multiple of
3133	// the stride.
3134	cost -= NumReusedIncrements;
3135
3136	LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[`0`].UserInst << " Cost: " << cost
3137	<< "\n");
3138
3139	return cost < `0`;
3140	}
3141
3142	/// Add this IV user to an existing chain or make it the head of a new chain.
3143	void LSRInstance::ChainInstruction(Instruction UserInst, Instruction IVOper,
3144	SmallVectorImpl<ChainUsers> &ChainUsersVec) {
3145	// When IVs are used as types of varying widths, they are generally converted
3146	// to a wider type with some uses remaining narrow under a (free) trunc.
3147	Value *const NextIV = getWideOperand(Oper: IVOper);
3148	const SCEV *const OperExpr = SE.getSCEV(V: NextIV);
3149	const SCEV *const OperExprBase = getExprBase(S: OperExpr);
3150
3151	// Visit all existing chains. Check if its IVOper can be computed as a
3152	// profitable loop invariant increment from the last link in the Chain.
3153	unsigned ChainIdx = `0`, NChains = IVChainVec.size();
3154	const SCEV LastIncExpr = nullptr*;
3155	for (; ChainIdx < NChains; ++ChainIdx) {
3156	IVChain &Chain = IVChainVec [ChainIdx];
3157
3158	// Prune the solution space aggressively by checking that both IV operands
3159	// are expressions that operate on the same unscaled SCEVUnknown. This
3160	// "base" will be canceled by the subsequent getMinusSCEV call. Checking
3161	// first avoids creating extra SCEV expressions.
3162	if (!StressIVChain && Chain.ExprBase != OperExprBase)
3163	continue;
3164
3165	Value *PrevIV = getWideOperand(Oper: Chain.Incs.back().IVOperand);
3166	if (PrevIV->getType() != NextIV->getType())
3167	continue;
3168
3169	// A phi node terminates a chain.
3170	if (isa<PHINode>(Val: UserInst) && isa<PHINode>(Val: Chain.tailUserInst()))
3171	continue;
3172
3173	// The increment must be loop-invariant so it can be kept in a register.
3174	const SCEV *PrevExpr = SE.getSCEV(V: PrevIV);
3175	const SCEV *IncExpr = SE.getMinusSCEV(LHS: OperExpr, RHS: PrevExpr);
3176	if (isa<SCEVCouldNotCompute>(Val: IncExpr) \|\| !SE.isLoopInvariant(S: IncExpr, L))
3177	continue;
3178
3179	if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3180	LastIncExpr = IncExpr;
3181	break;
3182	}
3183	}
3184	// If we haven't found a chain, create a new one, unless we hit the max. Don't
3185	// bother for phi nodes, because they must be last in the chain.
3186	if (ChainIdx == NChains) {
3187	if (isa<PHINode>(Val: UserInst))
3188	return;
3189	if (NChains >= MaxChains && !StressIVChain) {
3190	LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
3191	return;
3192	}
3193	LastIncExpr = OperExpr;
3194	// IVUsers may have skipped over sign/zero extensions. We don't currently
3195	// attempt to form chains involving extensions unless they can be hoisted
3196	// into this loop's AddRec.
3197	if (!isa<SCEVAddRecExpr>(Val: LastIncExpr))
3198	return;
3199	++NChains;
3200	IVChainVec.push_back(Elt: IVChain (IVInc (UserInst, IVOper, LastIncExpr),
3201	OperExprBase));
3202	ChainUsersVec.resize(N: NChains);
3203	LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
3204	<< ") IV=" << *LastIncExpr << "\n");
3205	} else {
3206	LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
3207	<< ") IV+" << *LastIncExpr << "\n");
3208	// Add this IV user to the end of the chain.
3209	IVChainVec [ChainIdx].add(X: IVInc (UserInst, IVOper, LastIncExpr));
3210	}
3211	IVChain &Chain = IVChainVec [ChainIdx];
3212
3213	SmallPtrSet<Instruction*,`4`> &NearUsers = ChainUsersVec [ChainIdx].NearUsers;
3214	// This chain's NearUsers become FarUsers.
3215	if (!LastIncExpr->isZero()) {
3216	ChainUsersVec [ChainIdx].FarUsers.insert_range(R&: NearUsers);
3217	NearUsers.clear();
3218	}
3219
3220	// All other uses of IVOperand become near uses of the chain.
3221	// We currently ignore intermediate values within SCEV expressions, assuming
3222	// they will eventually be used be the current chain, or can be computed
3223	// from one of the chain increments. To be more precise we could
3224	// transitively follow its user and only add leaf IV users to the set.
3225	for (User *U : IVOper->users()) {
3226	Instruction *OtherUse = dyn_cast<Instruction>(Val: U);
3227	if (!OtherUse)
3228	continue;
3229	// Uses in the chain will no longer be uses if the chain is formed.
3230	// Include the head of the chain in this iteration (not Chain.begin()).
3231	IVChain::const_iterator IncIter = Chain.Incs.begin();
3232	IVChain::const_iterator IncEnd = Chain.Incs.end();
3233	for( ; IncIter != IncEnd; ++IncIter) {
3234	if (IncIter->UserInst == OtherUse)
3235	break;
3236	}
3237	if (IncIter != IncEnd)
3238	continue;
3239
3240	if (SE.isSCEVable(Ty: OtherUse->getType())
3241	&& !isa<SCEVUnknown>(Val: SE.getSCEV(V: OtherUse))
3242	&& IU.isIVUserOrOperand(Inst: OtherUse)) {
3243	continue;
3244	}
3245	NearUsers.insert(Ptr: OtherUse);
3246	}
3247
3248	// Since this user is part of the chain, it's no longer considered a use
3249	// of the chain.
3250	ChainUsersVec [ChainIdx].FarUsers.erase(Ptr: UserInst);
3251	}
3252
3253	/// Populate the vector of Chains.
3254	///
3255	/// This decreases ILP at the architecture level. Targets with ample registers,
3256	/// multiple memory ports, and no register renaming probably don't want
3257	/// this. However, such targets should probably disable LSR altogether.
3258	///
3259	/// The job of LSR is to make a reasonable choice of induction variables across
3260	/// the loop. Subsequent passes can easily "unchain" computation exposing more
3261	/// ILP within the loop* if the target wants it.*
3262	///
3263	/// Finding the best IV chain is potentially a scheduling problem. Since LSR
3264	/// will not reorder memory operations, it will recognize this as a chain, but
3265	/// will generate redundant IV increments. Ideally this would be corrected later
3266	/// by a smart scheduler:
3267	/// = A[i]
3268	/// = A[i+x]
3269	/// A[i] =
3270	/// A[i+x] =
3271	///
3272	/// TODO: Walk the entire domtree within this loop, not just the path to the
3273	/// loop latch. This will discover chains on side paths, but requires
3274	/// maintaining multiple copies of the Chains state.
3275	void LSRInstance::CollectChains() {
3276	LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
3277	SmallVector<ChainUsers, `8`> ChainUsersVec;
3278
3279	SmallVector<BasicBlock *,`8`> LatchPath;
3280	BasicBlock *LoopHeader = L->getHeader();
3281	for (DomTreeNode *Rung = DT.getNode(BB: L->getLoopLatch());
3282	Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3283	LatchPath.push_back(Elt: Rung->getBlock());
3284	}
3285	LatchPath.push_back(Elt: LoopHeader);
3286
3287	// Walk the instruction stream from the loop header to the loop latch.
3288	for (BasicBlock *BB : reverse(C&: LatchPath)) {
3289	for (Instruction &I : *BB) {
3290	// Skip instructions that weren't seen by IVUsers analysis.
3291	if (isa<PHINode>(Val: I) \|\| !IU.isIVUserOrOperand(Inst: &I))
3292	continue;
3293
3294	// Skip ephemeral values, as they don't produce real code.
3295	if (IU.isEphemeral(V: &I))
3296	continue;
3297
3298	// Ignore users that are part of a SCEV expression. This way we only
3299	// consider leaf IV Users. This effectively rediscovers a portion of
3300	// IVUsers analysis but in program order this time.
3301	if (SE.isSCEVable(Ty: I.getType()) && !isa<SCEVUnknown>(Val: SE.getSCEV(V: &I)))
3302	continue;
3303
3304	// Remove this instruction from any NearUsers set it may be in.
3305	for (unsigned ChainIdx = `0`, NChains = IVChainVec.size();
3306	ChainIdx < NChains; ++ChainIdx) {
3307	ChainUsersVec [ChainIdx].NearUsers.erase(Ptr: &I);
3308	}
3309	// Search for operands that can be chained.
3310	SmallPtrSet<Instruction*, `4`> UniqueOperands;
3311	User::op_iterator IVOpEnd = I.op_end();
3312	User::op_iterator IVOpIter = findIVOperand(OI: I.op_begin(), OE: IVOpEnd, L, SE);
3313	while (IVOpIter != IVOpEnd) {
3314	Instruction IVOpInst = cast<Instruction>(Val&: IVOpIter);
3315	if (UniqueOperands.insert(Ptr: IVOpInst).second)
3316	ChainInstruction(UserInst: &I, IVOper: IVOpInst, ChainUsersVec);
3317	IVOpIter = findIVOperand(OI: std::next(x: IVOpIter), OE: IVOpEnd, L, SE);
3318	}
3319	} // Continue walking down the instructions.
3320	} // Continue walking down the domtree.
3321	// Visit phi backedges to determine if the chain can generate the IV postinc.
3322	for (PHINode &PN : L->getHeader()->phis()) {
3323	if (!SE.isSCEVable(Ty: PN.getType()))
3324	continue;
3325
3326	Instruction *IncV =
3327	dyn_cast<Instruction>(Val: PN.getIncomingValueForBlock(BB: L->getLoopLatch()));
3328	if (IncV)
3329	ChainInstruction(UserInst: &PN, IVOper: IncV, ChainUsersVec);
3330	}
3331	// Remove any unprofitable chains.
3332	unsigned ChainIdx = `0`;
3333	for (unsigned UsersIdx = `0`, NChains = IVChainVec.size();
3334	UsersIdx < NChains; ++UsersIdx) {
3335	if (!isProfitableChain(Chain&: IVChainVec [UsersIdx],
3336	Users&: ChainUsersVec [UsersIdx].FarUsers, SE, TTI))
3337	continue;
3338	// Preserve the chain at UsesIdx.
3339	if (ChainIdx != UsersIdx)
3340	IVChainVec [ChainIdx] = IVChainVec [UsersIdx];
3341	FinalizeChain(Chain&: IVChainVec [ChainIdx]);
3342	++ChainIdx;
3343	}
3344	IVChainVec.resize(N: ChainIdx);
3345	}
3346
3347	void LSRInstance::FinalizeChain(IVChain &Chain) {
3348	assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
3349	LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[`0`].UserInst << "\n");
3350
3351	for (const IVInc &Inc : Chain) {
3352	LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
3353	auto UseI = find(Range: Inc.UserInst->operands(), Val: Inc.IVOperand);
3354	assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
3355	IVIncSet.insert(Ptr: UseI);
3356	}
3357	}
3358
3359	/// Return true if the IVInc can be folded into an addressing mode.
3360	static bool canFoldIVIncExpr(const SCEV IncExpr, Instruction UserInst,
3361	Value Operand, const* TargetTransformInfo &TTI) {
3362	const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(Val: IncExpr);
3363	Immediate IncOffset = Immediate::getZero();
3364	if (IncConst) {
3365	if (IncConst && IncConst->getAPInt().getSignificantBits() > `64`)
3366	return false;
3367	IncOffset = Immediate::getFixed(MinVal: IncConst->getValue()->getSExtValue());
3368	} else {
3369	// Look for mul(vscale, constant), to detect a scalable offset.
3370	const APInt *C;
3371	if (!match(S: IncExpr, P: m_scev_Mul(Op0: m_scev_APInt(C), Op1: m_SCEVVScale())) \|\|
3372	C->getSignificantBits() > `64`)
3373	return false;
3374	IncOffset = Immediate::getScalable(MinVal: C->getSExtValue());
3375	}
3376
3377	if (!isAddressUse(TTI, Inst: UserInst, OperandVal: Operand))
3378	return false;
3379
3380	MemAccessTy AccessTy = getAccessType(TTI, Inst: UserInst, OperandVal: Operand);
3381	if (!isAlwaysFoldable(TTI, Kind: LSRUse::Address, AccessTy, /BaseGV=/nullptr,
3382	BaseOffset: IncOffset, /HasBaseReg=/false))
3383	return false;
3384
3385	return true;
3386	}
3387
3388	/// Generate an add or subtract for each IVInc in a chain to materialize the IV
3389	/// user's operand from the previous IV user's operand.
3390	void LSRInstance::GenerateIVChain(const IVChain &Chain,
3391	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3392	// Find the new IVOperand for the head of the chain. It may have been replaced
3393	// by LSR.
3394	const IVInc &Head = Chain.Incs [`0`];
3395	User::op_iterator IVOpEnd = Head.UserInst->op_end();
3396	// findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
3397	User::op_iterator IVOpIter = findIVOperand(OI: Head.UserInst->op_begin(),
3398	OE: IVOpEnd, L, SE);
3399	Value IVSrc = nullptr*;
3400	while (IVOpIter != IVOpEnd) {
3401	IVSrc = getWideOperand(Oper: *IVOpIter);
3402
3403	// If this operand computes the expression that the chain needs, we may use
3404	// it. (Check this after setting IVSrc which is used below.)
3405	//
3406	// Note that if Head.IncExpr is wider than IVSrc, then this phi is too
3407	// narrow for the chain, so we can no longer use it. We do allow using a
3408	// wider phi, assuming the LSR checked for free truncation. In that case we
3409	// should already have a truncate on this operand such that
3410	// getSCEV(IVSrc) == IncExpr.
3411	if (SE.getSCEV(V: *IVOpIter) == Head.IncExpr
3412	\|\| SE.getSCEV(V: IVSrc) == Head.IncExpr) {
3413	break;
3414	}
3415	IVOpIter = findIVOperand(OI: std::next(x: IVOpIter), OE: IVOpEnd, L, SE);
3416	}
3417	if (IVOpIter == IVOpEnd) {
3418	// Gracefully give up on this chain.
3419	LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
3420	return;
3421	}
3422	assert(IVSrc && "Failed to find IV chain source");
3423
3424	LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
3425	Type *IVTy = IVSrc->getType();
3426	Type *IntTy = SE.getEffectiveSCEVType(Ty: IVTy);
3427	const SCEV LeftOverExpr = nullptr*;
3428	const SCEV *Accum = SE.getZero(Ty: IntTy);
3429	SmallVector<std::pair<const SCEV , Value >> Bases;
3430	Bases.emplace_back(Args&: Accum, Args&: IVSrc);
3431
3432	for (const IVInc &Inc : Chain) {
3433	Instruction *InsertPt = Inc.UserInst;
3434	if (isa<PHINode>(Val: InsertPt))
3435	InsertPt = L->getLoopLatch()->getTerminator();
3436
3437	// IVOper will replace the current IV User's operand. IVSrc is the IV
3438	// value currently held in a register.
3439	Value *IVOper = IVSrc;
3440	if (!Inc.IncExpr->isZero()) {
3441	// IncExpr was the result of subtraction of two narrow values, so must
3442	// be signed.
3443	const SCEV *IncExpr = SE.getNoopOrSignExtend(V: Inc.IncExpr, Ty: IntTy);
3444	Accum = SE.getAddExpr(LHS: Accum, RHS: IncExpr);
3445	LeftOverExpr = LeftOverExpr ?
3446	SE.getAddExpr(LHS: LeftOverExpr, RHS: IncExpr) : IncExpr;
3447	}
3448
3449	// Look through each base to see if any can produce a nice addressing mode.
3450	bool FoundBase = false;
3451	for (auto [MapScev, MapIVOper] : reverse(C&: Bases)) {
3452	const SCEV *Remainder = SE.getMinusSCEV(LHS: Accum, RHS: MapScev);
3453	if (canFoldIVIncExpr(IncExpr: Remainder, UserInst: Inc.UserInst, Operand: Inc.IVOperand, TTI)) {
3454	if (!Remainder->isZero()) {
3455	Rewriter.clearPostInc();
3456	Value *IncV = Rewriter.expandCodeFor(SH: Remainder, Ty: IntTy, I: InsertPt);
3457	const SCEV *IVOperExpr =
3458	SE.getAddExpr(LHS: SE.getUnknown(V: MapIVOper), RHS: SE.getUnknown(V: IncV));
3459	IVOper = Rewriter.expandCodeFor(SH: IVOperExpr, Ty: IVTy, I: InsertPt);
3460	} else {
3461	IVOper = MapIVOper;
3462	}
3463
3464	FoundBase = true;
3465	break;
3466	}
3467	}
3468	if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
3469	// Expand the IV increment.
3470	Rewriter.clearPostInc();
3471	Value *IncV = Rewriter.expandCodeFor(SH: LeftOverExpr, Ty: IntTy, I: InsertPt);
3472	const SCEV *IVOperExpr = SE.getAddExpr(LHS: SE.getUnknown(V: IVSrc),
3473	RHS: SE.getUnknown(V: IncV));
3474	IVOper = Rewriter.expandCodeFor(SH: IVOperExpr, Ty: IVTy, I: InsertPt);
3475
3476	// If an IV increment can't be folded, use it as the next IV value.
3477	if (!canFoldIVIncExpr(IncExpr: LeftOverExpr, UserInst: Inc.UserInst, Operand: Inc.IVOperand, TTI)) {
3478	assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
3479	Bases.emplace_back(Args&: Accum, Args&: IVOper);
3480	IVSrc = IVOper;
3481	LeftOverExpr = nullptr;
3482	}
3483	}
3484	Type *OperTy = Inc.IVOperand->getType();
3485	if (IVTy != OperTy) {
3486	assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
3487	"cannot extend a chained IV");
3488	IRBuilder<> Builder(InsertPt);
3489	IVOper = Builder.CreateTruncOrBitCast(V: IVOper, DestTy: OperTy, Name: "lsr.chain");
3490	}
3491	Inc.UserInst->replaceUsesOfWith(From: Inc.IVOperand, To: IVOper);
3492	if (auto *OperandIsInstr = dyn_cast<Instruction>(Val: Inc.IVOperand))
3493	DeadInsts.emplace_back(Args&: OperandIsInstr);
3494	}
3495	// If LSR created a new, wider phi, we may also replace its postinc. We only
3496	// do this if we also found a wide value for the head of the chain.
3497	if (isa<PHINode>(Val: Chain.tailUserInst())) {
3498	for (PHINode &Phi : L->getHeader()->phis()) {
3499	if (Phi.getType() != IVSrc->getType())
3500	continue;
3501	Instruction *PostIncV = dyn_cast<Instruction>(
3502	Val: Phi.getIncomingValueForBlock(BB: L->getLoopLatch()));
3503	if (!PostIncV \|\| (SE.getSCEV(V: PostIncV) != SE.getSCEV(V: IVSrc)))
3504	continue;
3505	Value *IVOper = IVSrc;
3506	Type *PostIncTy = PostIncV->getType();
3507	if (IVTy != PostIncTy) {
3508	assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
3509	IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
3510	Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
3511	IVOper = Builder.CreatePointerCast(V: IVSrc, DestTy: PostIncTy, Name: "lsr.chain");
3512	}
3513	Phi.replaceUsesOfWith(From: PostIncV, To: IVOper);
3514	DeadInsts.emplace_back(Args&: PostIncV);
3515	}
3516	}
3517	}
3518
3519	void LSRInstance::CollectFixupsAndInitialFormulae() {
3520	CondBrInst ExitBranch = nullptr*;
3521	bool SaveCmp = TTI.canSaveCmp(L, BI: &ExitBranch, SE: &SE, LI: &LI, DT: &DT, AC: &AC, LibInfo: &TLI);
3522
3523	// For calculating baseline cost
3524	SmallPtrSet<const SCEV *, `16`> Regs;
3525	DenseSet<const SCEV *> VisitedRegs;
3526	DenseSet<size_t> VisitedLSRUse;
3527
3528	for (const IVStrideUse &U : IU) {
3529	Instruction *UserInst = U.getUser();
3530	// Skip IV users that are part of profitable IV Chains.
3531	User::op_iterator UseI =
3532	find(Range: UserInst->operands(), Val: U.getOperandValToReplace());
3533	assert(UseI != UserInst->op_end() && "cannot find IV operand");
3534	if (IVIncSet.count(Ptr: UseI)) {
3535	LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << `'\n'`);
3536	continue;
3537	}
3538
3539	LSRUse::KindType Kind = LSRUse::Basic;
3540	MemAccessTy AccessTy;
3541	if (isAddressUse(TTI, Inst: UserInst, OperandVal: U.getOperandValToReplace())) {
3542	Kind = LSRUse::Address;
3543	AccessTy = getAccessType(TTI, Inst: UserInst, OperandVal: U.getOperandValToReplace());
3544	}
3545
3546	const SCEV *S = IU.getExpr(IU: U);
3547	if (!S)
3548	continue;
3549	PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
3550
3551	// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
3552	// (N - i == 0), and this allows (N - i) to be the expression that we work
3553	// with rather than just N or i, so we can consider the register
3554	// requirements for both N and i at the same time. Limiting this code to
3555	// equality icmps is not a problem because all interesting loops use
3556	// equality icmps, thanks to IndVarSimplify.
3557	if (ICmpInst *CI = dyn_cast<ICmpInst>(Val: UserInst)) {
3558	// If CI can be saved in some target, like replaced inside hardware loop
3559	// in PowerPC, no need to generate initial formulae for it.
3560	if (SaveCmp && CI == dyn_cast<ICmpInst>(Val: ExitBranch->getCondition()))
3561	continue;
3562	if (CI->isEquality()) {
3563	// Swap the operands if needed to put the OperandValToReplace on the
3564	// left, for consistency.
3565	Value *NV = CI->getOperand(i_nocapture: `1`);
3566	if (NV == U.getOperandValToReplace()) {
3567	CI->setOperand(i_nocapture: `1`, Val_nocapture: CI->getOperand(i_nocapture: `0`));
3568	CI->setOperand(i_nocapture: `0`, Val_nocapture: NV);
3569	NV = CI->getOperand(i_nocapture: `1`);
3570	Changed = true;
3571	}
3572
3573	// x == y --> x - y == 0
3574	const SCEV *N = SE.getSCEV(V: NV);
3575	if (SE.isLoopInvariant(S: N, L) && Rewriter.isSafeToExpand(S: N) &&
3576	(!NV->getType()->isPointerTy() \|\|
3577	SE.getPointerBase(V: N) == SE.getPointerBase(V: S))) {
3578	// S is normalized, so normalize N before folding it into S
3579	// to keep the result normalized.
3580	N = normalizeForPostIncUse(S: N, Loops: TmpPostIncLoops, SE);
3581	if (!N)
3582	continue;
3583	Kind = LSRUse::ICmpZero;
3584	S = SE.getMinusSCEV(LHS: N, RHS: S);
3585	} else if (L->isLoopInvariant(V: NV) &&
3586	(!isa<Instruction>(Val: NV) \|\|
3587	DT.dominates(Def: cast<Instruction>(Val: NV), BB: L->getHeader())) &&
3588	!NV->getType()->isPointerTy()) {
3589	// If we can't generally expand the expression (e.g. it contains
3590	// a divide), but it is already at a loop invariant point before the
3591	// loop, wrap it in an unknown (to prevent the expander from trying
3592	// to re-expand in a potentially unsafe way.) The restriction to
3593	// integer types is required because the unknown hides the base, and
3594	// SCEV can't compute the difference of two unknown pointers.
3595	N = SE.getUnknown(V: NV);
3596	N = normalizeForPostIncUse(S: N, Loops: TmpPostIncLoops, SE);
3597	if (!N)
3598	continue;
3599	Kind = LSRUse::ICmpZero;
3600	S = SE.getMinusSCEV(LHS: N, RHS: S);
3601	assert(!isa<SCEVCouldNotCompute>(S));
3602	}
3603
3604	// -1 and the negations of all interesting strides (except the negation
3605	// of -1) are now also interesting.
3606	for (size_t i = `0`, e = Factors.size(); i != e; ++i)
3607	if (Factors [i] != -`1`)
3608	Factors.insert(X: -(uint64_t)Factors [i]);
3609	Factors.insert(X: -`1`);
3610	}
3611	}
3612
3613	// Get or create an LSRUse.
3614	std::pair<size_t, Immediate> P = getUse(Expr&: S, Kind, AccessTy);
3615	size_t LUIdx = P.first;
3616	Immediate Offset = P.second;
3617	LSRUse &LU = Uses [LUIdx];
3618
3619	// Record the fixup.
3620	LSRFixup &LF = LU.getNewFixup();
3621	LF.UserInst = UserInst;
3622	LF.OperandValToReplace = U.getOperandValToReplace();
3623	LF.PostIncLoops = TmpPostIncLoops;
3624	LF.Offset = Offset;
3625	LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3626	LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3627
3628	// Create SCEV as Formula for calculating baseline cost
3629	if (!VisitedLSRUse.count(V: LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3630	Formula F;
3631	F.initialMatch(S, L, SE);
3632	BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,
3633	HardwareLoopProfitable);
3634	VisitedLSRUse.insert(V: LUIdx);
3635	}
3636
3637	// If this is the first use of this LSRUse, give it a formula.
3638	if (LU.Formulae.empty()) {
3639	InsertInitialFormula(S, LU, LUIdx);
3640	CountRegisters(F: LU.Formulae.back(), LUIdx);
3641	}
3642	}
3643
3644	LLVM_DEBUG(print_fixups(dbgs()));
3645	}
3646
3647	/// Insert a formula for the given expression into the given use, separating out
3648	/// loop-variant portions from loop-invariant and loop-computable portions.
3649	void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
3650	size_t LUIdx) {
3651	// Mark uses whose expressions cannot be expanded.
3652	if (!Rewriter.isSafeToExpand(S))
3653	LU.RigidFormula = true;
3654
3655	Formula F;
3656	F.initialMatch(S, L, SE);
3657	bool Inserted = InsertFormula(LU, LUIdx, F);
3658	assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3659	}
3660
3661	/// Insert a simple single-register formula for the given expression into the
3662	/// given use.
3663	void
3664	LSRInstance::InsertSupplementalFormula(const SCEV *S,
3665	LSRUse &LU, size_t LUIdx) {
3666	Formula F;
3667	F.BaseRegs.push_back(Elt: S);
3668	F.HasBaseReg = true;
3669	bool Inserted = InsertFormula(LU, LUIdx, F);
3670	assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3671	}
3672
3673	/// Note which registers are used by the given formula, updating RegUses.
3674	void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3675	if (F.ScaledReg)
3676	RegUses.countRegister(Reg: F.ScaledReg, LUIdx);
3677	for (const SCEV *BaseReg : F.BaseRegs)
3678	RegUses.countRegister(Reg: BaseReg, LUIdx);
3679	}
3680
3681	/// If the given formula has not yet been inserted, add it to the list, and
3682	/// return true. Return false otherwise.
3683	bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3684	// Do not insert formula that we will not be able to expand.
3685	assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3686	"Formula is illegal");
3687
3688	if (!LU.InsertFormula(F, L: *L))
3689	return false;
3690
3691	CountRegisters(F, LUIdx);
3692	return true;
3693	}
3694
3695	/// Test whether this fixup will be executed each time the corresponding IV
3696	/// increment instruction is executed.
3697	bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
3698	// If the fixup block dominates the IV increment block then there is no path
3699	// through the loop to the increment that doesn't pass through the fixup.
3700	return DT.dominates(A: LF.UserInst->getParent(), B: IVIncInsertPos->getParent());
3701	}
3702
3703	/// Check for other uses of loop-invariant values which we're tracking. These
3704	/// other uses will pin these values in registers, making them less profitable
3705	/// for elimination.
3706	/// TODO: This currently misses non-constant addrec step registers.
3707	/// TODO: Should this give more weight to users inside the loop?
3708	void
3709	LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3710	SmallVector<const SCEV *, `8`> Worklist(RegUses.begin(), RegUses.end());
3711	SmallPtrSet<const SCEV *, `32`> Visited;
3712
3713	// Don't collect outside uses if we are favoring postinc - the instructions in
3714	// the loop are more important than the ones outside of it.
3715	if (AMK == TTI::AMK_PostIndexed)
3716	return;
3717
3718	while (!Worklist.empty()) {
3719	const SCEV *S = Worklist.pop_back_val();
3720
3721	// Don't process the same SCEV twice
3722	if (!Visited.insert(Ptr: S).second)
3723	continue;
3724
3725	if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(Val: S))
3726	append_range(C&: Worklist, R: N->operands());
3727	else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(Val: S))
3728	Worklist.push_back(Elt: C->getOperand());
3729	else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
3730	Worklist.push_back(Elt: D->getLHS());
3731	Worklist.push_back(Elt: D->getRHS());
3732	} else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(Val: S)) {
3733	const Value *V = US->getValue();
3734	if (const Instruction *Inst = dyn_cast<Instruction>(Val: V)) {
3735	// Look for instructions defined outside the loop.
3736	if (L->contains(Inst)) continue;
3737	} else if (isa<Constant>(Val: V))
3738	// Constants can be re-materialized.
3739	continue;
3740	for (const Use &U : V->uses()) {
3741	const Instruction *UserInst = dyn_cast<Instruction>(Val: U.getUser());
3742	// Ignore non-instructions.
3743	if (!UserInst)
3744	continue;
3745	// Don't bother if the instruction is an EHPad.
3746	if (UserInst->isEHPad())
3747	continue;
3748	// Ignore instructions in other functions (as can happen with
3749	// Constants).
3750	if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3751	continue;
3752	// Ignore instructions not dominated by the loop.
3753	const BasicBlock *UseBB = !isa<PHINode>(Val: UserInst) ?
3754	UserInst->getParent() :
3755	cast<PHINode>(Val: UserInst)->getIncomingBlock(
3756	i: PHINode::getIncomingValueNumForOperand(i: U.getOperandNo()));
3757	if (!DT.dominates(A: L->getHeader(), B: UseBB))
3758	continue;
3759	// Don't bother if the instruction is in a BB which ends in an EHPad.
3760	if (UseBB->getTerminator()->isEHPad())
3761	continue;
3762
3763	// Ignore cases in which the currently-examined value could come from
3764	// a basic block terminated with an EHPad. This checks all incoming
3765	// blocks of the phi node since it is possible that the same incoming
3766	// value comes from multiple basic blocks, only some of which may end
3767	// in an EHPad. If any of them do, a subsequent rewrite attempt by this
3768	// pass would try to insert instructions into an EHPad, hitting an
3769	// assertion.
3770	if (isa<PHINode>(Val: UserInst)) {
3771	const auto *PhiNode = cast<PHINode>(Val: UserInst);
3772	bool HasIncompatibleEHPTerminatedBlock = false;
3773	llvm::Value *ExpectedValue = U;
3774	for (unsigned int I = `0`; I < PhiNode->getNumIncomingValues(); I++) {
3775	if (PhiNode->getIncomingValue(i: I) == ExpectedValue) {
3776	if (PhiNode->getIncomingBlock(i: I)->getTerminator()->isEHPad()) {
3777	HasIncompatibleEHPTerminatedBlock = true;
3778	break;
3779	}
3780	}
3781	}
3782	if (HasIncompatibleEHPTerminatedBlock) {
3783	continue;
3784	}
3785	}
3786
3787	// Don't bother rewriting PHIs in catchswitch blocks.
3788	if (isa<CatchSwitchInst>(Val: UserInst->getParent()->getTerminator()))
3789	continue;
3790	// Ignore uses which are part of other SCEV expressions, to avoid
3791	// analyzing them multiple times.
3792	if (SE.isSCEVable(Ty: UserInst->getType())) {
3793	const SCEV UserS = SE.getSCEV(V: const_cast<Instruction >(UserInst));
3794	// If the user is a no-op, look through to its uses.
3795	if (!isa<SCEVUnknown>(Val: UserS))
3796	continue;
3797	if (UserS == US) {
3798	Worklist.push_back(
3799	Elt: SE.getUnknown(V: const_cast<Instruction *>(UserInst)));
3800	continue;
3801	}
3802	}
3803	// Ignore icmp instructions which are already being analyzed.
3804	if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Val: UserInst)) {
3805	unsigned OtherIdx = !U.getOperandNo();
3806	Value *OtherOp = ICI->getOperand(i_nocapture: OtherIdx);
3807	if (SE.hasComputableLoopEvolution(S: SE.getSCEV(V: OtherOp), L))
3808	continue;
3809	}
3810
3811	// Do not consider uses inside lifetime intrinsics. These are not
3812	// actually materialized.
3813	if (UserInst->isLifetimeStartOrEnd())
3814	continue;
3815
3816	std::pair<size_t, Immediate> P =
3817	getUse(Expr&: S, Kind: LSRUse::Basic, AccessTy: MemAccessTy ());
3818	size_t LUIdx = P.first;
3819	Immediate Offset = P.second;
3820	LSRUse &LU = Uses [LUIdx];
3821	LSRFixup &LF = LU.getNewFixup();
3822	LF.UserInst = const_cast<Instruction *>(UserInst);
3823	LF.OperandValToReplace = U;
3824	LF.Offset = Offset;
3825	LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3826	LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3827	InsertSupplementalFormula(S: US, LU, LUIdx);
3828	CountRegisters(F: LU.Formulae.back(), LUIdx: Uses.size() - `1`);
3829	break;
3830	}
3831	}
3832	}
3833	}
3834
3835	/// Split S into subexpressions which can be pulled out into separate
3836	/// registers. If C is non-null, multiply each subexpression by C.
3837	///
3838	/// Return remainder expression after factoring the subexpressions captured by
3839	/// Ops. If Ops is complete, return NULL.
3840	static const SCEV CollectSubexprs(const* SCEV S, const* SCEVConstant *C,
3841	SmallVectorImpl<const SCEV *> &Ops,
3842	const Loop *L,
3843	ScalarEvolution &SE,
3844	unsigned Depth = `0`) {
3845	// Arbitrarily cap recursion to protect compile time.
3846	if (Depth >= `3`)
3847	return S;
3848
3849	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
3850	// Break out add operands.
3851	for (const SCEV *S : Add->operands()) {
3852	const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth: Depth+`1`);
3853	if (Remainder)
3854	Ops.push_back(Elt: C ? SE.getMulExpr(LHS: C, RHS: Remainder) : Remainder);
3855	}
3856	return nullptr;
3857	}
3858	const SCEV Start, Step;
3859	const SCEVConstant *Op0;
3860	const SCEV *Op1;
3861	if (match(S, P: m_scev_AffineAddRec(Op0: m_SCEV(V&: Start), Op1: m_SCEV(V&: Step)))) {
3862	// Split a non-zero base out of an addrec.
3863	if (Start->isZero())
3864	return S;
3865
3866	const SCEV *Remainder = CollectSubexprs(S: Start, C, Ops, L, SE, Depth: Depth + `1`);
3867	// Split the non-zero AddRec unless it is part of a nested recurrence that
3868	// does not pertain to this loop.
3869	if (Remainder && (cast<SCEVAddRecExpr>(Val: S)->getLoop() == L \|\|
3870	!isa<SCEVAddRecExpr>(Val: Remainder))) {
3871	Ops.push_back(Elt: C ? SE.getMulExpr(LHS: C, RHS: Remainder) : Remainder);
3872	Remainder = nullptr;
3873	}
3874	if (Remainder != Start) {
3875	if (!Remainder)
3876	Remainder = SE.getConstant(Ty: S->getType(), V: `0`);
3877	return SE.getAddRecExpr(Start: Remainder, Step,
3878	L: cast<SCEVAddRecExpr>(Val: S)->getLoop(),
3879	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
3880	Flags: SCEV::FlagAnyWrap);
3881	}
3882	} else if (match(S, P: m_scev_Mul(Op0: m_SCEVConstant(V&: Op0), Op1: m_SCEV(V&: Op1)))) {
3883	// Break (C (a + b + c)) into Ca + Cb + Cc.
3884	C = C ? cast<SCEVConstant>(Val: SE.getMulExpr(LHS: C, RHS: Op0)) : Op0;
3885	const SCEV *Remainder = CollectSubexprs(S: Op1, C, Ops, L, SE, Depth: Depth + `1`);
3886	if (Remainder)
3887	Ops.push_back(Elt: SE.getMulExpr(LHS: C, RHS: Remainder));
3888	return nullptr;
3889	}
3890	return S;
3891	}
3892
3893	/// Return true if the SCEV represents a value that may end up as a
3894	/// post-increment operation.
3895	static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
3896	LSRUse &LU, const SCEV S, const* Loop *L,
3897	ScalarEvolution &SE) {
3898	if (LU.Kind != LSRUse::Address \|\|
3899	!LU.AccessTy.getType()->isIntOrIntVectorTy())
3900	return false;
3901	const SCEV *Start;
3902	if (!match(S, P: m_scev_AffineAddRec(Op0: m_SCEV(V&: Start), Op1: m_SCEVConstant())))
3903	return false;
3904	// Check if a post-indexed load/store can be used.
3905	if (TTI.isIndexedLoadLegal(Mode: TTI.MIM_PostInc, Ty: S->getType()) \|\|
3906	TTI.isIndexedStoreLegal(Mode: TTI.MIM_PostInc, Ty: S->getType())) {
3907	if (!isa<SCEVConstant>(Val: Start) && SE.isLoopInvariant(S: Start, L))
3908	return true;
3909	}
3910	return false;
3911	}
3912
3913	/// Helper function for LSRInstance::GenerateReassociations.
3914	void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3915	const Formula &Base,
3916	unsigned Depth, size_t Idx,
3917	bool IsScaledReg) {
3918	const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs [Idx];
3919	// Don't generate reassociations for the base register of a value that
3920	// may generate a post-increment operator. The reason is that the
3921	// reassociations cause extra base+register formula to be created,
3922	// and possibly chosen, but the post-increment is more efficient.
3923	if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, S: BaseReg, L, SE))
3924	return;
3925	SmallVector<const SCEV *, `8`> AddOps;
3926	const SCEV Remainder = CollectSubexprs(S: BaseReg, C: nullptr*, Ops&: AddOps, L, SE);
3927	if (Remainder)
3928	AddOps.push_back(Elt: Remainder);
3929
3930	if (AddOps.size() == `1`)
3931	return;
3932
3933	for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
3934	JE = AddOps.end();
3935	J != JE; ++J) {
3936	// Loop-variant "unknown" values are uninteresting; we won't be able to
3937	// do anything meaningful with them.
3938	if (isa<SCEVUnknown>(Val: J) && !SE.isLoopInvariant(S: J, L))
3939	continue;
3940
3941	// Don't pull a constant into a register if the constant could be folded
3942	// into an immediate field.
3943	if (isAlwaysFoldable(TTI, SE, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
3944	AccessTy: LU.AccessTy, S: *J, HasBaseReg: Base.getNumRegs() > `1`))
3945	continue;
3946
3947	// Collect all operands except J.*
3948	SmallVector<SCEVUse, `8`> InnerAddOps(std::as_const(t&: AddOps).begin(), J);
3949	InnerAddOps.append(in_start: std::next(x: J), in_end: std::as_const(t&: AddOps).end());
3950
3951	// Don't leave just a constant behind in a register if the constant could
3952	// be folded into an immediate field.
3953	if (InnerAddOps.size() == `1` &&
3954	isAlwaysFoldable(TTI, SE, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
3955	AccessTy: LU.AccessTy, S: InnerAddOps [`0`], HasBaseReg: Base.getNumRegs() > `1`))
3956	continue;
3957
3958	const SCEV *InnerSum = SE.getAddExpr(Ops&: InnerAddOps);
3959	if (InnerSum->isZero())
3960	continue;
3961	Formula F = Base;
3962
3963	if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
3964	continue;
3965
3966	// Add the remaining pieces of the add back into the new formula.
3967	const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(Val: InnerSum);
3968	if (InnerSumSC && SE.getTypeSizeInBits(Ty: InnerSumSC->getType()) <= `64` &&
3969	TTI.isLegalAddImmediate(Imm: (uint64_t)F.UnfoldedOffset.getFixedValue() +
3970	InnerSumSC->getValue()->getZExtValue())) {
3971	F.UnfoldedOffset =
3972	Immediate::getFixed(MinVal: (uint64_t)F.UnfoldedOffset.getFixedValue() +
3973	InnerSumSC->getValue()->getZExtValue());
3974	if (IsScaledReg) {
3975	F.ScaledReg = nullptr;
3976	F.Scale = `0`;
3977	} else
3978	F.BaseRegs.erase(CI: F.BaseRegs.begin() + Idx);
3979	} else if (IsScaledReg)
3980	F.ScaledReg = InnerSum;
3981	else
3982	F.BaseRegs [Idx] = InnerSum;
3983
3984	// Add J as its own register, or an unfolded immediate.
3985	const SCEVConstant SC = dyn_cast<SCEVConstant>(Val: J);
3986	if (SC && SE.getTypeSizeInBits(Ty: SC->getType()) <= `64` &&
3987	TTI.isLegalAddImmediate(Imm: (uint64_t)F.UnfoldedOffset.getFixedValue() +
3988	SC->getValue()->getZExtValue()))
3989	F.UnfoldedOffset =
3990	Immediate::getFixed(MinVal: (uint64_t)F.UnfoldedOffset.getFixedValue() +
3991	SC->getValue()->getZExtValue());
3992	else
3993	F.BaseRegs.push_back(Elt: *J);
3994	// We may have changed the number of register in base regs, adjust the
3995	// formula accordingly.
3996	F.canonicalize(L: *L);
3997
3998	if (InsertFormula(LU, LUIdx, F))
3999	// If that formula hadn't been seen before, recurse to find more like
4000	// it.
4001	// Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2)
4002	// Because just Depth is not enough to bound compile time.
4003	// This means that every time AddOps.size() is greater 16^x we will add
4004	// x to Depth.
4005	GenerateReassociations(LU, LUIdx, Base: LU.Formulae.back(),
4006	Depth: Depth + `1` + (Log2_32(Value: AddOps.size()) >> `2`));
4007	}
4008	}
4009
4010	/// Split out subexpressions from adds and the bases of addrecs.
4011	void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
4012	Formula Base, unsigned Depth) {
4013	assert(Base.isCanonical(*L) && "Input must be in the canonical form");
4014	// Arbitrarily cap recursion to protect compile time.
4015	if (Depth >= `3`)
4016	return;
4017
4018	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i)
4019	GenerateReassociationsImpl(LU, LUIdx, Base, Depth, Idx: i);
4020
4021	if (Base.Scale == `1`)
4022	GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
4023	/ Idx / -`1`, / IsScaledReg / true);
4024	}
4025
4026	/// Generate a formula consisting of all of the loop-dominating registers added
4027	/// into a single register.
4028	void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
4029	Formula Base) {
4030	// This method is only interesting on a plurality of registers.
4031	if (Base.BaseRegs.size() + (Base.Scale == `1`) +
4032	(Base.UnfoldedOffset.isNonZero()) <=
4033	`1`)
4034	return;
4035
4036	// Flatten the representation, i.e., reg1 + 1reg2 => reg1 + reg2, before*
4037	// processing the formula.
4038	Base.unscale();
4039	SmallVector<SCEVUse, `4`> Ops;
4040	Formula NewBase = Base;
4041	NewBase.BaseRegs.clear();
4042	Type CombinedIntegerType = nullptr*;
4043	for (const SCEV *BaseReg : Base.BaseRegs) {
4044	if (SE.properlyDominates(S: BaseReg, BB: L->getHeader()) &&
4045	!SE.hasComputableLoopEvolution(S: BaseReg, L)) {
4046	if (!CombinedIntegerType)
4047	CombinedIntegerType = SE.getEffectiveSCEVType(Ty: BaseReg->getType());
4048	Ops.push_back(Elt: BaseReg);
4049	}
4050	else
4051	NewBase.BaseRegs.push_back(Elt: BaseReg);
4052	}
4053
4054	// If no register is relevant, we're done.
4055	if (Ops.size() == `0`)
4056	return;
4057
4058	// Utility function for generating the required variants of the combined
4059	// registers.
4060	auto GenerateFormula = [&](const SCEV *Sum) {
4061	Formula F = NewBase;
4062
4063	// TODO: If Sum is zero, it probably means ScalarEvolution missed an
4064	// opportunity to fold something. For now, just ignore such cases
4065	// rather than proceed with zero in a register.
4066	if (Sum->isZero())
4067	return;
4068
4069	F.BaseRegs.push_back(Elt: Sum);
4070	F.canonicalize(L: *L);
4071	(void)InsertFormula(LU, LUIdx, F);
4072	};
4073
4074	// If we collected at least two registers, generate a formula combining them.
4075	if (Ops.size() > `1`) {
4076	SmallVector<SCEVUse, `4`> OpsCopy(Ops); // Don't let SE modify Ops.
4077	GenerateFormula (SE.getAddExpr(Ops&: OpsCopy));
4078	}
4079
4080	// If we have an unfolded offset, generate a formula combining it with the
4081	// registers collected.
4082	if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4083	assert(CombinedIntegerType && "Missing a type for the unfolded offset");
4084	Ops.push_back(Elt: SE.getConstant(Ty: CombinedIntegerType,
4085	V: NewBase.UnfoldedOffset.getFixedValue(), isSigned: true));
4086	NewBase.UnfoldedOffset = Immediate::getFixed(MinVal: `0`);
4087	GenerateFormula (SE.getAddExpr(Ops));
4088	}
4089	}
4090
4091	/// Helper function for LSRInstance::GenerateSymbolicOffsets.
4092	void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
4093	const Formula &Base, size_t Idx,
4094	bool IsScaledReg) {
4095	SCEVUse G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs [Idx];
4096	GlobalValue *GV = ExtractSymbol(S&: G, SE);
4097	if (G ->isZero() \|\| !GV)
4098	return;
4099	Formula F = Base;
4100	F.BaseGV = GV;
4101	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy, F))
4102	return;
4103	if (IsScaledReg)
4104	F.ScaledReg = G;
4105	else
4106	F.BaseRegs [Idx] = G;
4107	(void)InsertFormula(LU, LUIdx, F);
4108	}
4109
4110	/// Generate reuse formulae using symbolic offsets.
4111	void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
4112	Formula Base) {
4113	// We can't add a symbolic offset if the address already contains one.
4114	if (Base.BaseGV) return;
4115
4116	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i)
4117	GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, Idx: i);
4118	if (Base.Scale == `1`)
4119	GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, / Idx / -`1`,
4120	/ IsScaledReg / true);
4121	}
4122
4123	/// Helper function for LSRInstance::GenerateConstantOffsets.
4124	void LSRInstance::GenerateConstantOffsetsImpl(
4125	LSRUse &LU, unsigned LUIdx, const Formula &Base,
4126	const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {
4127
4128	auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
4129	Formula F = Base;
4130	if (!Base.BaseOffset.isCompatibleImmediate(Imm: Offset))
4131	return;
4132	F.BaseOffset = Base.BaseOffset.subUnsigned(RHS: Offset);
4133
4134	if (isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy, F)) {
4135	// Add the offset to the base register.
4136	const SCEV *NewOffset = Offset.getSCEV(SE, Ty: G->getType());
4137	const SCEV *NewG = SE.getAddExpr(LHS: NewOffset, RHS: G);
4138	// If it cancelled out, drop the base register, otherwise update it.
4139	if (NewG->isZero()) {
4140	if (IsScaledReg) {
4141	F.Scale = `0`;
4142	F.ScaledReg = nullptr;
4143	} else
4144	F.deleteBaseReg(S&: F.BaseRegs [Idx]);
4145	F.canonicalize(L: *L);
4146	} else if (IsScaledReg)
4147	F.ScaledReg = NewG;
4148	else
4149	F.BaseRegs [Idx] = NewG;
4150
4151	(void)InsertFormula(LU, LUIdx, F);
4152	}
4153	};
4154
4155	SCEVUse G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs [Idx];
4156
4157	// With constant offsets and constant steps, we can generate pre-inc
4158	// accesses by having the offset equal the step. So, for access #0 with a
4159	// step of 8, we generate a G - 8 base which would require the first access
4160	// to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
4161	// for itself and hopefully becomes the base for other accesses. This means
4162	// means that a single pre-indexed access can be generated to become the new
4163	// base pointer for each iteration of the loop, resulting in no extra add/sub
4164	// instructions for pointer updating.
4165	if ((AMK & TTI::AMK_PreIndexed) && LU.Kind == LSRUse::Address) {
4166	const APInt *StepInt;
4167	if (match(U: G, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_scev_APInt(C&: StepInt)))) {
4168	int64_t Step = StepInt->isNegative() ? StepInt->getSExtValue()
4169	: StepInt->getZExtValue();
4170
4171	for (Immediate Offset : Worklist) {
4172	if (Offset.isFixed()) {
4173	Offset = Immediate::getFixed(MinVal: Offset.getFixedValue() - Step);
4174	GenerateOffset (G, Offset);
4175	}
4176	}
4177	}
4178	}
4179	for (Immediate Offset : Worklist)
4180	GenerateOffset (G, Offset);
4181
4182	Immediate Imm = ExtractImmediate(S&: G, SE);
4183	if (G ->isZero() \|\| Imm.isZero() \|\|
4184	!Base.BaseOffset.isCompatibleImmediate(Imm))
4185	return;
4186	Formula F = Base;
4187	F.BaseOffset = F.BaseOffset.addUnsigned(RHS: Imm);
4188	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy, F))
4189	return;
4190	if (IsScaledReg) {
4191	F.ScaledReg = G;
4192	} else {
4193	F.BaseRegs [Idx] = G;
4194	// We may generate non canonical Formula if G is a recurrent expr reg
4195	// related with current loop while F.ScaledReg is not.
4196	F.canonicalize(L: *L);
4197	}
4198	(void)InsertFormula(LU, LUIdx, F);
4199	}
4200
4201	/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
4202	void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
4203	Formula Base) {
4204	// TODO: For now, just add the min and max offset, because it usually isn't
4205	// worthwhile looking at everything inbetween.
4206	SmallVector<Immediate, `2`> Worklist;
4207	Worklist.push_back(Elt: LU.MinOffset);
4208	if (LU.MaxOffset != LU.MinOffset)
4209	Worklist.push_back(Elt: LU.MaxOffset);
4210
4211	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i)
4212	GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, Idx: i);
4213	if (Base.Scale == `1`)
4214	GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, / Idx / -`1`,
4215	/ IsScaledReg / true);
4216	}
4217
4218	/// For ICmpZero, check to see if we can scale up the comparison. For example, x
4219	/// == y -> xc == yc.
4220	void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
4221	Formula Base) {
4222	if (LU.Kind != LSRUse::ICmpZero) return;
4223
4224	// Determine the integer type for the base formula.
4225	Type *IntTy = Base.getType();
4226	if (!IntTy) return;
4227	if (SE.getTypeSizeInBits(Ty: IntTy) > `64`) return;
4228
4229	// Don't do this if there is more than one offset.
4230	if (LU.MinOffset != LU.MaxOffset) return;
4231
4232	// Check if transformation is valid. It is illegal to multiply pointer.
4233	if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4234	return;
4235	for (const SCEV *BaseReg : Base.BaseRegs)
4236	if (BaseReg->getType()->isPointerTy())
4237	return;
4238	assert(!Base.BaseGV && "ICmpZero use is not legal!");
4239
4240	// Check each interesting stride.
4241	for (int64_t Factor : Factors) {
4242	// Check that Factor can be represented by IntTy
4243	if (!ConstantInt::isValueValidForType(Ty: IntTy, V: Factor))
4244	continue;
4245	// Check that the multiplication doesn't overflow.
4246	if (Base.BaseOffset.isMin() && Factor == -`1`)
4247	continue;
4248	// Not supporting scalable immediates.
4249	if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
4250	continue;
4251	Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(RHS: Factor);
4252	assert(Factor != `0` && "Zero factor not expected!");
4253	if (NewBaseOffset.getFixedValue() / Factor !=
4254	Base.BaseOffset.getFixedValue())
4255	continue;
4256	// If the offset will be truncated at this use, check that it is in bounds.
4257	if (!IntTy->isPointerTy() &&
4258	!ConstantInt::isValueValidForType(Ty: IntTy, V: NewBaseOffset.getFixedValue()))
4259	continue;
4260
4261	// Check that multiplying with the use offset doesn't overflow.
4262	Immediate Offset = LU.MinOffset;
4263	if (Offset.isMin() && Factor == -`1`)
4264	continue;
4265	Offset = Offset.mulUnsigned(RHS: Factor);
4266	if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4267	continue;
4268	// If the offset will be truncated at this use, check that it is in bounds.
4269	if (!IntTy->isPointerTy() &&
4270	!ConstantInt::isValueValidForType(Ty: IntTy, V: Offset.getFixedValue()))
4271	continue;
4272
4273	Formula F = Base;
4274	F.BaseOffset = NewBaseOffset;
4275
4276	// Check that this scale is legal.
4277	if (!isLegalUse(TTI, MinOffset: Offset, MaxOffset: Offset, Kind: LU.Kind, AccessTy: LU.AccessTy, F))
4278	continue;
4279
4280	// Compensate for the use having MinOffset built into it.
4281	F.BaseOffset = F.BaseOffset.addUnsigned(RHS: Offset).subUnsigned(RHS: LU.MinOffset);
4282
4283	const SCEV *FactorS = SE.getConstant(Ty: IntTy, V: Factor);
4284
4285	// Check that multiplying with each base register doesn't overflow.
4286	for (size_t i = `0`, e = F.BaseRegs.size(); i != e; ++i) {
4287	F.BaseRegs [i] = SE.getMulExpr(LHS: F.BaseRegs [i], RHS: FactorS);
4288	if (getExactSDiv(LHS: F.BaseRegs [i], RHS: FactorS, SE) != Base.BaseRegs [i])
4289	goto next;
4290	}
4291
4292	// Check that multiplying with the scaled register doesn't overflow.
4293	if (F.ScaledReg) {
4294	F.ScaledReg = SE.getMulExpr(LHS: F.ScaledReg, RHS: FactorS);
4295	if (getExactSDiv(LHS: F.ScaledReg, RHS: FactorS, SE) != Base.ScaledReg)
4296	continue;
4297	}
4298
4299	// Check that multiplying with the unfolded offset doesn't overflow.
4300	if (F.UnfoldedOffset.isNonZero()) {
4301	if (F.UnfoldedOffset.isMin() && Factor == -`1`)
4302	continue;
4303	F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(RHS: Factor);
4304	if (F.UnfoldedOffset.getFixedValue() / Factor !=
4305	Base.UnfoldedOffset.getFixedValue())
4306	continue;
4307	// If the offset will be truncated, check that it is in bounds.
4308	if (!IntTy->isPointerTy() && !ConstantInt::isValueValidForType(
4309	Ty: IntTy, V: F.UnfoldedOffset.getFixedValue()))
4310	continue;
4311	}
4312
4313	// If we make it here and it's legal, add it.
4314	(void)InsertFormula(LU, LUIdx, F);
4315	next:;
4316	}
4317	}
4318
4319	/// Generate stride factor reuse formulae by making use of scaled-offset address
4320	/// modes, for example.
4321	void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
4322	// Determine the integer type for the base formula.
4323	Type *IntTy = Base.getType();
4324	if (!IntTy) return;
4325
4326	// If this Formula already has a scaled register, we can't add another one.
4327	// Try to unscale the formula to generate a better scale.
4328	if (Base.Scale != `0` && !Base.unscale())
4329	return;
4330
4331	assert(Base.Scale == `0` && "unscale did not did its job!");
4332
4333	// Check each interesting stride.
4334	for (int64_t Factor : Factors) {
4335	Base.Scale = Factor;
4336	Base.HasBaseReg = Base.BaseRegs.size() > `1`;
4337	// Check whether this scale is going to be legal.
4338	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy,
4339	F: Base)) {
4340	// As a special-case, handle special out-of-loop Basic users specially.
4341	// TODO: Reconsider this special case.
4342	if (LU.Kind == LSRUse::Basic &&
4343	isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LSRUse::Special,
4344	AccessTy: LU.AccessTy, F: Base) &&
4345	LU.AllFixupsOutsideLoop)
4346	LU.Kind = LSRUse::Special;
4347	else
4348	continue;
4349	}
4350	// For an ICmpZero, negating a solitary base register won't lead to
4351	// new solutions.
4352	if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
4353	Base.BaseOffset.isZero() && !Base.BaseGV)
4354	continue;
4355	// For each addrec base reg, if its loop is current loop, apply the scale.
4356	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i) {
4357	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Base.BaseRegs [i]);
4358	if (AR && (AR->getLoop() == L \|\| LU.AllFixupsOutsideLoop)) {
4359	const SCEV *FactorS = SE.getConstant(Ty: IntTy, V: Factor);
4360	if (FactorS->isZero())
4361	continue;
4362	// Divide out the factor, ignoring high bits, since we'll be
4363	// scaling the value back up in the end.
4364	if (const SCEV Quotient = getExactSDiv(LHS: AR, RHS: FactorS, SE, IgnoreSignificantBits: true*))
4365	if (!Quotient->isZero()) {
4366	// TODO: This could be optimized to avoid all the copying.
4367	Formula F = Base;
4368	F.ScaledReg = Quotient;
4369	F.deleteBaseReg(S&: F.BaseRegs [i]);
4370	// The canonical representation of 1reg is reg, which is already in*
4371	// Base. In that case, do not try to insert the formula, it will be
4372	// rejected anyway.
4373	if (F.Scale == `1` && (F.BaseRegs.empty() \|\|
4374	(AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
4375	continue;
4376	// If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
4377	// non canonical Formula with ScaledReg's loop not being L.
4378	if (F.Scale == `1` && LU.AllFixupsOutsideLoop)
4379	F.canonicalize(L: *L);
4380	(void)InsertFormula(LU, LUIdx, F);
4381	}
4382	}
4383	}
4384	}
4385	}
4386
4387	/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
4388	/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
4389	/// perform the extension/truncate and normalize again, as the normalized form
4390	/// can result in folds that are not valid in the post-inc use contexts. The
4391	/// expressions for all PostIncLoopSets must match, otherwise return nullptr.
4392	static const SCEV *
4393	getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops,
4394	const SCEV Expr, Type ToTy,
4395	ScalarEvolution &SE) {
4396	const SCEV Result = nullptr*;
4397	for (auto &L : Loops) {
4398	auto *DenormExpr = denormalizeForPostIncUse(S: Expr, Loops: L, SE);
4399	const SCEV *NewDenormExpr = SE.getAnyExtendExpr(Op: DenormExpr, Ty: ToTy);
4400	const SCEV *New = normalizeForPostIncUse(S: NewDenormExpr, Loops: L, SE);
4401	if (!New \|\| (Result && New != Result))
4402	return nullptr;
4403	Result = New;
4404	}
4405
4406	assert(Result && "failed to create expression");
4407	return Result;
4408	}
4409
4410	/// Generate reuse formulae from different IV types.
4411	void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
4412	// Don't bother truncating symbolic values.
4413	if (Base.BaseGV) return;
4414
4415	// Determine the integer type for the base formula.
4416	Type *DstTy = Base.getType();
4417	if (!DstTy) return;
4418	if (DstTy->isPointerTy())
4419	return;
4420
4421	// It is invalid to extend a pointer type so exit early if ScaledReg or
4422	// any of the BaseRegs are pointers.
4423	if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4424	return;
4425	if (any_of(Range&: Base.BaseRegs,
4426	P: [](const SCEV S) { return* S->getType()->isPointerTy(); }))
4427	return;
4428
4429	SmallVector<PostIncLoopSet> Loops;
4430	for (auto &LF : LU.Fixups)
4431	Loops.push_back(Elt: LF.PostIncLoops);
4432
4433	for (Type *SrcTy : Types) {
4434	if (SrcTy != DstTy && TTI.isTruncateFree(Ty1: SrcTy, Ty2: DstTy)) {
4435	Formula F = Base;
4436
4437	// Sometimes SCEV is able to prove zero during ext transform. It may
4438	// happen if SCEV did not do all possible transforms while creating the
4439	// initial node (maybe due to depth limitations), but it can do them while
4440	// taking ext.
4441	if (F.ScaledReg) {
4442	const SCEV *NewScaledReg =
4443	getAnyExtendConsideringPostIncUses(Loops, Expr: F.ScaledReg, ToTy: SrcTy, SE);
4444	if (!NewScaledReg \|\| NewScaledReg->isZero())
4445	continue;
4446	F.ScaledReg = NewScaledReg;
4447	}
4448	bool HasZeroBaseReg = false;
4449	for (const SCEV *&BaseReg : F.BaseRegs) {
4450	const SCEV *NewBaseReg =
4451	getAnyExtendConsideringPostIncUses(Loops, Expr: BaseReg, ToTy: SrcTy, SE);
4452	if (!NewBaseReg \|\| NewBaseReg->isZero()) {
4453	HasZeroBaseReg = true;
4454	break;
4455	}
4456	BaseReg = NewBaseReg;
4457	}
4458	if (HasZeroBaseReg)
4459	continue;
4460
4461	// TODO: This assumes we've done basic processing on all uses and
4462	// have an idea what the register usage is.
4463	if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4464	continue;
4465
4466	F.canonicalize(L: *L);
4467	(void)InsertFormula(LU, LUIdx, F);
4468	}
4469	}
4470	}
4471
4472	namespace {
4473
4474	/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
4475	/// modifications so that the search phase doesn't have to worry about the data
4476	/// structures moving underneath it.
4477	struct WorkItem {
4478	size_t LUIdx;
4479	Immediate Imm;
4480	const SCEV *OrigReg;
4481
4482	WorkItem(size_t LI, Immediate I, const SCEV *R)
4483	: LUIdx(LI), Imm (I), OrigReg(R) {}
4484
4485	void print(raw_ostream &OS) const;
4486	void dump() const;
4487	};
4488
4489	} // end anonymous namespace
4490
4491	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
4492	void WorkItem::print(raw_ostream &OS) const {
4493	OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
4494	<< " , add offset " << Imm;
4495	}
4496
4497	LLVM_DUMP_METHOD void WorkItem::dump() const {
4498	print(errs()); errs() << `'\n'`;
4499	}
4500	#endif
4501
4502	/// Look for registers which are a constant distance apart and try to form reuse
4503	/// opportunities between them.
4504	void LSRInstance::GenerateCrossUseConstantOffsets() {
4505	// Group the registers by their value without any added constant offset.
4506	using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4507
4508	DenseMap<const SCEV *, ImmMapTy> Map;
4509	DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4510	SmallVector<const SCEV *, `8`> Sequence;
4511	for (const SCEV *Use : RegUses) {
4512	SCEVUse Reg = Use; // Make a copy for ExtractImmediate to modify.
4513	Immediate Imm = ExtractImmediate(S&: Reg, SE);
4514	auto Pair = Map.try_emplace(Key: Reg);
4515	if (Pair.second)
4516	Sequence.push_back(Elt: Reg);
4517	Pair.first ->second.insert(x: std::make_pair(x&: Imm, y&: Use));
4518	UsedByIndicesMap [Reg] \|= RegUses.getUsedByIndices(Reg: Use);
4519	}
4520
4521	// Now examine each set of registers with the same base value. Build up
4522	// a list of work to do and do the work in a separate step so that we're
4523	// not adding formulae and register counts while we're searching.
4524	SmallVector<WorkItem, `32`> WorkItems;
4525	SmallSet<std::pair<size_t, Immediate>, `32`, KeyOrderSizeTAndImmediate>
4526	UniqueItems;
4527	for (const SCEV *Reg : Sequence) {
4528	const ImmMapTy &Imms = Map.find(Val: Reg)->second;
4529
4530	// It's not worthwhile looking for reuse if there's only one offset.
4531	if (Imms.size() == `1`)
4532	continue;
4533
4534	LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << `':'`;
4535	for (const auto &Entry
4536	: Imms) dbgs()
4537	<< `' '` << Entry.first;
4538	dbgs() << `'\n'`);
4539
4540	// Examine each offset.
4541	for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4542	J != JE; ++J) {
4543	const SCEV *OrigReg = J ->second;
4544
4545	Immediate JImm = J ->first;
4546	const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg: OrigReg);
4547
4548	if (!isa<SCEVConstant>(Val: OrigReg) &&
4549	UsedByIndicesMap [Reg].count() == `1`) {
4550	LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
4551	<< `'\n'`);
4552	continue;
4553	}
4554
4555	// Conservatively examine offsets between this orig reg a few selected
4556	// other orig regs.
4557	Immediate First = Imms.begin()->first;
4558	Immediate Last = std::prev(x: Imms.end())->first;
4559	if (!First.isCompatibleImmediate(Imm: Last)) {
4560	LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
4561	<< "\n");
4562	continue;
4563	}
4564	// Only scalable if both terms are scalable, or if one is scalable and
4565	// the other is 0.
4566	bool Scalable = First.isScalable() \|\| Last.isScalable();
4567	int64_t FI = First.getKnownMinValue();
4568	int64_t LI = Last.getKnownMinValue();
4569	// Compute (First + Last) / 2 without overflow using the fact that
4570	// First + Last = 2 (First + Last) + (First ^ Last).*
4571	int64_t Avg = (FI & LI) + ((FI ^ LI) >> `1`);
4572	// If the result is negative and FI is odd and LI even (or vice versa),
4573	// we rounded towards -inf. Add 1 in that case, to round towards 0.
4574	Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> `63`));
4575	ImmMapTy::const_iterator OtherImms[] = {
4576	Imms.begin(), std::prev(x: Imms.end()),
4577	Imms.lower_bound(x: Immediate::get(MinVal: Avg, Scalable))};
4578	for (const auto &M : OtherImms) {
4579	if (M == J \|\| M == JE) continue;
4580	if (!JImm.isCompatibleImmediate(Imm: M ->first))
4581	continue;
4582
4583	// Compute the difference between the two.
4584	Immediate Imm = JImm.subUnsigned(RHS: M ->first);
4585	for (unsigned LUIdx : UsedByIndices.set_bits())
4586	// Make a memo of this use, offset, and register tuple.
4587	if (UniqueItems.insert(V: std::make_pair(x&: LUIdx, y&: Imm)).second)
4588	WorkItems.push_back(Elt: WorkItem (LUIdx, Imm, OrigReg));
4589	}
4590	}
4591	}
4592
4593	Map.clear();
4594	Sequence.clear();
4595	UsedByIndicesMap.clear();
4596	UniqueItems.clear();
4597
4598	// Now iterate through the worklist and add new formulae.
4599	for (const WorkItem &WI : WorkItems) {
4600	size_t LUIdx = WI.LUIdx;
4601	LSRUse &LU = Uses [LUIdx];
4602	Immediate Imm = WI.Imm;
4603	const SCEV *OrigReg = WI.OrigReg;
4604
4605	Type *IntTy = SE.getEffectiveSCEVType(Ty: OrigReg->getType());
4606	const SCEV *NegImmS = Imm.getNegativeSCEV(SE, Ty: IntTy);
4607	unsigned BitWidth = SE.getTypeSizeInBits(Ty: IntTy);
4608
4609	// TODO: Use a more targeted data structure.
4610	for (size_t L = `0`, LE = LU.Formulae.size(); L != LE; ++L) {
4611	Formula F = LU.Formulae [L];
4612	// FIXME: The code for the scaled and unscaled registers looks
4613	// very similar but slightly different. Investigate if they
4614	// could be merged. That way, we would not have to unscale the
4615	// Formula.
4616	F.unscale();
4617	// Use the immediate in the scaled register.
4618	if (F.ScaledReg == OrigReg) {
4619	if (!F.BaseOffset.isCompatibleImmediate(Imm))
4620	continue;
4621	Immediate Offset = F.BaseOffset.addUnsigned(RHS: Imm.mulUnsigned(RHS: F.Scale));
4622	// Don't create 50 + reg(-50).
4623	const SCEV *S = Offset.getNegativeSCEV(SE, Ty: IntTy);
4624	if (F.referencesReg(S))
4625	continue;
4626	Formula NewF = F;
4627	NewF.BaseOffset = Offset;
4628	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy,
4629	F: NewF))
4630	continue;
4631	NewF.ScaledReg = SE.getAddExpr(LHS: NegImmS, RHS: NewF.ScaledReg);
4632
4633	// If the new scale is a constant in a register, and adding the constant
4634	// value to the immediate would produce a value closer to zero than the
4635	// immediate itself, then the formula isn't worthwhile.
4636	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: NewF.ScaledReg)) {
4637	// FIXME: Do we need to do something for scalable immediates here?
4638	// A scalable SCEV won't be constant, but we might still have
4639	// something in the offset? Bail out for now to be safe.
4640	if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4641	continue;
4642	if (C->getValue()->isNegative() !=
4643	(NewF.BaseOffset.isLessThanZero()) &&
4644	(C->getAPInt().abs() * APInt (BitWidth, F.Scale))
4645	.ule(RHS: std::abs(i: NewF.BaseOffset.getFixedValue())))
4646	continue;
4647	}
4648
4649	// OK, looks good.
4650	NewF.canonicalize(L: *this->L);
4651	(void)InsertFormula(LU, LUIdx, F: NewF);
4652	} else {
4653	// Use the immediate in a base register.
4654	for (size_t N = `0`, NE = F.BaseRegs.size(); N != NE; ++N) {
4655	const SCEV *BaseReg = F.BaseRegs [N];
4656	if (BaseReg != OrigReg)
4657	continue;
4658	Formula NewF = F;
4659	if (!NewF.BaseOffset.isCompatibleImmediate(Imm) \|\|
4660	!NewF.UnfoldedOffset.isCompatibleImmediate(Imm) \|\|
4661	!NewF.BaseOffset.isCompatibleImmediate(Imm: NewF.UnfoldedOffset))
4662	continue;
4663	NewF.BaseOffset = NewF.BaseOffset.addUnsigned(RHS: Imm);
4664	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset,
4665	Kind: LU.Kind, AccessTy: LU.AccessTy, F: NewF)) {
4666	if (AMK == TTI::AMK_PostIndexed &&
4667	mayUsePostIncMode(TTI, LU, S: OrigReg, L: this->L, SE))
4668	continue;
4669	Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(RHS: Imm);
4670	if (!isLegalAddImmediate(TTI, Offset: NewUnfoldedOffset))
4671	continue;
4672	NewF = F;
4673	NewF.UnfoldedOffset = NewUnfoldedOffset;
4674	}
4675	NewF.BaseRegs [N] = SE.getAddExpr(LHS: NegImmS, RHS: BaseReg);
4676
4677	// If the new formula has a constant in a register, and adding the
4678	// constant value to the immediate would produce a value closer to
4679	// zero than the immediate itself, then the formula isn't worthwhile.
4680	for (const SCEV *NewReg : NewF.BaseRegs)
4681	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: NewReg)) {
4682	if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4683	goto skip_formula;
4684	if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
4685	.abs()
4686	.slt(RHS: std::abs(i: NewF.BaseOffset.getFixedValue())) &&
4687	(C->getAPInt() + NewF.BaseOffset.getFixedValue())
4688	.countr_zero() >=
4689	(unsigned)llvm::countr_zero<uint64_t>(
4690	Val: NewF.BaseOffset.getFixedValue()))
4691	goto skip_formula;
4692	}
4693
4694	// Ok, looks good.
4695	NewF.canonicalize(L: *this->L);
4696	(void)InsertFormula(LU, LUIdx, F: NewF);
4697	break;
4698	skip_formula:;
4699	}
4700	}
4701	}
4702	}
4703	}
4704
4705	/// Generate formulae for each use.
4706	void
4707	LSRInstance::GenerateAllReuseFormulae() {
4708	// This is split into multiple loops so that hasRegsUsedByUsesOtherThan
4709	// queries are more precise.
4710	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4711	LSRUse &LU = Uses [LUIdx];
4712	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4713	GenerateReassociations(LU, LUIdx, Base: LU.Formulae [i]);
4714	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4715	GenerateCombinations(LU, LUIdx, Base: LU.Formulae [i]);
4716	}
4717	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4718	LSRUse &LU = Uses [LUIdx];
4719	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4720	GenerateSymbolicOffsets(LU, LUIdx, Base: LU.Formulae [i]);
4721	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4722	GenerateConstantOffsets(LU, LUIdx, Base: LU.Formulae [i]);
4723	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4724	GenerateICmpZeroScales(LU, LUIdx, Base: LU.Formulae [i]);
4725	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4726	GenerateScales(LU, LUIdx, Base: LU.Formulae [i]);
4727	}
4728	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4729	LSRUse &LU = Uses [LUIdx];
4730	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4731	GenerateTruncates(LU, LUIdx, Base: LU.Formulae [i]);
4732	}
4733
4734	GenerateCrossUseConstantOffsets();
4735
4736	LLVM_DEBUG(dbgs() << "\n"
4737	"After generating reuse formulae:\n";
4738	print_uses(dbgs()));
4739	}
4740
4741	/// If there are multiple formulae with the same set of registers used
4742	/// by other uses, pick the best one and delete the others.
4743	void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4744	DenseSet<const SCEV *> VisitedRegs;
4745	SmallPtrSet<const SCEV *, `16`> Regs;
4746	SmallPtrSet<const SCEV *, `16`> LoserRegs;
4747	#ifndef NDEBUG
4748	bool ChangedFormulae = false;
4749	#endif
4750
4751	// Collect the best formula for each unique set of shared registers. This
4752	// is reset for each use.
4753	using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, `4`>, size_t>;
4754
4755	BestFormulaeTy BestFormulae;
4756
4757	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4758	LSRUse &LU = Uses [LUIdx];
4759	LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
4760	dbgs() << `'\n'`);
4761
4762	bool Any = false;
4763	for (size_t FIdx = `0`, NumForms = LU.Formulae.size();
4764	FIdx != NumForms; ++FIdx) {
4765	Formula &F = LU.Formulae [FIdx];
4766
4767	// Some formulas are instant losers. For example, they may depend on
4768	// nonexistent AddRecs from other loops. These need to be filtered
4769	// immediately, otherwise heuristics could choose them over others leading
4770	// to an unsatisfactory solution. Passing LoserRegs into RateFormula here
4771	// avoids the need to recompute this information across formulae using the
4772	// same bad AddRec. Passing LoserRegs is also essential unless we remove
4773	// the corresponding bad register from the Regs set.
4774	Cost CostF(L, SE, TTI, AMK);
4775	Regs.clear();
4776	CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4777	LoserRegs: &LoserRegs);
4778	if (CostF.isLoser()) {
4779	// During initial formula generation, undesirable formulae are generated
4780	// by uses within other loops that have some non-trivial address mode or
4781	// use the postinc form of the IV. LSR needs to provide these formulae
4782	// as the basis of rediscovering the desired formula that uses an AddRec
4783	// corresponding to the existing phi. Once all formulae have been
4784	// generated, these initial losers may be pruned.
4785	LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
4786	dbgs() << "\n");
4787	}
4788	else {
4789	SmallVector<const SCEV *, `4`> Key;
4790	for (const SCEV *Reg : F.BaseRegs) {
4791	if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4792	Key.push_back(Elt: Reg);
4793	}
4794	if (F.ScaledReg &&
4795	RegUses.isRegUsedByUsesOtherThan(Reg: F.ScaledReg, LUIdx))
4796	Key.push_back(Elt: F.ScaledReg);
4797	// Unstable sort by host order ok, because this is only used for
4798	// uniquifying.
4799	llvm::sort(C&: Key);
4800
4801	std::pair<BestFormulaeTy::const_iterator, bool> P =
4802	BestFormulae.insert(KV: std::make_pair(x&: Key, y&: FIdx));
4803	if (P.second)
4804	continue;
4805
4806	Formula &Best = LU.Formulae [P.first ->second];
4807
4808	Cost CostBest(L, SE, TTI, AMK);
4809	Regs.clear();
4810	CostBest.RateFormula(F: Best, Regs, VisitedRegs, LU,
4811	HardwareLoopProfitable);
4812	if (CostF.isLess(Other: CostBest))
4813	std::swap(a&: F, b&: Best);
4814	LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
4815	dbgs() << "\n"
4816	" in favor of formula ";
4817	Best.print(dbgs()); dbgs() << `'\n'`);
4818	}
4819	#ifndef NDEBUG
4820	ChangedFormulae = true;
4821	#endif
4822	LU.DeleteFormula(F);
4823	--FIdx;
4824	--NumForms;
4825	Any = true;
4826	}
4827
4828	// Now that we've filtered out some formulae, recompute the Regs set.
4829	if (Any)
4830	LU.RecomputeRegs(LUIdx, RegUses);
4831
4832	// Reset this to prepare for the next use.
4833	BestFormulae.clear();
4834	}
4835
4836	LLVM_DEBUG(if (ChangedFormulae) {
4837	dbgs() << "\n"
4838	"After filtering out undesirable candidates:\n";
4839	print_uses(dbgs());
4840	});
4841	}
4842
4843	/// Estimate the worst-case number of solutions the solver might have to
4844	/// consider. It almost never considers this many solutions because it prune the
4845	/// search space, but the pruning isn't always sufficient.
4846	size_t LSRInstance::EstimateSearchSpaceComplexity() const {
4847	size_t Power = `1`;
4848	for (const LSRUse &LU : Uses) {
4849	size_t FSize = LU.Formulae.size();
4850	if (FSize >= ComplexityLimit) {
4851	Power = ComplexityLimit;
4852	break;
4853	}
4854	Power *= FSize;
4855	if (Power >= ComplexityLimit)
4856	break;
4857	}
4858	return Power;
4859	}
4860
4861	/// When one formula uses a superset of the registers of another formula, it
4862	/// won't help reduce register pressure (though it may not necessarily hurt
4863	/// register pressure); remove it to simplify the system.
4864	void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4865	if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4866	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4867
4868	LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
4869	"which use a superset of registers used by other "
4870	"formulae.\n");
4871
4872	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4873	LSRUse &LU = Uses [LUIdx];
4874	bool Any = false;
4875	for (size_t i = `0`, e = LU.Formulae.size(); i != e; ++i) {
4876	Formula &F = LU.Formulae [i];
4877	if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
4878	continue;
4879	// Look for a formula with a constant or GV in a register. If the use
4880	// also has a formula with that same value in an immediate field,
4881	// delete the one that uses a register.
4882	for (SmallVectorImpl<const SCEV *>::const_iterator
4883	I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4884	if (const SCEVConstant C = dyn_cast<SCEVConstant>(Val: I)) {
4885	Formula NewF = F;
4886	//FIXME: Formulas should store bitwidth to do wrapping properly.
4887	// See PR41034.
4888	NewF.BaseOffset =
4889	Immediate::getFixed(MinVal: NewF.BaseOffset.getFixedValue() +
4890	(uint64_t)C->getValue()->getSExtValue());
4891	NewF.BaseRegs.erase(CI: NewF.BaseRegs.begin() +
4892	(I - F.BaseRegs.begin()));
4893	if (LU.HasFormulaWithSameRegs(F: NewF)) {
4894	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4895	dbgs() << `'\n'`);
4896	LU.DeleteFormula(F);
4897	--i;
4898	--e;
4899	Any = true;
4900	break;
4901	}
4902	} else if (const SCEVUnknown U = dyn_cast<SCEVUnknown>(Val: I)) {
4903	if (GlobalValue *GV = dyn_cast<GlobalValue>(Val: U->getValue()))
4904	if (!F.BaseGV) {
4905	Formula NewF = F;
4906	NewF.BaseGV = GV;
4907	NewF.BaseRegs.erase(CI: NewF.BaseRegs.begin() +
4908	(I - F.BaseRegs.begin()));
4909	if (LU.HasFormulaWithSameRegs(F: NewF)) {
4910	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4911	dbgs() << `'\n'`);
4912	LU.DeleteFormula(F);
4913	--i;
4914	--e;
4915	Any = true;
4916	break;
4917	}
4918	}
4919	}
4920	}
4921	}
4922	if (Any)
4923	LU.RecomputeRegs(LUIdx, RegUses);
4924	}
4925
4926	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4927	}
4928	}
4929
4930	/// When there are many registers for expressions like A, A+1, A+2, etc.,
4931	/// allocate a single register for them.
4932	void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4933	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4934	return;
4935
4936	LLVM_DEBUG(
4937	dbgs() << "The search space is too complex.\n"
4938	"Narrowing the search space by assuming that uses separated "
4939	"by a constant offset will use the same registers.\n");
4940
4941	// This is especially useful for unrolled loops.
4942
4943	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4944	LSRUse &LU = Uses [LUIdx];
4945	for (const Formula &F : LU.Formulae) {
4946	if (F.BaseOffset.isZero() \|\| (F.Scale != `0` && F.Scale != `1`))
4947	continue;
4948	assert((LU.Kind == LSRUse::Address \|\| LU.Kind == LSRUse::ICmpZero) &&
4949	"Only address and cmp uses expected to have nonzero BaseOffset");
4950
4951	LSRUse *LUThatHas = FindUseWithSimilarFormula(OrigF: F, OrigLU: LU);
4952	if (!LUThatHas)
4953	continue;
4954
4955	if (!reconcileNewOffset(LU&: LUThatHas, NewOffset: F.BaseOffset, /HasBaseReg=/* false,
4956	Kind: LU.Kind, AccessTy: LU.AccessTy))
4957	continue;
4958
4959	LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << `'\n'`);
4960
4961	LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4962	LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
4963
4964	// Transfer the fixups of LU to LUThatHas.
4965	for (LSRFixup &Fixup : LU.Fixups) {
4966	Fixup.Offset += F.BaseOffset;
4967	LUThatHas->pushFixup(f&: Fixup);
4968	LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << `'\n'`);
4969	}
4970
4971	#ifndef NDEBUG
4972	Type *FixupType = LUThatHas->Fixups[`0`].OperandValToReplace->getType();
4973	for (LSRFixup &Fixup : LUThatHas->Fixups)
4974	assert(Fixup.OperandValToReplace->getType() == FixupType &&
4975	"Expected all fixups to have the same type");
4976	#endif
4977
4978	// Delete formulae from the new use which are no longer legal.
4979	bool Any = false;
4980	for (size_t i = `0`, e = LUThatHas->Formulae.size(); i != e; ++i) {
4981	Formula &F = LUThatHas->Formulae [i];
4982	if (!isLegalUse(TTI, MinOffset: LUThatHas->MinOffset, MaxOffset: LUThatHas->MaxOffset,
4983	Kind: LUThatHas->Kind, AccessTy: LUThatHas->AccessTy, F)) {
4984	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << `'\n'`);
4985	LUThatHas->DeleteFormula(F);
4986	--i;
4987	--e;
4988	Any = true;
4989	}
4990	}
4991
4992	if (Any)
4993	LUThatHas->RecomputeRegs(LUIdx: LUThatHas - &Uses.front(), RegUses);
4994
4995	// Delete the old use.
4996	DeleteUse(LU, LUIdx);
4997	--LUIdx;
4998	--NumUses;
4999	break;
5000	}
5001	}
5002
5003	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5004	}
5005
5006	/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
5007	/// we've done more filtering, as it may be able to find more formulae to
5008	/// eliminate.
5009	void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5010	if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
5011	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5012
5013	LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
5014	"undesirable dedicated registers.\n");
5015
5016	FilterOutUndesirableDedicatedRegisters();
5017
5018	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5019	}
5020	}
5021
5022	/// If a LSRUse has multiple formulae with the same ScaledReg and Scale.
5023	/// Pick the best one and delete the others.
5024	/// This narrowing heuristic is to keep as many formulae with different
5025	/// Scale and ScaledReg pair as possible while narrowing the search space.
5026	/// The benefit is that it is more likely to find out a better solution
5027	/// from a formulae set with more Scale and ScaledReg variations than
5028	/// a formulae set with the same Scale and ScaledReg. The picking winner
5029	/// reg heuristic will often keep the formulae with the same Scale and
5030	/// ScaledReg and filter others, and we want to avoid that if possible.
5031	void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5032	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5033	return;
5034
5035	LLVM_DEBUG(
5036	dbgs() << "The search space is too complex.\n"
5037	"Narrowing the search space by choosing the best Formula "
5038	"from the Formulae with the same Scale and ScaledReg.\n");
5039
5040	// Map the "Scale ScaledReg" pair to the best formula of current LSRUse.*
5041	using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
5042
5043	BestFormulaeTy BestFormulae;
5044	#ifndef NDEBUG
5045	bool ChangedFormulae = false;
5046	#endif
5047	DenseSet<const SCEV *> VisitedRegs;
5048	SmallPtrSet<const SCEV *, `16`> Regs;
5049
5050	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5051	LSRUse &LU = Uses [LUIdx];
5052	LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
5053	dbgs() << `'\n'`);
5054
5055	// Return true if Formula FA is better than Formula FB.
5056	auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5057	// First we will try to choose the Formula with fewer new registers.
5058	// For a register used by current Formula, the more the register is
5059	// shared among LSRUses, the less we increase the register number
5060	// counter of the formula.
5061	size_t FARegNum = `0`;
5062	for (const SCEV *Reg : FA.BaseRegs) {
5063	const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5064	FARegNum += (NumUses - UsedByIndices.count() + `1`);
5065	}
5066	size_t FBRegNum = `0`;
5067	for (const SCEV *Reg : FB.BaseRegs) {
5068	const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5069	FBRegNum += (NumUses - UsedByIndices.count() + `1`);
5070	}
5071	if (FARegNum != FBRegNum)
5072	return FARegNum < FBRegNum;
5073
5074	// If the new register numbers are the same, choose the Formula with
5075	// less Cost.
5076	Cost CostFA(L, SE, TTI, AMK);
5077	Cost CostFB(L, SE, TTI, AMK);
5078	Regs.clear();
5079	CostFA.RateFormula(F: FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5080	Regs.clear();
5081	CostFB.RateFormula(F: FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5082	return CostFA.isLess(Other: CostFB);
5083	};
5084
5085	bool Any = false;
5086	for (size_t FIdx = `0`, NumForms = LU.Formulae.size(); FIdx != NumForms;
5087	++FIdx) {
5088	Formula &F = LU.Formulae [FIdx];
5089	if (!F.ScaledReg)
5090	continue;
5091	auto P = BestFormulae.insert(KV: {{F.ScaledReg, F.Scale}, FIdx});
5092	if (P.second)
5093	continue;
5094
5095	Formula &Best = LU.Formulae [P.first ->second];
5096	if (IsBetterThan (F, Best))
5097	std::swap(a&: F, b&: Best);
5098	LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
5099	dbgs() << "\n"
5100	" in favor of formula ";
5101	Best.print(dbgs()); dbgs() << `'\n'`);
5102	#ifndef NDEBUG
5103	ChangedFormulae = true;
5104	#endif
5105	LU.DeleteFormula(F);
5106	--FIdx;
5107	--NumForms;
5108	Any = true;
5109	}
5110	if (Any)
5111	LU.RecomputeRegs(LUIdx, RegUses);
5112
5113	// Reset this to prepare for the next use.
5114	BestFormulae.clear();
5115	}
5116
5117	LLVM_DEBUG(if (ChangedFormulae) {
5118	dbgs() << "\n"
5119	"After filtering out undesirable candidates:\n";
5120	print_uses(dbgs());
5121	});
5122	}
5123
5124	/// If we are over the complexity limit, filter out any post-inc prefering
5125	/// variables to only post-inc values.
5126	void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5127	if (AMK != TTI::AMK_PostIndexed)
5128	return;
5129	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5130	return;
5131
5132	LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
5133	"Narrowing the search space by choosing the lowest "
5134	"register Formula for PostInc Uses.\n");
5135
5136	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5137	LSRUse &LU = Uses [LUIdx];
5138
5139	if (LU.Kind != LSRUse::Address)
5140	continue;
5141	if (!TTI.isIndexedLoadLegal(Mode: TTI.MIM_PostInc, Ty: LU.AccessTy.getType()) &&
5142	!TTI.isIndexedStoreLegal(Mode: TTI.MIM_PostInc, Ty: LU.AccessTy.getType()))
5143	continue;
5144
5145	size_t MinRegs = std::numeric_limits<size_t>::max();
5146	for (const Formula &F : LU.Formulae)
5147	MinRegs = std::min(a: F.getNumRegs(), b: MinRegs);
5148
5149	bool Any = false;
5150	for (size_t FIdx = `0`, NumForms = LU.Formulae.size(); FIdx != NumForms;
5151	++FIdx) {
5152	Formula &F = LU.Formulae [FIdx];
5153	if (F.getNumRegs() > MinRegs) {
5154	LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
5155	dbgs() << "\n");
5156	LU.DeleteFormula(F);
5157	--FIdx;
5158	--NumForms;
5159	Any = true;
5160	}
5161	}
5162	if (Any)
5163	LU.RecomputeRegs(LUIdx, RegUses);
5164
5165	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5166	break;
5167	}
5168
5169	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5170	}
5171
5172	/// The function delete formulas with high registers number expectation.
5173	/// Assuming we don't know the value of each formula (already delete
5174	/// all inefficient), generate probability of not selecting for each
5175	/// register.
5176	/// For example,
5177	/// Use1:
5178	/// reg(a) + reg({0,+,1})
5179	/// reg(a) + reg({-1,+,1}) + 1
5180	/// reg({a,+,1})
5181	/// Use2:
5182	/// reg(b) + reg({0,+,1})
5183	/// reg(b) + reg({-1,+,1}) + 1
5184	/// reg({b,+,1})
5185	/// Use3:
5186	/// reg(c) + reg(b) + reg({0,+,1})
5187	/// reg(c) + reg({b,+,1})
5188	///
5189	/// Probability of not selecting
5190	/// Use1 Use2 Use3
5191	/// reg(a) (1/3) 1 * 1*
5192	/// reg(b) 1 (1/3) * (1/2)*
5193	/// reg({0,+,1}) (2/3) (2/3) * (1/2)*
5194	/// reg({-1,+,1}) (2/3) (2/3) * 1*
5195	/// reg({a,+,1}) (2/3) 1 * 1*
5196	/// reg({b,+,1}) 1 (2/3) * (2/3)*
5197	/// reg(c) 1 1 * 0*
5198	///
5199	/// Now count registers number mathematical expectation for each formula:
5200	/// Note that for each use we exclude probability if not selecting for the use.
5201	/// For example for Use1 probability for reg(a) would be just 1 1 (excluding*
5202	/// probabilty 1/3 of not selecting for Use1).
5203	/// Use1:
5204	/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted
5205	/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted
5206	/// reg({a,+,1}) 1
5207	/// Use2:
5208	/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted
5209	/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted
5210	/// reg({b,+,1}) 2/3
5211	/// Use3:
5212	/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
5213	/// reg(c) + reg({b,+,1}) 1 + 2/3
5214	void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5215	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5216	return;
5217	// Ok, we have too many of formulae on our hands to conveniently handle.
5218	// Use a rough heuristic to thin out the list.
5219
5220	// Set of Regs wich will be 100% used in final solution.
5221	// Used in each formula of a solution (in example above this is reg(c)).
5222	// We can skip them in calculations.
5223	SmallPtrSet<const SCEV *, `4`> UniqRegs;
5224	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5225
5226	// Map each register to probability of not selecting
5227	DenseMap <const SCEV , float*> RegNumMap;
5228	for (const SCEV *Reg : RegUses) {
5229	if (UniqRegs.count(Ptr: Reg))
5230	continue;
5231	float PNotSel = `1`;
5232	for (const LSRUse &LU : Uses) {
5233	if (!LU.Regs.count(Ptr: Reg))
5234	continue;
5235	float P = LU.getNotSelectedProbability(Reg);
5236	if (P != `0.0`)
5237	PNotSel *= P;
5238	else
5239	UniqRegs.insert(Ptr: Reg);
5240	}
5241	RegNumMap.insert(KV: std::make_pair(x&: Reg, y&: PNotSel));
5242	}
5243
5244	LLVM_DEBUG(
5245	dbgs() << "Narrowing the search space by deleting costly formulas\n");
5246
5247	// Delete formulas where registers number expectation is high.
5248	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5249	LSRUse &LU = Uses [LUIdx];
5250	// If nothing to delete - continue.
5251	if (LU.Formulae.size() < `2`)
5252	continue;
5253	// This is temporary solution to test performance. Float should be
5254	// replaced with round independent type (based on integers) to avoid
5255	// different results for different target builds.
5256	float FMinRegNum = LU.Formulae [`0`].getNumRegs();
5257	float FMinARegNum = LU.Formulae [`0`].getNumRegs();
5258	size_t MinIdx = `0`;
5259	for (size_t i = `0`, e = LU.Formulae.size(); i != e; ++i) {
5260	Formula &F = LU.Formulae [i];
5261	float FRegNum = `0`;
5262	float FARegNum = `0`;
5263	for (const SCEV *BaseReg : F.BaseRegs) {
5264	if (UniqRegs.count(Ptr: BaseReg))
5265	continue;
5266	FRegNum += RegNumMap [BaseReg] / LU.getNotSelectedProbability(Reg: BaseReg);
5267	if (isa<SCEVAddRecExpr>(Val: BaseReg))
5268	FARegNum +=
5269	RegNumMap [BaseReg] / LU.getNotSelectedProbability(Reg: BaseReg);
5270	}
5271	if (const SCEV *ScaledReg = F.ScaledReg) {
5272	if (!UniqRegs.count(Ptr: ScaledReg)) {
5273	FRegNum +=
5274	RegNumMap [ScaledReg] / LU.getNotSelectedProbability(Reg: ScaledReg);
5275	if (isa<SCEVAddRecExpr>(Val: ScaledReg))
5276	FARegNum +=
5277	RegNumMap [ScaledReg] / LU.getNotSelectedProbability(Reg: ScaledReg);
5278	}
5279	}
5280	if (FMinRegNum > FRegNum \|\|
5281	(FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5282	FMinRegNum = FRegNum;
5283	FMinARegNum = FARegNum;
5284	MinIdx = i;
5285	}
5286	}
5287	LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
5288	dbgs() << " with min reg num " << FMinRegNum << `'\n'`);
5289	if (MinIdx != `0`)
5290	std::swap(a&: LU.Formulae [MinIdx], b&: LU.Formulae [`0`]);
5291	while (LU.Formulae.size() != `1`) {
5292	LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
5293	dbgs() << `'\n'`);
5294	LU.Formulae.pop_back();
5295	}
5296	LU.RecomputeRegs(LUIdx, RegUses);
5297	assert(LU.Formulae.size() == `1` && "Should be exactly 1 min regs formula");
5298	Formula &F = LU.Formulae [`0`];
5299	LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << `'\n'`);
5300	// When we choose the formula, the regs become unique.
5301	UniqRegs.insert_range(R&: F.BaseRegs);
5302	if (F.ScaledReg)
5303	UniqRegs.insert(Ptr: F.ScaledReg);
5304	}
5305	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5306	}
5307
5308	// Check if Best and Reg are SCEVs separated by a constant amount C, and if so
5309	// would the addressing offset +C would be legal where the negative offset -C is
5310	// not.
5311	static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
5312	ScalarEvolution &SE, const SCEV *Best,
5313	const SCEV *Reg,
5314	MemAccessTy AccessType) {
5315	if (Best->getType() != Reg->getType() \|\|
5316	(isa<SCEVAddRecExpr>(Val: Best) && isa<SCEVAddRecExpr>(Val: Reg) &&
5317	cast<SCEVAddRecExpr>(Val: Best)->getLoop() !=
5318	cast<SCEVAddRecExpr>(Val: Reg)->getLoop()))
5319	return false;
5320	std::optional<APInt> Diff = SE.computeConstantDifference(LHS: Best, RHS: Reg);
5321	if (!Diff)
5322	return false;
5323
5324	return TTI.isLegalAddressingMode(
5325	Ty: AccessType.MemTy, /BaseGV=/nullptr,
5326	/BaseOffset=/Diff ->getSExtValue(),
5327	/HasBaseReg=/true, /Scale=/`0`, AddrSpace: AccessType.AddrSpace) &&
5328	!TTI.isLegalAddressingMode(
5329	Ty: AccessType.MemTy, /BaseGV=/nullptr,
5330	/BaseOffset=/-Diff ->getSExtValue(),
5331	/HasBaseReg=/true, /Scale=/`0`, AddrSpace: AccessType.AddrSpace);
5332	}
5333
5334	/// Pick a register which seems likely to be profitable, and then in any use
5335	/// which has any reference to that register, delete all formulae which do not
5336	/// reference that register.
5337	void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5338	// With all other options exhausted, loop until the system is simple
5339	// enough to handle.
5340	SmallPtrSet<const SCEV *, `4`> Taken;
5341	while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
5342	// Ok, we have too many of formulae on our hands to conveniently handle.
5343	// Use a rough heuristic to thin out the list.
5344	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5345
5346	// Pick the register which is used by the most LSRUses, which is likely
5347	// to be a good reuse register candidate.
5348	const SCEV Best = nullptr*;
5349	unsigned BestNum = `0`;
5350	for (const SCEV *Reg : RegUses) {
5351	if (Taken.count(Ptr: Reg))
5352	continue;
5353	if (!Best) {
5354	Best = Reg;
5355	BestNum = RegUses.getUsedByIndices(Reg).count();
5356	} else {
5357	unsigned Count = RegUses.getUsedByIndices(Reg).count();
5358	if (Count > BestNum) {
5359	Best = Reg;
5360	BestNum = Count;
5361	}
5362
5363	// If the scores are the same, but the Reg is simpler for the target
5364	// (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
5365	// handle +C but not -C), opt for the simpler formula.
5366	if (Count == BestNum) {
5367	int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5368	if (LUIdx >= `0` && Uses [LUIdx].Kind == LSRUse::Address &&
5369	IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
5370	AccessType: Uses [LUIdx].AccessTy)) {
5371	Best = Reg;
5372	BestNum = Count;
5373	}
5374	}
5375	}
5376	}
5377	assert(Best && "Failed to find best LSRUse candidate");
5378
5379	LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
5380	<< " will yield profitable reuse.\n");
5381	Taken.insert(Ptr: Best);
5382
5383	// In any use with formulae which references this register, delete formulae
5384	// which don't reference it.
5385	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5386	LSRUse &LU = Uses [LUIdx];
5387	if (!LU.Regs.count(Ptr: Best)) continue;
5388
5389	bool Any = false;
5390	for (size_t i = `0`, e = LU.Formulae.size(); i != e; ++i) {
5391	Formula &F = LU.Formulae [i];
5392	if (!F.referencesReg(S: Best)) {
5393	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << `'\n'`);
5394	LU.DeleteFormula(F);
5395	--e;
5396	--i;
5397	Any = true;
5398	assert(e != `0` && "Use has no formulae left! Is Regs inconsistent?");
5399	continue;
5400	}
5401	}
5402
5403	if (Any)
5404	LU.RecomputeRegs(LUIdx, RegUses);
5405	}
5406
5407	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5408	}
5409	}
5410
5411	/// If there are an extraordinary number of formulae to choose from, use some
5412	/// rough heuristics to prune down the number of formulae. This keeps the main
5413	/// solver from taking an extraordinary amount of time in some worst-case
5414	/// scenarios.
5415	void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5416	NarrowSearchSpaceByDetectingSupersets();
5417	NarrowSearchSpaceByCollapsingUnrolledCode();
5418	NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5419	if (FilterSameScaledReg)
5420	NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5421	NarrowSearchSpaceByFilterPostInc();
5422	if (LSRExpNarrow)
5423	NarrowSearchSpaceByDeletingCostlyFormulas();
5424	else
5425	NarrowSearchSpaceByPickingWinnerRegs();
5426	}
5427
5428	/// This is the recursive solver.
5429	void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5430	Cost &SolutionCost,
5431	SmallVectorImpl<const Formula *> &Workspace,
5432	const Cost &CurCost,
5433	const SmallPtrSet<const SCEV *, `16`> &CurRegs,
5434	DenseSet<const SCEV > &VisitedRegs) const* {
5435	// Some ideas:
5436	// - prune more:
5437	// - use more aggressive filtering
5438	// - sort the formula so that the most profitable solutions are found first
5439	// - sort the uses too
5440	// - search faster:
5441	// - don't compute a cost, and then compare. compare while computing a cost
5442	// and bail early.
5443	// - track register sets with SmallBitVector
5444
5445	const LSRUse &LU = Uses [Workspace.size()];
5446
5447	// If this use references any register that's already a part of the
5448	// in-progress solution, consider it a requirement that a formula must
5449	// reference that register in order to be considered. This prunes out
5450	// unprofitable searching.
5451	SmallSetVector<const SCEV *, `4`> ReqRegs;
5452	for (const SCEV *S : CurRegs)
5453	if (LU.Regs.count(Ptr: S))
5454	ReqRegs.insert(X: S);
5455
5456	SmallPtrSet<const SCEV *, `16`> NewRegs;
5457	Cost NewCost(L, SE, TTI, AMK);
5458	for (const Formula &F : LU.Formulae) {
5459	// Ignore formulae which may not be ideal in terms of register reuse of
5460	// ReqRegs. The formula should use all required registers before
5461	// introducing new ones.
5462	// This can sometimes (notably when trying to favour postinc) lead to
5463	// sub-optimial decisions. There it is best left to the cost modelling to
5464	// get correct.
5465	if (!(AMK & TTI::AMK_PostIndexed) \|\| LU.Kind != LSRUse::Address) {
5466	int NumReqRegsToFind = std::min(a: F.getNumRegs(), b: ReqRegs.size());
5467	for (const SCEV *Reg : ReqRegs) {
5468	if ((F.ScaledReg && F.ScaledReg == Reg) \|\|
5469	is_contained(Range: F.BaseRegs, Element: Reg)) {
5470	--NumReqRegsToFind;
5471	if (NumReqRegsToFind == `0`)
5472	break;
5473	}
5474	}
5475	if (NumReqRegsToFind != `0`) {
5476	// If none of the formulae satisfied the required registers, then we could
5477	// clear ReqRegs and try again. Currently, we simply give up in this case.
5478	continue;
5479	}
5480	}
5481
5482	// Evaluate the cost of the current formula. If it's already worse than
5483	// the current best, prune the search at that point.
5484	NewCost = CurCost;
5485	NewRegs = CurRegs;
5486	NewCost.RateFormula(F, Regs&: NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5487	if (NewCost.isLess(Other: SolutionCost)) {
5488	Workspace.push_back(Elt: &F);
5489	if (Workspace.size() != Uses.size()) {
5490	SolveRecurse(Solution, SolutionCost, Workspace, CurCost: NewCost,
5491	CurRegs: NewRegs, VisitedRegs);
5492	if (F.getNumRegs() == `1` && Workspace.size() == `1`)
5493	VisitedRegs.insert(V: F.ScaledReg ? F.ScaledReg : F.BaseRegs [`0`]);
5494	} else {
5495	LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
5496	dbgs() << ".\nRegs:\n";
5497	for (const SCEV *S : NewRegs) dbgs()
5498	<< "- " << *S << "\n";
5499	dbgs() << `'\n'`);
5500
5501	SolutionCost = NewCost;
5502	Solution = Workspace;
5503	}
5504	Workspace.pop_back();
5505	}
5506	}
5507	}
5508
5509	/// Choose one formula from each use. Return the results in the given Solution
5510	/// vector.
5511	void LSRInstance::Solve(SmallVectorImpl<const Formula > &Solution) const* {
5512	SmallVector<const Formula *, `8`> Workspace;
5513	Cost SolutionCost(L, SE, TTI, AMK);
5514	SolutionCost.Lose();
5515	Cost CurCost(L, SE, TTI, AMK);
5516	SmallPtrSet<const SCEV *, `16`> CurRegs;
5517	DenseSet<const SCEV *> VisitedRegs;
5518	Workspace.reserve(N: Uses.size());
5519
5520	// SolveRecurse does all the work.
5521	SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5522	CurRegs, VisitedRegs);
5523	if (Solution.empty()) {
5524	LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
5525	return;
5526	}
5527
5528	// Ok, we've now made all our decisions.
5529	LLVM_DEBUG(dbgs() << "\n"
5530	"The chosen solution requires ";
5531	SolutionCost.print(dbgs()); dbgs() << ":\n";
5532	for (size_t i = `0`, e = Uses.size(); i != e; ++i) {
5533	dbgs() << " ";
5534	Uses[i].print(dbgs());
5535	dbgs() << "\n"
5536	" ";
5537	Solution[i]->print(dbgs());
5538	dbgs() << `'\n'`;
5539	});
5540
5541	assert(Solution.size() == Uses.size() && "Malformed solution!");
5542
5543	const bool EnableDropUnprofitableSolution = [&] {
5544	switch (AllowDropSolutionIfLessProfitable) {
5545	case cl::BOU_TRUE:
5546	return true;
5547	case cl::BOU_FALSE:
5548	return false;
5549	case cl::BOU_UNSET:
5550	return TTI.shouldDropLSRSolutionIfLessProfitable();
5551	}
5552	llvm_unreachable("Unhandled cl::boolOrDefault enum");
5553	}();
5554
5555	if (BaselineCost.isLess(Other: SolutionCost)) {
5556	if (!EnableDropUnprofitableSolution)
5557	LLVM_DEBUG(
5558	dbgs() << "Baseline is more profitable than chosen solution, "
5559	"add option 'lsr-drop-solution' to drop LSR solution.\n");
5560	else {
5561	LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
5562	"solution, dropping LSR solution.\n";);
5563	Solution.clear();
5564	}
5565	}
5566	}
5567
5568	/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as
5569	/// we can go while still being dominated by the input positions. This helps
5570	/// canonicalize the insert position, which encourages sharing.
5571	BasicBlock::iterator
5572	LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
5573	const SmallVectorImpl<Instruction *> &Inputs)
5574	const {
5575	Instruction Tentative = &IP;
5576	while (true) {
5577	bool AllDominate = true;
5578	Instruction BetterPos = nullptr*;
5579	// Don't bother attempting to insert before a catchswitch, their basic block
5580	// cannot have other non-PHI instructions.
5581	if (isa<CatchSwitchInst>(Val: Tentative))
5582	return IP;
5583
5584	for (Instruction *Inst : Inputs) {
5585	if (Inst == Tentative \|\| !DT.dominates(Def: Inst, User: Tentative)) {
5586	AllDominate = false;
5587	break;
5588	}
5589	// Attempt to find an insert position in the middle of the block,
5590	// instead of at the end, so that it can be used for other expansions.
5591	if (Tentative->getParent() == Inst->getParent() &&
5592	(!BetterPos \|\| !DT.dominates(Def: Inst, User: BetterPos)))
5593	BetterPos = &*std::next(x: BasicBlock::iterator (Inst));
5594	}
5595	if (!AllDominate)
5596	break;
5597	if (BetterPos)
5598	IP = BetterPos->getIterator();
5599	else
5600	IP = Tentative->getIterator();
5601
5602	const Loop *IPLoop = LI.getLoopFor(BB: IP ->getParent());
5603	unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : `0`;
5604
5605	BasicBlock *IDom;
5606	for (DomTreeNode *Rung = DT.getNode(BB: IP ->getParent()); ; ) {
5607	if (!Rung) return IP;
5608	Rung = Rung->getIDom();
5609	if (!Rung) return IP;
5610	IDom = Rung->getBlock();
5611
5612	// Don't climb into a loop though.
5613	const Loop *IDomLoop = LI.getLoopFor(BB: IDom);
5614	unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : `0`;
5615	if (IDomDepth <= IPLoopDepth &&
5616	(IDomDepth != IPLoopDepth \|\| IDomLoop == IPLoop))
5617	break;
5618	}
5619
5620	Tentative = IDom->getTerminator();
5621	}
5622
5623	return IP;
5624	}
5625
5626	/// Determine an input position which will be dominated by the operands and
5627	/// which will dominate the result.
5628	BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
5629	BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
5630	// Collect some instructions which must be dominated by the
5631	// expanding replacement. These must be dominated by any operands that
5632	// will be required in the expansion.
5633	SmallVector<Instruction *, `4`> Inputs;
5634	if (Instruction *I = dyn_cast<Instruction>(Val: LF.OperandValToReplace))
5635	Inputs.push_back(Elt: I);
5636	if (LU.Kind == LSRUse::ICmpZero)
5637	if (Instruction *I =
5638	dyn_cast<Instruction>(Val: cast<ICmpInst>(Val: LF.UserInst)->getOperand(i_nocapture: `1`)))
5639	Inputs.push_back(Elt: I);
5640	if (LF.PostIncLoops.count(Ptr: L)) {
5641	if (LF.isUseFullyOutsideLoop(L))
5642	Inputs.push_back(Elt: L->getLoopLatch()->getTerminator());
5643	else
5644	Inputs.push_back(Elt: IVIncInsertPos);
5645	}
5646	// The expansion must also be dominated by the increment positions of any
5647	// loops it for which it is using post-inc mode.
5648	for (const Loop *PIL : LF.PostIncLoops) {
5649	if (PIL == L) continue;
5650
5651	// Be dominated by the loop exit.
5652	SmallVector<BasicBlock *, `4`> ExitingBlocks;
5653	PIL->getExitingBlocks(ExitingBlocks);
5654	if (!ExitingBlocks.empty()) {
5655	BasicBlock *BB = ExitingBlocks [`0`];
5656	for (unsigned i = `1`, e = ExitingBlocks.size(); i != e; ++i)
5657	BB = DT.findNearestCommonDominator(A: BB, B: ExitingBlocks [i]);
5658	Inputs.push_back(Elt: BB->getTerminator());
5659	}
5660	}
5661
5662	assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() &&
5663	"Insertion point must be a normal instruction");
5664
5665	// Then, climb up the immediate dominator tree as far as we can go while
5666	// still being dominated by the input positions.
5667	BasicBlock::iterator IP = HoistInsertPosition(IP: LowestIP, Inputs);
5668
5669	// Don't insert instructions before PHI nodes.
5670	while (isa<PHINode>(Val: IP)) ++IP;
5671
5672	// Ignore landingpad instructions.
5673	while (IP ->isEHPad()) ++IP;
5674
5675	// Set IP below instructions recently inserted by SCEVExpander. This keeps the
5676	// IP consistent across expansions and allows the previously inserted
5677	// instructions to be reused by subsequent expansion.
5678	while (Rewriter.isInsertedInstruction(I: &*IP) && IP != LowestIP)
5679	++IP;
5680
5681	return IP;
5682	}
5683
5684	/// Emit instructions for the leading candidate expression for this LSRUse (this
5685	/// is called "expanding").
5686	Value LSRInstance::Expand(const* LSRUse &LU, const LSRFixup &LF,
5687	const Formula &F, BasicBlock::iterator IP,
5688	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5689	if (LU.RigidFormula)
5690	return LF.OperandValToReplace;
5691
5692	// Determine an input position which will be dominated by the operands and
5693	// which will dominate the result.
5694	IP = AdjustInsertPositionForExpand(LowestIP: IP, LF, LU);
5695	Rewriter.setInsertPoint(&*IP);
5696
5697	// Inform the Rewriter if we have a post-increment use, so that it can
5698	// perform an advantageous expansion.
5699	Rewriter.setPostInc(LF.PostIncLoops);
5700
5701	// This is the type that the user actually needs.
5702	Type *OpTy = LF.OperandValToReplace->getType();
5703	// This will be the type that we'll initially expand to.
5704	Type *Ty = F.getType();
5705	if (!Ty)
5706	// No type known; just expand directly to the ultimate type.
5707	Ty = OpTy;
5708	else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(Ty: OpTy))
5709	// Expand directly to the ultimate type if it's the right size.
5710	Ty = OpTy;
5711	// This is the type to do integer arithmetic in.
5712	Type *IntTy = SE.getEffectiveSCEVType(Ty);
5713
5714	// Build up a list of operands to add together to form the full base.
5715	SmallVector<SCEVUse, `8`> Ops;
5716
5717	// Expand the BaseRegs portion.
5718	for (const SCEV *Reg : F.BaseRegs) {
5719	assert(!Reg->isZero() && "Zero allocated in a base register!");
5720
5721	// If we're expanding for a post-inc user, make the post-inc adjustment.
5722	Reg = denormalizeForPostIncUse(S: Reg, Loops: LF.PostIncLoops, SE);
5723	Ops.push_back(Elt: SE.getUnknown(V: Rewriter.expandCodeFor(SH: Reg, Ty: nullptr)));
5724	}
5725
5726	// Expand the ScaledReg portion.
5727	Value ICmpScaledV = nullptr*;
5728	if (F.Scale != `0`) {
5729	const SCEV *ScaledS = F.ScaledReg;
5730
5731	// If we're expanding for a post-inc user, make the post-inc adjustment.
5732	PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
5733	ScaledS = denormalizeForPostIncUse(S: ScaledS, Loops, SE);
5734
5735	if (LU.Kind == LSRUse::ICmpZero) {
5736	// Expand ScaleReg as if it was part of the base regs.
5737	if (F.Scale == `1`)
5738	Ops.push_back(
5739	Elt: SE.getUnknown(V: Rewriter.expandCodeFor(SH: ScaledS, Ty: nullptr)));
5740	else {
5741	// An interesting way of "folding" with an icmp is to use a negated
5742	// scale, which we'll implement by inserting it into the other operand
5743	// of the icmp.
5744	assert(F.Scale == -`1` &&
5745	"The only scale supported by ICmpZero uses is -1!");
5746	ICmpScaledV = Rewriter.expandCodeFor(SH: ScaledS, Ty: nullptr);
5747	}
5748	} else {
5749	// Otherwise just expand the scaled register and an explicit scale,
5750	// which is expected to be matched as part of the address.
5751
5752	// Flush the operand list to suppress SCEVExpander hoisting address modes.
5753	// Unless the addressing mode will not be folded.
5754	if (!Ops.empty() && LU.Kind == LSRUse::Address &&
5755	isAMCompletelyFolded(TTI, LU, F)) {
5756	Value FullV = Rewriter.expandCodeFor(SH: SE.getAddExpr(Ops), Ty: nullptr*);
5757	Ops.clear();
5758	Ops.push_back(Elt: SE.getUnknown(V: FullV));
5759	}
5760	ScaledS = SE.getUnknown(V: Rewriter.expandCodeFor(SH: ScaledS, Ty: nullptr));
5761	if (F.Scale != `1`)
5762	ScaledS =
5763	SE.getMulExpr(LHS: ScaledS, RHS: SE.getConstant(Ty: ScaledS->getType(), V: F.Scale));
5764	Ops.push_back(Elt: ScaledS);
5765	}
5766	}
5767
5768	// Expand the GV portion.
5769	if (F.BaseGV) {
5770	// Flush the operand list to suppress SCEVExpander hoisting.
5771	if (!Ops.empty()) {
5772	Value *FullV = Rewriter.expandCodeFor(SH: SE.getAddExpr(Ops), Ty: IntTy);
5773	Ops.clear();
5774	Ops.push_back(Elt: SE.getUnknown(V: FullV));
5775	}
5776	Ops.push_back(Elt: SE.getUnknown(V: F.BaseGV));
5777	}
5778
5779	// Flush the operand list to suppress SCEVExpander hoisting of both folded and
5780	// unfolded offsets. LSR assumes they both live next to their uses.
5781	if (!Ops.empty()) {
5782	Value *FullV = Rewriter.expandCodeFor(SH: SE.getAddExpr(Ops), Ty);
5783	Ops.clear();
5784	Ops.push_back(Elt: SE.getUnknown(V: FullV));
5785	}
5786
5787	// FIXME: Are we sure we won't get a mismatch here? Is there a way to bail
5788	// out at this point, or should we generate a SCEV adding together mixed
5789	// offsets?
5790	assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5791	"Expanding mismatched offsets\n");
5792	// Expand the immediate portion.
5793	Immediate Offset = F.BaseOffset.addUnsigned(RHS: LF.Offset);
5794	if (Offset.isNonZero()) {
5795	if (LU.Kind == LSRUse::ICmpZero) {
5796	// The other interesting way of "folding" with an ICmpZero is to use a
5797	// negated immediate.
5798	if (!ICmpScaledV) {
5799	// TODO: Avoid implicit trunc?
5800	// See https://github.com/llvm/llvm-project/issues/112510.
5801	ICmpScaledV = ConstantInt::getSigned(
5802	Ty: IntTy, V: -(uint64_t)Offset.getFixedValue(), /ImplicitTrunc=/true);
5803	} else {
5804	Ops.push_back(Elt: SE.getUnknown(V: ICmpScaledV));
5805	ICmpScaledV = ConstantInt::getSigned(Ty: IntTy, V: Offset.getFixedValue(),
5806	/ImplicitTrunc=/true);
5807	}
5808	} else {
5809	// Just add the immediate values. These again are expected to be matched
5810	// as part of the address.
5811	Ops.push_back(Elt: Offset.getUnknownSCEV(SE, Ty: IntTy));
5812	}
5813	}
5814
5815	// Expand the unfolded offset portion.
5816	Immediate UnfoldedOffset = F.UnfoldedOffset;
5817	if (UnfoldedOffset.isNonZero()) {
5818	// Just add the immediate values.
5819	Ops.push_back(Elt: UnfoldedOffset.getUnknownSCEV(SE, Ty: IntTy));
5820	}
5821
5822	// Emit instructions summing all the operands.
5823	const SCEV *FullS = Ops.empty() ?
5824	SE.getConstant(Ty: IntTy, V: `0`) :
5825	SE.getAddExpr(Ops);
5826	Value *FullV = Rewriter.expandCodeFor(SH: FullS, Ty);
5827
5828	// We're done expanding now, so reset the rewriter.
5829	Rewriter.clearPostInc();
5830
5831	// An ICmpZero Formula represents an ICmp which we're handling as a
5832	// comparison against zero. Now that we've expanded an expression for that
5833	// form, update the ICmp's other operand.
5834	if (LU.Kind == LSRUse::ICmpZero) {
5835	ICmpInst *CI = cast<ICmpInst>(Val: LF.UserInst);
5836	if (auto *OperandIsInstr = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: `1`)))
5837	DeadInsts.emplace_back(Args&: OperandIsInstr);
5838	assert(!F.BaseGV && "ICmp does not support folding a global value and "
5839	"a scale at the same time!");
5840	if (F.Scale == -`1`) {
5841	if (ICmpScaledV->getType() != OpTy) {
5842	Instruction *Cast = CastInst::Create(
5843	CastInst::getCastOpcode(Val: ICmpScaledV, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false),
5844	S: ICmpScaledV, Ty: OpTy, Name: "tmp", InsertBefore: CI->getIterator());
5845	ICmpScaledV = Cast;
5846	}
5847	CI->setOperand(i_nocapture: `1`, Val_nocapture: ICmpScaledV);
5848	} else {
5849	// A scale of 1 means that the scale has been expanded as part of the
5850	// base regs.
5851	assert((F.Scale == `0` \|\| F.Scale == `1`) &&
5852	"ICmp does not support folding a global value and "
5853	"a scale at the same time!");
5854	// TODO: Avoid implicit trunc?
5855	// See https://github.com/llvm/llvm-project/issues/112510.
5856	Constant *C = ConstantInt::getSigned(Ty: SE.getEffectiveSCEVType(Ty: OpTy),
5857	V: -(uint64_t)Offset.getFixedValue(),
5858	/ImplicitTrunc=/true);
5859	if (C->getType() != OpTy) {
5860	C = ConstantFoldCastOperand(
5861	Opcode: CastInst::getCastOpcode(Val: C, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false), C, DestTy: OpTy,
5862	DL: CI->getDataLayout());
5863	assert(C && "Cast of ConstantInt should have folded");
5864	}
5865
5866	CI->setOperand(i_nocapture: `1`, Val_nocapture: C);
5867	}
5868	}
5869
5870	return FullV;
5871	}
5872
5873	/// Helper for Rewrite. PHI nodes are special because the use of their operands
5874	/// effectively happens in their predecessor blocks, so the expression may need
5875	/// to be expanded in multiple places.
5876	void LSRInstance::RewriteForPHI(PHINode PN, const* LSRUse &LU,
5877	const LSRFixup &LF, const Formula &F,
5878	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5879	DenseMap<BasicBlock , Value > Inserted;
5880
5881	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i)
5882	if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
5883	bool needUpdateFixups = false;
5884	BasicBlock *BB = PN->getIncomingBlock(i);
5885
5886	// If this is a critical edge, split the edge so that we do not insert
5887	// the code on all predecessor/successor paths. We do this unless this
5888	// is the canonical backedge for this loop, which complicates post-inc
5889	// users.
5890	if (e != `1` && BB->getTerminator()->getNumSuccessors() > `1` &&
5891	!isa<IndirectBrInst>(Val: BB->getTerminator()) &&
5892	!isa<CatchSwitchInst>(Val: BB->getTerminator())) {
5893	BasicBlock *Parent = PN->getParent();
5894	Loop *PNLoop = LI.getLoopFor(BB: Parent);
5895	if (!PNLoop \|\| Parent != PNLoop->getHeader()) {
5896	// Split the critical edge.
5897	BasicBlock NewBB = nullptr*;
5898	if (!Parent->isLandingPad()) {
5899	NewBB =
5900	SplitCriticalEdge(Src: BB, Dst: Parent,
5901	Options: CriticalEdgeSplittingOptions (&DT, &LI, MSSAU)
5902	.setMergeIdenticalEdges()
5903	.setKeepOneInputPHIs());
5904	} else {
5905	SmallVector<BasicBlock*, `2`> NewBBs;
5906	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5907	SplitLandingPadPredecessors(OrigBB: Parent, Preds: BB, Suffix: "", Suffix2: "", NewBBs, DTU: &DTU, LI: &LI);
5908	NewBB = NewBBs [`0`];
5909	}
5910	// If NewBB==NULL, then SplitCriticalEdge refused to split because all
5911	// phi predecessors are identical. The simple thing to do is skip
5912	// splitting in this case rather than complicate the API.
5913	if (NewBB) {
5914	// If PN is outside of the loop and BB is in the loop, we want to
5915	// move the block to be immediately before the PHI block, not
5916	// immediately after BB.
5917	if (L->contains(BB) && !L->contains(Inst: PN))
5918	NewBB->moveBefore(MovePos: PN->getParent());
5919
5920	// Splitting the edge can reduce the number of PHI entries we have.
5921	e = PN->getNumIncomingValues();
5922	BB = NewBB;
5923	i = PN->getBasicBlockIndex(BB);
5924
5925	needUpdateFixups = true;
5926	}
5927	}
5928	}
5929
5930	std::pair<DenseMap<BasicBlock , Value >::iterator, bool> Pair =
5931	Inserted.try_emplace(Key: BB);
5932	if (!Pair.second)
5933	PN->setIncomingValue(i, V: Pair.first ->second);
5934	else {
5935	Value *FullV =
5936	Expand(LU, LF, F, IP: BB->getTerminator()->getIterator(), DeadInsts);
5937
5938	// If this is reuse-by-noop-cast, insert the noop cast.
5939	Type *OpTy = LF.OperandValToReplace->getType();
5940	if (FullV->getType() != OpTy)
5941	FullV = CastInst::Create(
5942	CastInst::getCastOpcode(Val: FullV, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false), S: FullV,
5943	Ty: LF.OperandValToReplace->getType(), Name: "tmp",
5944	InsertBefore: BB->getTerminator()->getIterator());
5945
5946	// If the incoming block for this value is not in the loop, it means the
5947	// current PHI is not in a loop exit, so we must create a LCSSA PHI for
5948	// the inserted value.
5949	if (auto *I = dyn_cast<Instruction>(Val: FullV))
5950	if (L->contains(Inst: I) && !L->contains(BB))
5951	InsertedNonLCSSAInsts.insert(X: I);
5952
5953	PN->setIncomingValue(i, V: FullV);
5954	Pair.first ->second = FullV;
5955	}
5956
5957	// If LSR splits critical edge and phi node has other pending
5958	// fixup operands, we need to update those pending fixups. Otherwise
5959	// formulae will not be implemented completely and some instructions
5960	// will not be eliminated.
5961	if (needUpdateFixups) {
5962	for (LSRUse &LU : Uses)
5963	for (LSRFixup &Fixup : LU.Fixups)
5964	// If fixup is supposed to rewrite some operand in the phi
5965	// that was just updated, it may be already moved to
5966	// another phi node. Such fixup requires update.
5967	if (Fixup.UserInst == PN) {
5968	// Check if the operand we try to replace still exists in the
5969	// original phi.
5970	bool foundInOriginalPHI = false;
5971	for (const auto &val : PN->incoming_values())
5972	if (val == Fixup.OperandValToReplace) {
5973	foundInOriginalPHI = true;
5974	break;
5975	}
5976
5977	// If fixup operand found in original PHI - nothing to do.
5978	if (foundInOriginalPHI)
5979	continue;
5980
5981	// Otherwise it might be moved to another PHI and requires update.
5982	// If fixup operand not found in any of the incoming blocks that
5983	// means we have already rewritten it - nothing to do.
5984	for (const auto &Block : PN->blocks())
5985	for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(Val: I);
5986	++I) {
5987	PHINode *NewPN = cast<PHINode>(Val&: I);
5988	for (const auto &val : NewPN->incoming_values())
5989	if (val == Fixup.OperandValToReplace)
5990	Fixup.UserInst = NewPN;
5991	}
5992	}
5993	}
5994	}
5995	}
5996
5997	/// Emit instructions for the leading candidate expression for this LSRUse (this
5998	/// is called "expanding"), and update the UserInst to reference the newly
5999	/// expanded value.
6000	void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
6001	const Formula &F,
6002	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
6003	// First, find an insertion point that dominates UserInst. For PHI nodes,
6004	// find the nearest block which dominates all the relevant uses.
6005	if (PHINode *PN = dyn_cast<PHINode>(Val: LF.UserInst)) {
6006	RewriteForPHI(PN, LU, LF, F, DeadInsts);
6007	} else {
6008	Value *FullV = Expand(LU, LF, F, IP: LF.UserInst->getIterator(), DeadInsts);
6009
6010	// If this is reuse-by-noop-cast, insert the noop cast.
6011	Type *OpTy = LF.OperandValToReplace->getType();
6012	if (FullV->getType() != OpTy) {
6013	Instruction *Cast =
6014	CastInst::Create(CastInst::getCastOpcode(Val: FullV, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false),
6015	S: FullV, Ty: OpTy, Name: "tmp", InsertBefore: LF.UserInst->getIterator());
6016	FullV = Cast;
6017	}
6018
6019	// Update the user. ICmpZero is handled specially here (for now) because
6020	// Expand may have updated one of the operands of the icmp already, and
6021	// its new value may happen to be equal to LF.OperandValToReplace, in
6022	// which case doing replaceUsesOfWith leads to replacing both operands
6023	// with the same value. TODO: Reorganize this.
6024	if (LU.Kind == LSRUse::ICmpZero)
6025	LF.UserInst->setOperand(i: `0`, Val: FullV);
6026	else
6027	LF.UserInst->replaceUsesOfWith(From: LF.OperandValToReplace, To: FullV);
6028	}
6029
6030	if (auto *OperandIsInstr = dyn_cast<Instruction>(Val: LF.OperandValToReplace))
6031	DeadInsts.emplace_back(Args&: OperandIsInstr);
6032	}
6033
6034	// Determine where to insert the transformed IV increment instruction for this
6035	// fixup. By default this is the default insert position, but if this is a
6036	// postincrement opportunity then we try to insert it in the same block as the
6037	// fixup user instruction, as this is needed for a postincrement instruction to
6038	// be generated.
6039	static Instruction getFixupInsertPos(const* TargetTransformInfo &TTI,
6040	const LSRFixup &Fixup, const LSRUse &LU,
6041	Instruction *IVIncInsertPos,
6042	DominatorTree &DT) {
6043	// Only address uses can be postincremented
6044	if (LU.Kind != LSRUse::Address)
6045	return IVIncInsertPos;
6046
6047	// Don't try to postincrement if it's not legal
6048	Instruction *I = Fixup.UserInst;
6049	Type *Ty = I->getType();
6050	if (!(isa<LoadInst>(Val: I) && TTI.isIndexedLoadLegal(Mode: TTI.MIM_PostInc, Ty)) &&
6051	!(isa<StoreInst>(Val: I) && TTI.isIndexedStoreLegal(Mode: TTI.MIM_PostInc, Ty)))
6052	return IVIncInsertPos;
6053
6054	// It's only legal to hoist to the user block if it dominates the default
6055	// insert position.
6056	BasicBlock *HoistBlock = I->getParent();
6057	BasicBlock *IVIncBlock = IVIncInsertPos->getParent();
6058	if (!DT.dominates(Def: I, BB: IVIncBlock))
6059	return IVIncInsertPos;
6060
6061	return HoistBlock->getTerminator();
6062	}
6063
6064	/// Rewrite all the fixup locations with new values, following the chosen
6065	/// solution.
6066	void LSRInstance::ImplementSolution(
6067	const SmallVectorImpl<const Formula *> &Solution) {
6068	// Keep track of instructions we may have made dead, so that
6069	// we can remove them after we are done working.
6070	SmallVector<WeakTrackingVH, `16`> DeadInsts;
6071
6072	// Mark phi nodes that terminate chains so the expander tries to reuse them.
6073	for (const IVChain &Chain : IVChainVec) {
6074	if (PHINode *PN = dyn_cast<PHINode>(Val: Chain.tailUserInst()))
6075	Rewriter.setChainedPhi(PN);
6076	}
6077
6078	// Expand the new value definitions and update the users.
6079	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
6080	for (const LSRFixup &Fixup : Uses [LUIdx].Fixups) {
6081	Instruction *InsertPos =
6082	getFixupInsertPos(TTI, Fixup, LU: Uses [LUIdx], IVIncInsertPos, DT);
6083	Rewriter.setIVIncInsertPos(L, Pos: InsertPos);
6084	Rewrite(LU: Uses [LUIdx], LF: Fixup, F: *Solution [LUIdx], DeadInsts);
6085	Changed = true;
6086	}
6087
6088	auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6089	formLCSSAForInstructions(Worklist&: InsertedInsts, DT, LI, SE: &SE);
6090
6091	for (const IVChain &Chain : IVChainVec) {
6092	GenerateIVChain(Chain, DeadInsts);
6093	Changed = true;
6094	}
6095
6096	for (const WeakVH &IV : Rewriter.getInsertedIVs())
6097	if (IV && dyn_cast<Instruction>(Val: &*IV)->getParent())
6098	ScalarEvolutionIVs.push_back(Elt: IV);
6099
6100	// Clean up after ourselves. This must be done before deleting any
6101	// instructions.
6102	Rewriter.clear();
6103
6104	Changed \|= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
6105	TLI: &TLI, MSSAU);
6106
6107	// In our cost analysis above, we assume that each addrec consumes exactly
6108	// one register, and arrange to have increments inserted just before the
6109	// latch to maximimize the chance this is true. However, if we reused
6110	// existing IVs, we now need to move the increments to match our
6111	// expectations. Otherwise, our cost modeling results in us having a
6112	// chosen a non-optimal result for the actual schedule. (And yes, this
6113	// scheduling decision does impact later codegen.)
6114	for (PHINode &PN : L->getHeader()->phis()) {
6115	BinaryOperator BO = nullptr*;
6116	Value Start = nullptr, Step = nullptr;
6117	if (!matchSimpleRecurrence(P: &PN, BO, Start, Step))
6118	continue;
6119
6120	switch (BO->getOpcode()) {
6121	case Instruction::Sub:
6122	if (BO->getOperand(i_nocapture: `0`) != &PN)
6123	// sub is non-commutative - match handling elsewhere in LSR
6124	continue;
6125	break;
6126	case Instruction::Add:
6127	break;
6128	default:
6129	continue;
6130	};
6131
6132	if (!isa<Constant>(Val: Step))
6133	// If not a constant step, might increase register pressure
6134	// (We assume constants have been canonicalized to RHS)
6135	continue;
6136
6137	if (BO->getParent() == IVIncInsertPos->getParent())
6138	// Only bother moving across blocks. Isel can handle block local case.
6139	continue;
6140
6141	// Can we legally schedule inc at the desired point?
6142	if (!llvm::all_of(Range: BO->uses(),
6143	P: [&](Use &U) {return DT.dominates(Def: IVIncInsertPos, U);}))
6144	continue;
6145	BO->moveBefore(InsertPos: IVIncInsertPos->getIterator());
6146	Changed = true;
6147	}
6148
6149
6150	}
6151
6152	LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6153	DominatorTree &DT, LoopInfo &LI,
6154	const TargetTransformInfo &TTI, AssumptionCache &AC,
6155	TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6156	: IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
6157	MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > `0`
6158	? PreferredAddresingMode
6159	: TTI.getPreferredAddressingMode(L, SE: &SE)),
6160	Rewriter (SE, "lsr", false), BaselineCost (L, SE, TTI, AMK) {
6161	// If LoopSimplify form is not available, stay out of trouble.
6162	if (!L->isLoopSimplifyForm())
6163	return;
6164
6165	// If there's no interesting work to be done, bail early.
6166	if (IU.empty()) return;
6167
6168	// If there's too much analysis to be done, bail early. We won't be able to
6169	// model the problem anyway.
6170	unsigned NumUsers = `0`;
6171	for (const IVStrideUse &U : IU) {
6172	if (++NumUsers > MaxIVUsers) {
6173	(void)U;
6174	LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
6175	<< "\n");
6176	return;
6177	}
6178	// Bail out if we have a PHI on an EHPad that gets a value from a
6179	// CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
6180	// no good place to stick any instructions.
6181	if (auto *PN = dyn_cast<PHINode>(Val: U.getUser())) {
6182	auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();
6183	if (isa<FuncletPadInst>(Val: FirstNonPHI) \|\|
6184	isa<CatchSwitchInst>(Val: FirstNonPHI))
6185	for (BasicBlock *PredBB : PN->blocks())
6186	if (isa<CatchSwitchInst>(Val: PredBB->getFirstNonPHIIt()))
6187	return;
6188	}
6189	}
6190
6191	LLVM_DEBUG(dbgs() << "\nLSR on loop ";
6192	L->getHeader()->printAsOperand(dbgs(), /PrintType=/false);
6193	dbgs() << ":\n");
6194
6195	// Check if we expect this loop to use a hardware loop instruction, which will
6196	// be used when calculating the costs of formulas.
6197	HardwareLoopInfo HWLoopInfo(L);
6198	HardwareLoopProfitable =
6199	TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo: &TLI, HWLoopInfo);
6200
6201	// Configure SCEVExpander already now, so the correct mode is used for
6202	// isSafeToExpand() checks.
6203	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6204	Rewriter.setDebugType(DEBUG_TYPE);
6205	#endif
6206	Rewriter.disableCanonicalMode();
6207	Rewriter.enableLSRMode();
6208
6209	// First, perform some low-level loop optimizations.
6210	OptimizeShadowIV();
6211	OptimizeLoopTermCond();
6212
6213	// If loop preparation eliminates all interesting IV users, bail.
6214	if (IU.empty()) return;
6215
6216	// Skip nested loops until we can model them better with formulae.
6217	if (!L->isInnermost()) {
6218	LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
6219	return;
6220	}
6221
6222	// Start collecting data and preparing for the solver.
6223	// If number of registers is not the major cost, we cannot benefit from the
6224	// current profitable chain optimization which is based on number of
6225	// registers.
6226	// FIXME: add profitable chain optimization for other kinds major cost, for
6227	// example number of instructions.
6228	if (TTI.isNumRegsMajorCostOfLSR() \|\| StressIVChain)
6229	CollectChains();
6230	CollectInterestingTypesAndFactors();
6231	CollectFixupsAndInitialFormulae();
6232	CollectLoopInvariantFixupsAndFormulae();
6233
6234	if (Uses.empty())
6235	return;
6236
6237	LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
6238	print_uses(dbgs()));
6239	LLVM_DEBUG(dbgs() << "The baseline solution requires ";
6240	BaselineCost.print(dbgs()); dbgs() << "\n");
6241
6242	// Now use the reuse data to generate a bunch of interesting ways
6243	// to formulate the values needed for the uses.
6244	GenerateAllReuseFormulae();
6245
6246	FilterOutUndesirableDedicatedRegisters();
6247	NarrowSearchSpaceUsingHeuristics();
6248
6249	SmallVector<const Formula *, `8`> Solution;
6250	Solve(Solution);
6251
6252	// Release memory that is no longer needed.
6253	Factors.clear();
6254	Types.clear();
6255	RegUses.clear();
6256
6257	if (Solution.empty())
6258	return;
6259
6260	#ifndef NDEBUG
6261	// Formulae should be legal.
6262	for (const LSRUse &LU : Uses) {
6263	for (const Formula &F : LU.Formulae)
6264	assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
6265	F) && "Illegal formula generated!");
6266	};
6267	#endif
6268
6269	// Now that we've decided what we want, make it so.
6270	ImplementSolution(Solution);
6271	}
6272
6273	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
6274	void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
6275	if (Factors.empty() && Types.empty()) return;
6276
6277	OS << "LSR has identified the following interesting factors and types: ";
6278	ListSeparator LS;
6279
6280	for (int64_t Factor : Factors)
6281	OS << LS << `'*'` << Factor;
6282
6283	for (Type *Ty : Types)
6284	OS << LS << `'('` << *Ty << `')'`;
6285	OS << `'\n'`;
6286	}
6287
6288	void LSRInstance::print_fixups(raw_ostream &OS) const {
6289	OS << "LSR is examining the following fixup sites:\n";
6290	for (const LSRUse &LU : Uses)
6291	for (const LSRFixup &LF : LU.Fixups) {
6292	dbgs() << " ";
6293	LF.print(OS);
6294	OS << `'\n'`;
6295	}
6296	}
6297
6298	void LSRInstance::print_uses(raw_ostream &OS) const {
6299	OS << "LSR is examining the following uses:\n";
6300	for (const LSRUse &LU : Uses) {
6301	dbgs() << " ";
6302	LU.print(OS);
6303	OS << `'\n'`;
6304	for (const Formula &F : LU.Formulae) {
6305	OS << " ";
6306	F.print(OS);
6307	OS << `'\n'`;
6308	}
6309	}
6310	}
6311
6312	void LSRInstance::print(raw_ostream &OS) const {
6313	print_factors_and_types(OS);
6314	print_fixups(OS);
6315	print_uses(OS);
6316	}
6317
6318	LLVM_DUMP_METHOD void LSRInstance::dump() const {
6319	print(errs()); errs() << `'\n'`;
6320	}
6321	#endif
6322
6323	namespace {
6324
6325	class LoopStrengthReduce : public LoopPass {
6326	public:
6327	static char ID; // Pass ID, replacement for typeid
6328
6329	LoopStrengthReduce();
6330
6331	private:
6332	bool runOnLoop(Loop *L, LPPassManager &LPM) override;
6333	void getAnalysisUsage(AnalysisUsage &AU) const override;
6334	};
6335
6336	} // end anonymous namespace
6337
6338	LoopStrengthReduce::LoopStrengthReduce() : LoopPass (ID) {
6339	initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
6340	}
6341
6342	void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
6343	// We split critical edges, so we change the CFG. However, we do update
6344	// many analyses if they are around.
6345	AU.addPreservedID(ID&: LoopSimplifyID);
6346
6347	AU.addRequired<LoopInfoWrapperPass>();
6348	AU.addPreserved<LoopInfoWrapperPass>();
6349	AU.addRequiredID(ID&: LoopSimplifyID);
6350	AU.addRequired<DominatorTreeWrapperPass>();
6351	AU.addPreserved<DominatorTreeWrapperPass>();
6352	AU.addRequired<ScalarEvolutionWrapperPass>();
6353	AU.addPreserved<ScalarEvolutionWrapperPass>();
6354	AU.addRequired<AssumptionCacheTracker>();
6355	AU.addRequired<TargetLibraryInfoWrapperPass>();
6356	// Requiring LoopSimplify a second time here prevents IVUsers from running
6357	// twice, since LoopSimplify was invalidated by running ScalarEvolution.
6358	AU.addRequiredID(ID&: LoopSimplifyID);
6359	AU.addRequired<IVUsersWrapperPass>();
6360	AU.addPreserved<IVUsersWrapperPass>();
6361	AU.addRequired<TargetTransformInfoWrapperPass>();
6362	AU.addPreserved<MemorySSAWrapperPass>();
6363	}
6364
6365	namespace {
6366
6367	/// Enables more convenient iteration over a DWARF expression vector.
6368	static iterator_range<llvm::DIExpression::expr_op_iterator>
6369	ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6370	llvm::DIExpression::expr_op_iterator Begin =
6371	llvm::DIExpression::expr_op_iterator (Expr.begin());
6372	llvm::DIExpression::expr_op_iterator End =
6373	llvm::DIExpression::expr_op_iterator (Expr.end());
6374	return {Begin, End};
6375	}
6376
6377	struct SCEVDbgValueBuilder {
6378	SCEVDbgValueBuilder() = default;
6379	SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }
6380
6381	void clone(const SCEVDbgValueBuilder &Base) {
6382	LocationOps = Base.LocationOps;
6383	Expr = Base.Expr;
6384	}
6385
6386	void clear() {
6387	LocationOps.clear();
6388	Expr.clear();
6389	}
6390
6391	/// The DIExpression as we translate the SCEV.
6392	SmallVector<uint64_t, `6`> Expr;
6393	/// The location ops of the DIExpression.
6394	SmallVector<Value *, `2`> LocationOps;
6395
6396	void pushOperator(uint64_t Op) { Expr.push_back(Elt: Op); }
6397	void pushUInt(uint64_t Operand) { Expr.push_back(Elt: Operand); }
6398
6399	/// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
6400	/// in the set of values referenced by the expression.
6401	void pushLocation(llvm::Value *V) {
6402	Expr.push_back(Elt: llvm::dwarf::DW_OP_LLVM_arg);
6403	auto *It = llvm::find(Range&: LocationOps, Val: V);
6404	unsigned ArgIndex = `0`;
6405	if (It != LocationOps.end()) {
6406	ArgIndex = std::distance(first: LocationOps.begin(), last: It);
6407	} else {
6408	ArgIndex = LocationOps.size();
6409	LocationOps.push_back(Elt: V);
6410	}
6411	Expr.push_back(Elt: ArgIndex);
6412	}
6413
6414	void pushValue(const SCEVUnknown *U) {
6415	llvm::Value *V = cast<SCEVUnknown>(Val: U)->getValue();
6416	pushLocation(V);
6417	}
6418
6419	bool pushConst(const SCEVConstant *C) {
6420	if (C->getAPInt().getSignificantBits() > `64`)
6421	return false;
6422	Expr.push_back(Elt: llvm::dwarf::DW_OP_consts);
6423	Expr.push_back(Elt: C->getAPInt().getSExtValue());
6424	return true;
6425	}
6426
6427	// Iterating the expression as DWARF ops is convenient when updating
6428	// DWARF_OP_LLVM_args.
6429	iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
6430	return ToDwarfOpIter(Expr);
6431	}
6432
6433	/// Several SCEV types are sequences of the same arithmetic operator applied
6434	/// to constants and values that may be extended or truncated.
6435	bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
6436	uint64_t DwarfOp) {
6437	assert((isa<llvm::SCEVAddExpr>(CommExpr) \|\| isa<SCEVMulExpr>(CommExpr)) &&
6438	"Expected arithmetic SCEV type");
6439	bool Success = true;
6440	unsigned EmitOperator = `0`;
6441	for (const auto &Op : CommExpr->operands()) {
6442	Success &= pushSCEV(S: Op);
6443
6444	if (EmitOperator >= `1`)
6445	pushOperator(Op: DwarfOp);
6446	++EmitOperator;
6447	}
6448	return Success;
6449	}
6450
6451	// TODO: Identify and omit noop casts.
6452	bool pushCast(const llvm::SCEVCastExpr C, bool* IsSigned) {
6453	const llvm::SCEV *Inner = C->getOperand(i: `0`);
6454	const llvm::Type *Type = C->getType();
6455	uint64_t ToWidth = Type->getIntegerBitWidth();
6456	bool Success = pushSCEV(S: Inner);
6457	uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
6458	IsSigned ? llvm::dwarf::DW_ATE_signed
6459	: llvm::dwarf::DW_ATE_unsigned};
6460	for (const auto &Op : CastOps)
6461	pushOperator(Op);
6462	return Success;
6463	}
6464
6465	// TODO: MinMax - although these haven't been encountered in the test suite.
6466	bool pushSCEV(const llvm::SCEV *S) {
6467	bool Success = true;
6468	if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(Val: S)) {
6469	Success &= pushConst(C: StartInt);
6470
6471	} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S)) {
6472	if (!U->getValue())
6473	return false;
6474	pushLocation(V: U->getValue());
6475
6476	} else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(Val: S)) {
6477	Success &= pushArithmeticExpr(CommExpr: MulRec, DwarfOp: llvm::dwarf::DW_OP_mul);
6478
6479	} else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(Val: S)) {
6480	Success &= pushSCEV(S: UDiv->getLHS());
6481	Success &= pushSCEV(S: UDiv->getRHS());
6482	pushOperator(Op: llvm::dwarf::DW_OP_div);
6483
6484	} else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(Val: S)) {
6485	// Assert if a new and unknown SCEVCastEXpr type is encountered.
6486	assert((isa<SCEVZeroExtendExpr>(Cast) \|\| isa<SCEVTruncateExpr>(Cast) \|\|
6487	isa<SCEVPtrToIntExpr>(Cast) \|\| isa<SCEVPtrToAddrExpr>(Cast) \|\|
6488	isa<SCEVSignExtendExpr>(Cast)) &&
6489	"Unexpected cast type in SCEV.");
6490	Success &= pushCast(C: Cast, IsSigned: (isa<SCEVSignExtendExpr>(Val: Cast)));
6491
6492	} else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(Val: S)) {
6493	Success &= pushArithmeticExpr(CommExpr: AddExpr, DwarfOp: llvm::dwarf::DW_OP_plus);
6494
6495	} else if (isa<SCEVAddRecExpr>(Val: S)) {
6496	// Nested SCEVAddRecExpr are generated by nested loops and are currently
6497	// unsupported.
6498	return false;
6499
6500	} else {
6501	return false;
6502	}
6503	return Success;
6504	}
6505
6506	/// Return true if the combination of arithmetic operator and underlying
6507	/// SCEV constant value is an identity function.
6508	bool isIdentityFunction(uint64_t Op, const SCEV *S) {
6509	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: S)) {
6510	if (C->getAPInt().getSignificantBits() > `64`)
6511	return false;
6512	int64_t I = C->getAPInt().getSExtValue();
6513	switch (Op) {
6514	case llvm::dwarf::DW_OP_plus:
6515	case llvm::dwarf::DW_OP_minus:
6516	return I == `0`;
6517	case llvm::dwarf::DW_OP_mul:
6518	case llvm::dwarf::DW_OP_div:
6519	return I == `1`;
6520	}
6521	}
6522	return false;
6523	}
6524
6525	/// Convert a SCEV of a value to a DIExpression that is pushed onto the
6526	/// builder's expression stack. The stack should already contain an
6527	/// expression for the iteration count, so that it can be multiplied by
6528	/// the stride and added to the start.
6529	/// Components of the expression are omitted if they are an identity function.
6530	/// Chain (non-affine) SCEVs are not supported.
6531	bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6532	assert(SAR.isAffine() && "Expected affine SCEV");
6533	const SCEV *Start = SAR.getStart();
6534	const SCEV *Stride = SAR.getStepRecurrence(SE);
6535
6536	// Skip pushing arithmetic noops.
6537	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_mul, S: Stride)) {
6538	if (!pushSCEV(S: Stride))
6539	return false;
6540	pushOperator(Op: llvm::dwarf::DW_OP_mul);
6541	}
6542	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_plus, S: Start)) {
6543	if (!pushSCEV(S: Start))
6544	return false;
6545	pushOperator(Op: llvm::dwarf::DW_OP_plus);
6546	}
6547	return true;
6548	}
6549
6550	/// Create an expression that is an offset from a value (usually the IV).
6551	void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
6552	pushLocation(V: OffsetValue);
6553	DIExpression::appendOffset(Ops&: Expr, Offset);
6554	LLVM_DEBUG(
6555	dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
6556	<< std::to_string(Offset) << "\n");
6557	}
6558
6559	/// Combine a translation of the SCEV and the IV to create an expression that
6560	/// recovers a location's value.
6561	/// returns true if an expression was created.
6562	bool createIterCountExpr(const SCEV *S,
6563	const SCEVDbgValueBuilder &IterationCount,
6564	ScalarEvolution &SE) {
6565	// SCEVs for SSA values are most frquently of the form
6566	// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
6567	// This is because %a is a PHI node that is not the IV. However, these
6568	// SCEVs have not been observed to result in debuginfo-lossy optimisations,
6569	// so its not expected this point will be reached.
6570	if (!isa<SCEVAddRecExpr>(Val: S))
6571	return false;
6572
6573	LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
6574	<< `'\n'`);
6575
6576	const auto *Rec = cast<SCEVAddRecExpr>(Val: S);
6577	if (!Rec->isAffine())
6578	return false;
6579
6580	if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6581	return false;
6582
6583	// Initialise a new builder with the iteration count expression. In
6584	// combination with the value's SCEV this enables recovery.
6585	clone(Base: IterationCount);
6586	if (!SCEVToValueExpr(SAR: *Rec, SE))
6587	return false;
6588
6589	return true;
6590	}
6591
6592	/// Convert a SCEV of a value to a DIExpression that is pushed onto the
6593	/// builder's expression stack. The stack should already contain an
6594	/// expression for the iteration count, so that it can be multiplied by
6595	/// the stride and added to the start.
6596	/// Components of the expression are omitted if they are an identity function.
6597	bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
6598	ScalarEvolution &SE) {
6599	assert(SAR.isAffine() && "Expected affine SCEV");
6600	const SCEV *Start = SAR.getStart();
6601	const SCEV *Stride = SAR.getStepRecurrence(SE);
6602
6603	// Skip pushing arithmetic noops.
6604	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_minus, S: Start)) {
6605	if (!pushSCEV(S: Start))
6606	return false;
6607	pushOperator(Op: llvm::dwarf::DW_OP_minus);
6608	}
6609	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_div, S: Stride)) {
6610	if (!pushSCEV(S: Stride))
6611	return false;
6612	pushOperator(Op: llvm::dwarf::DW_OP_div);
6613	}
6614	return true;
6615	}
6616
6617	// Append the current expression and locations to a location list and an
6618	// expression list. Modify the DW_OP_LLVM_arg indexes to account for
6619	// the locations already present in the destination list.
6620	void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6621	SmallVectorImpl<Value *> &DestLocations) {
6622	assert(!DestLocations.empty() &&
6623	"Expected the locations vector to contain the IV");
6624	// The DWARF_OP_LLVM_arg arguments of the expression being appended must be
6625	// modified to account for the locations already in the destination vector.
6626	// All builders contain the IV as the first location op.
6627	assert(!LocationOps.empty() &&
6628	"Expected the location ops to contain the IV.");
6629	// DestIndexMap[n] contains the index in DestLocations for the nth
6630	// location in this SCEVDbgValueBuilder.
6631	SmallVector<uint64_t, `2`> DestIndexMap;
6632	for (const auto &Op : LocationOps) {
6633	auto It = find(Range&: DestLocations, Val: Op);
6634	if (It != DestLocations.end()) {
6635	// Location already exists in DestLocations, reuse existing ArgIndex.
6636	DestIndexMap.push_back(Elt: std::distance(first: DestLocations.begin(), last: It));
6637	continue;
6638	}
6639	// Location is not in DestLocations, add it.
6640	DestIndexMap.push_back(Elt: DestLocations.size());
6641	DestLocations.push_back(Elt: Op);
6642	}
6643
6644	for (const auto &Op : expr_ops()) {
6645	if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
6646	Op.appendToVector(V&: DestExpr);
6647	continue;
6648	}
6649
6650	DestExpr.push_back(Elt: dwarf::DW_OP_LLVM_arg);
6651	// `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
6652	// DestIndexMap[n] contains its new index in DestLocations.
6653	uint64_t NewIndex = DestIndexMap [Op.getArg(I: `0`)];
6654	DestExpr.push_back(Elt: NewIndex);
6655	}
6656	}
6657	};
6658
6659	/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
6660	/// and DIExpression.
6661	struct DVIRecoveryRec {
6662	DVIRecoveryRec(DbgVariableRecord *DVR)
6663	: DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}
6664
6665	DbgVariableRecord *DbgRef;
6666	DIExpression *Expr;
6667	bool HadLocationArgList;
6668	SmallVector<WeakVH, `2`> LocationOps;
6669	SmallVector<const llvm::SCEV *, `2`> SCEVs;
6670	SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, `2`> RecoveryExprs;
6671
6672	void clear() {
6673	for (auto &RE : RecoveryExprs)
6674	RE.reset();
6675	RecoveryExprs.clear();
6676	}
6677
6678	~DVIRecoveryRec() { clear(); }
6679	};
6680	} // namespace
6681
6682	/// Returns the total number of DW_OP_llvm_arg operands in the expression.
6683	/// This helps in determining if a DIArglist is necessary or can be omitted from
6684	/// the dbg.value.
6685	static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
6686	auto expr_ops = ToDwarfOpIter(Expr);
6687	unsigned Count = `0`;
6688	for (auto Op : expr_ops)
6689	if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
6690	Count++;
6691	return Count;
6692	}
6693
6694	/// Overwrites DVI with the location and Ops as the DIExpression. This will
6695	/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,
6696	/// because a DIArglist is not created for the first argument of the dbg.value.
6697	template <typename T>
6698	static void updateDVIWithLocation(T &DbgVal, Value *Location,
6699	SmallVectorImpl<uint64_t> &Ops) {
6700	assert(numLLVMArgOps(Ops) == `0` && "Expected expression that does not "
6701	"contain any DW_OP_llvm_arg operands.");
6702	DbgVal.setRawLocation(ValueAsMetadata::get(V: Location));
6703	DbgVal.setExpression(DIExpression::get(Context&: DbgVal.getContext(), Elements: Ops));
6704	DbgVal.setExpression(DIExpression::get(Context&: DbgVal.getContext(), Elements: Ops));
6705	}
6706
6707	/// Overwrite DVI with locations placed into a DIArglist.
6708	template <typename T>
6709	static void updateDVIWithLocations(T &DbgVal,
6710	SmallVectorImpl<Value *> &Locations,
6711	SmallVectorImpl<uint64_t> &Ops) {
6712	assert(numLLVMArgOps(Ops) != `0` &&
6713	"Expected expression that references DIArglist locations using "
6714	"DW_OP_llvm_arg operands.");
6715	SmallVector<ValueAsMetadata *, `3`> MetadataLocs;
6716	for (Value *V : Locations)
6717	MetadataLocs.push_back(Elt: ValueAsMetadata::get(V));
6718	auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
6719	DbgVal.setRawLocation(llvm::DIArgList::get(Context&: DbgVal.getContext(), Args: ValArrayRef));
6720	DbgVal.setExpression(DIExpression::get(Context&: DbgVal.getContext(), Elements: Ops));
6721	}
6722
6723	/// Write the new expression and new location ops for the dbg.value. If possible
6724	/// reduce the szie of the dbg.value by omitting DIArglist. This
6725	/// can be omitted if:
6726	/// 1. There is only a single location, refenced by a single DW_OP_llvm_arg.
6727	/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
6728	static void UpdateDbgValue(DVIRecoveryRec &DVIRec,
6729	SmallVectorImpl<Value *> &NewLocationOps,
6730	SmallVectorImpl<uint64_t> &NewExpr) {
6731	DbgVariableRecord *DbgVal = DVIRec.DbgRef;
6732	unsigned NumLLVMArgs = numLLVMArgOps(Expr&: NewExpr);
6733	if (NumLLVMArgs == `0`) {
6734	// Location assumed to be on the stack.
6735	updateDVIWithLocation(DbgVal&: *DbgVal, Location: NewLocationOps [`0`], Ops&: NewExpr);
6736	} else if (NumLLVMArgs == `1` && NewExpr [`0`] == dwarf::DW_OP_LLVM_arg) {
6737	// There is only a single DW_OP_llvm_arg at the start of the expression,
6738	// so it can be omitted along with DIArglist.
6739	assert(NewExpr[`1`] == `0` &&
6740	"Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6741	llvm::SmallVector<uint64_t, `6`> ShortenedOps(llvm::drop_begin(RangeOrContainer&: NewExpr, N: `2`));
6742	updateDVIWithLocation(DbgVal&: *DbgVal, Location: NewLocationOps [`0`], Ops&: ShortenedOps);
6743	} else {
6744	// Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
6745	updateDVIWithLocations(DbgVal&: *DbgVal, Locations&: NewLocationOps, Ops&: NewExpr);
6746	}
6747
6748	// If the DIExpression was previously empty then add the stack terminator.
6749	// Non-empty expressions have only had elements inserted into them and so
6750	// the terminator should already be present e.g. stack_value or fragment.
6751	DIExpression *SalvageExpr = DbgVal->getExpression();
6752	if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
6753	SalvageExpr = DIExpression::append(Expr: SalvageExpr, Ops: {dwarf::DW_OP_stack_value});
6754	DbgVal->setExpression(SalvageExpr);
6755	}
6756	}
6757
6758	/// Cached location ops may be erased during LSR, in which case a poison is
6759	/// required when restoring from the cache. The type of that location is no
6760	/// longer available, so just use int8. The poison will be replaced by one or
6761	/// more locations later when a SCEVDbgValueBuilder selects alternative
6762	/// locations to use for the salvage.
6763	static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
6764	return (VH) ? VH : PoisonValue::get(T: llvm::Type::getInt8Ty(C));
6765	}
6766
6767	/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
6768	static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
6769	DbgVariableRecord *DbgVal = DVIRec.DbgRef;
6770	LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
6771	<< "scev-salvage: post-LSR: " << *DbgVal << `'\n'`);
6772	assert(DVIRec.Expr && "Expected an expression");
6773	DbgVal->setExpression(DVIRec.Expr);
6774
6775	// Even a single location-op may be inside a DIArgList and referenced with
6776	// DW_OP_LLVM_arg, which is valid only with a DIArgList.
6777	if (!DVIRec.HadLocationArgList) {
6778	assert(DVIRec.LocationOps.size() == `1` &&
6779	"Unexpected number of location ops.");
6780	// LSR's unsuccessful salvage attempt may have added DIArgList, which in
6781	// this case was not present before, so force the location back to a
6782	// single uncontained Value.
6783	Value *CachedValue =
6784	getValueOrPoison(VH&: DVIRec.LocationOps [`0`], C&: DbgVal->getContext());
6785	DbgVal->setRawLocation(ValueAsMetadata::get(V: CachedValue));
6786	} else {
6787	SmallVector<ValueAsMetadata *, `3`> MetadataLocs;
6788	for (WeakVH VH : DVIRec.LocationOps) {
6789	Value *CachedValue = getValueOrPoison(VH, C&: DbgVal->getContext());
6790	MetadataLocs.push_back(Elt: ValueAsMetadata::get(V: CachedValue));
6791	}
6792	auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
6793	DbgVal->setRawLocation(
6794	llvm::DIArgList::get(Context&: DbgVal->getContext(), Args: ValArrayRef));
6795	}
6796	LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << `'\n'`);
6797	}
6798
6799	static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
6800	llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
6801	const SCEV *SCEVInductionVar,
6802	SCEVDbgValueBuilder IterCountExpr) {
6803
6804	if (!DVIRec.DbgRef->isKillLocation())
6805	return false;
6806
6807	// LSR may have caused several changes to the dbg.value in the failed salvage
6808	// attempt. So restore the DIExpression, the location ops and also the
6809	// location ops format, which is always DIArglist for multiple ops, but only
6810	// sometimes for a single op.
6811	restorePreTransformState(DVIRec);
6812
6813	// LocationOpIndexMap[i] will store the post-LSR location index of
6814	// the non-optimised out location at pre-LSR index i.
6815	SmallVector<int64_t, `2`> LocationOpIndexMap;
6816	LocationOpIndexMap.assign(NumElts: DVIRec.LocationOps.size(), Elt: -`1`);
6817	SmallVector<Value *, `2`> NewLocationOps;
6818	NewLocationOps.push_back(Elt: LSRInductionVar);
6819
6820	for (unsigned i = `0`; i < DVIRec.LocationOps.size(); i++) {
6821	WeakVH VH = DVIRec.LocationOps [i];
6822	// Place the locations not optimised out in the list first, avoiding
6823	// inserts later. The map is used to update the DIExpression's
6824	// DW_OP_LLVM_arg arguments as the expression is updated.
6825	if (VH && !isa<UndefValue>(Val: VH)) {
6826	NewLocationOps.push_back(Elt: VH);
6827	LocationOpIndexMap [i] = NewLocationOps.size() - `1`;
6828	LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
6829	<< " now at index " << LocationOpIndexMap[i] << "\n");
6830	continue;
6831	}
6832
6833	// It's possible that a value referred to in the SCEV may have been
6834	// optimised out by LSR.
6835	if (SE.containsErasedValue(S: DVIRec.SCEVs [i]) \|\|
6836	SE.containsUndefs(S: DVIRec.SCEVs [i])) {
6837	LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
6838	<< " refers to a location that is now undef or erased. "
6839	"Salvage abandoned.\n");
6840	return false;
6841	}
6842
6843	LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
6844	<< " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
6845
6846	DVIRec.RecoveryExprs [i] = std::make_unique<SCEVDbgValueBuilder>();
6847	SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs [i].get();
6848
6849	// Create an offset-based salvage expression if possible, as it requires
6850	// less DWARF ops than an iteration count-based expression.
6851	if (std::optional<APInt> Offset =
6852	SE.computeConstantDifference(LHS: DVIRec.SCEVs [i], RHS: SCEVInductionVar)) {
6853	if (Offset ->getSignificantBits() <= `64`)
6854	SalvageExpr->createOffsetExpr(Offset: Offset ->getSExtValue(), OffsetValue: LSRInductionVar);
6855	else
6856	return false;
6857	} else if (!SalvageExpr->createIterCountExpr(S: DVIRec.SCEVs [i], IterationCount: IterCountExpr,
6858	SE))
6859	return false;
6860	}
6861
6862	// Merge the DbgValueBuilder generated expressions and the original
6863	// DIExpression, place the result into an new vector.
6864	SmallVector<uint64_t, `3`> NewExpr;
6865	if (DVIRec.Expr->getNumElements() == `0`) {
6866	assert(DVIRec.RecoveryExprs.size() == `1` &&
6867	"Expected only a single recovery expression for an empty "
6868	"DIExpression.");
6869	assert(DVIRec.RecoveryExprs[`0`] &&
6870	"Expected a SCEVDbgSalvageBuilder for location 0");
6871	SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs [`0`].get();
6872	B->appendToVectors(DestExpr&: NewExpr, DestLocations&: NewLocationOps);
6873	}
6874	for (const auto &Op : DVIRec.Expr->expr_ops()) {
6875	// Most Ops needn't be updated.
6876	if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
6877	Op.appendToVector(V&: NewExpr);
6878	continue;
6879	}
6880
6881	uint64_t LocationArgIndex = Op.getArg(I: `0`);
6882	SCEVDbgValueBuilder *DbgBuilder =
6883	DVIRec.RecoveryExprs [LocationArgIndex].get();
6884	// The location doesn't have s SCEVDbgValueBuilder, so LSR did not
6885	// optimise it away. So just translate the argument to the updated
6886	// location index.
6887	if (!DbgBuilder) {
6888	NewExpr.push_back(Elt: dwarf::DW_OP_LLVM_arg);
6889	assert(LocationOpIndexMap[Op.getArg(`0`)] != -`1` &&
6890	"Expected a positive index for the location-op position.");
6891	NewExpr.push_back(Elt: LocationOpIndexMap [Op.getArg(I: `0`)]);
6892	continue;
6893	}
6894	// The location has a recovery expression.
6895	DbgBuilder->appendToVectors(DestExpr&: NewExpr, DestLocations&: NewLocationOps);
6896	}
6897
6898	UpdateDbgValue(DVIRec, NewLocationOps, NewExpr);
6899	LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");
6900	return true;
6901	}
6902
6903	/// Obtain an expression for the iteration count, then attempt to salvage the
6904	/// dbg.value intrinsics.
6905	static void DbgRewriteSalvageableDVIs(
6906	llvm::Loop L, ScalarEvolution &SE, llvm::PHINode LSRInductionVar,
6907	SmallVector<std::unique_ptr<DVIRecoveryRec>, `2`> &DVIToUpdate) {
6908	if (DVIToUpdate.empty())
6909	return;
6910
6911	const llvm::SCEV *SCEVInductionVar = SE.getSCEV(V: LSRInductionVar);
6912	assert(SCEVInductionVar &&
6913	"Anticipated a SCEV for the post-LSR induction variable");
6914
6915	if (const SCEVAddRecExpr *IVAddRec =
6916	dyn_cast<SCEVAddRecExpr>(Val: SCEVInductionVar)) {
6917	if (!IVAddRec->isAffine())
6918	return;
6919
6920	// Prevent translation using excessive resources.
6921	if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6922	return;
6923
6924	// The iteration count is required to recover location values.
6925	SCEVDbgValueBuilder IterCountExpr;
6926	IterCountExpr.pushLocation(V: LSRInductionVar);
6927	if (!IterCountExpr.SCEVToIterCountExpr(SAR: *IVAddRec, SE))
6928	return;
6929
6930	LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
6931	<< `'\n'`);
6932
6933	for (auto &DVIRec : DVIToUpdate) {
6934	SalvageDVI(L, SE, LSRInductionVar, DVIRec&: *DVIRec, SCEVInductionVar,
6935	IterCountExpr);
6936	}
6937	}
6938	}
6939
6940	/// Identify and cache salvageable DVI locations and expressions along with the
6941	/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
6942	/// cacheing and salvaging.
6943	static void DbgGatherSalvagableDVI(
6944	Loop *L, ScalarEvolution &SE,
6945	SmallVector<std::unique_ptr<DVIRecoveryRec>, `2`> &SalvageableDVISCEVs) {
6946	for (const auto &B : L->getBlocks()) {
6947	for (auto &I : *B) {
6948	for (DbgVariableRecord &DbgVal : filterDbgVars(R: I.getDbgRecordRange())) {
6949	if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
6950	continue;
6951
6952	// Ensure that if any location op is undef that the dbg.vlue is not
6953	// cached.
6954	if (DbgVal.isKillLocation())
6955	continue;
6956
6957	// Check that the location op SCEVs are suitable for translation to
6958	// DIExpression.
6959	const auto &HasTranslatableLocationOps =
6960	[&](const DbgVariableRecord &DbgValToTranslate) -> bool {
6961	for (const auto LocOp : DbgValToTranslate.location_ops()) {
6962	if (!LocOp)
6963	return false;
6964
6965	if (!SE.isSCEVable(Ty: LocOp->getType()))
6966	return false;
6967
6968	const SCEV *S = SE.getSCEV(V: LocOp);
6969	if (SE.containsUndefs(S))
6970	return false;
6971	}
6972	return true;
6973	};
6974
6975	if (!HasTranslatableLocationOps (DbgVal))
6976	continue;
6977
6978	std::unique_ptr<DVIRecoveryRec> NewRec =
6979	std::make_unique<DVIRecoveryRec>(args: &DbgVal);
6980	// Each location Op may need a SCEVDbgValueBuilder in order to recover
6981	// it. Pre-allocating a vector will enable quick lookups of the builder
6982	// later during the salvage.
6983	NewRec ->RecoveryExprs.resize(N: DbgVal.getNumVariableLocationOps());
6984	for (const auto LocOp : DbgVal.location_ops()) {
6985	NewRec ->SCEVs.push_back(Elt: SE.getSCEV(V: LocOp));
6986	NewRec ->LocationOps.push_back(Elt: LocOp);
6987	NewRec ->HadLocationArgList = DbgVal.hasArgList();
6988	}
6989	SalvageableDVISCEVs.push_back(Elt: std::move(NewRec));
6990	}
6991	}
6992	}
6993	}
6994
6995	/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
6996	/// any PHi from the loop header is usable, but may have less chance of
6997	/// surviving subsequent transforms.
6998	static llvm::PHINode GetInductionVariable(const* Loop &L, ScalarEvolution &SE,
6999	const LSRInstance &LSR) {
7000
7001	auto IsSuitableIV = [&](PHINode *P) {
7002	if (!SE.isSCEVable(Ty: P->getType()))
7003	return false;
7004	if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: P)))
7005	return Rec->isAffine() && !SE.containsUndefs(S: SE.getSCEV(V: P));
7006	return false;
7007	};
7008
7009	// For now, just pick the first IV that was generated and inserted by
7010	// ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
7011	// by subsequent transforms.
7012	for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
7013	if (!IV)
7014	continue;
7015
7016	// There should only be PHI node IVs.
7017	PHINode P = cast<PHINode>(Val: &IV);
7018
7019	if (IsSuitableIV (P))
7020	return P;
7021	}
7022
7023	for (PHINode &P : L.getHeader()->phis()) {
7024	if (IsSuitableIV (&P))
7025	return &P;
7026	}
7027	return nullptr;
7028	}
7029
7030	static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
7031	DominatorTree &DT, LoopInfo &LI,
7032	const TargetTransformInfo &TTI,
7033	AssumptionCache &AC, TargetLibraryInfo &TLI,
7034	MemorySSA *MSSA) {
7035
7036	// Debug preservation - before we start removing anything identify which DVI
7037	// meet the salvageable criteria and store their DIExpression and SCEVs.
7038	SmallVector<std::unique_ptr<DVIRecoveryRec>, `2`> SalvageableDVIRecords;
7039	DbgGatherSalvagableDVI(L, SE, SalvageableDVISCEVs&: SalvageableDVIRecords);
7040
7041	bool Changed = false;
7042	std::unique_ptr<MemorySSAUpdater> MSSAU;
7043	if (MSSA)
7044	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
7045
7046	// Run the main LSR transformation.
7047	const LSRInstance &Reducer =
7048	LSRInstance (L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
7049	Changed \|= Reducer.getChanged();
7050
7051	// Remove any extra phis created by processing inner loops.
7052	Changed \|= DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
7053	if (EnablePhiElim && L->isLoopSimplifyForm()) {
7054	SmallVector<WeakTrackingVH, `16`> DeadInsts;
7055	SCEVExpander Rewriter(SE, "lsr", false);
7056	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7057	Rewriter.setDebugType(DEBUG_TYPE);
7058	#endif
7059	unsigned numFolded = Rewriter.replaceCongruentIVs(L, DT: &DT, DeadInsts, TTI: &TTI);
7060	Rewriter.clear();
7061	if (numFolded) {
7062	Changed = true;
7063	RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, TLI: &TLI,
7064	MSSAU: MSSAU.get());
7065	DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
7066	}
7067	}
7068	// LSR may at times remove all uses of an induction variable from a loop.
7069	// The only remaining use is the PHI in the exit block.
7070	// When this is the case, if the exit value of the IV can be calculated using
7071	// SCEV, we can replace the exit block PHI with the final value of the IV and
7072	// skip the updates in each loop iteration.
7073	if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7074	SmallVector<WeakTrackingVH, `16`> DeadInsts;
7075	SCEVExpander Rewriter(SE, "lsr", true);
7076	int Rewrites = rewriteLoopExitValues(L, LI: &LI, TLI: &TLI, SE: &SE, TTI: &TTI, Rewriter, DT: &DT,
7077	ReplaceExitValue: UnusedIndVarInLoop, DeadInsts);
7078	Rewriter.clear();
7079	if (Rewrites) {
7080	Changed = true;
7081	RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, TLI: &TLI,
7082	MSSAU: MSSAU.get());
7083	DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
7084	}
7085	}
7086
7087	if (SalvageableDVIRecords.empty())
7088	return Changed;
7089
7090	// Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
7091	// expressions composed using the derived iteration count.
7092	// TODO: Allow for multiple IV references for nested AddRecSCEVs
7093	for (const auto &L : LI) {
7094	if (llvm::PHINode IV = GetInductionVariable(L: L, SE, LSR: Reducer))
7095	DbgRewriteSalvageableDVIs(L, SE, LSRInductionVar: IV, DVIToUpdate&: SalvageableDVIRecords);
7096	else {
7097	LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
7098	"could not be identified.\n");
7099	}
7100	}
7101
7102	for (auto &Rec : SalvageableDVIRecords)
7103	Rec ->clear();
7104	SalvageableDVIRecords.clear();
7105	return Changed;
7106	}
7107
7108	bool LoopStrengthReduce::runOnLoop(Loop L, LPPassManager & /LPM/*) {
7109	if (skipLoop(L))
7110	return false;
7111
7112	auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7113	auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7114	auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7115	auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7116	const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7117	F: *L->getHeader()->getParent());
7118	auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7119	F&: *L->getHeader()->getParent());
7120	auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7121	F: *L->getHeader()->getParent());
7122	auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7123	MemorySSA MSSA = nullptr*;
7124	if (MSSAAnalysis)
7125	MSSA = &MSSAAnalysis->getMSSA();
7126	return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
7127	}
7128
7129	PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
7130	LoopStandardAnalysisResults &AR,
7131	LPMUpdater &) {
7132	if (!ReduceLoopStrength(L: &L, IU&: AM.getResult<IVUsersAnalysis>(IR&: L, ExtraArgs&: AR), SE&: AR.SE,
7133	DT&: AR.DT, LI&: AR.LI, TTI: AR.TTI, AC&: AR.AC, TLI&: AR.TLI, MSSA: AR.MSSA))
7134	return PreservedAnalyses::all();
7135
7136	auto PA = getLoopPassPreservedAnalyses();
7137	if (AR.MSSA)
7138	PA.preserve<MemorySSAAnalysis>();
7139	return PA;
7140	}
7141
7142	char LoopStrengthReduce::ID = `0`;
7143
7144	INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
7145	"Loop Strength Reduction", false, false)
7146	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
7147	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
7148	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
7149	INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
7150	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
7151	INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
7152	INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
7153	"Loop Strength Reduction", false, false)
7154
7155	Pass llvm::createLoopStrengthReducePass() { return* new LoopStrengthReduce (); }
7156

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp