LoopStrengthReduce.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp]

1	//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This transformation analyzes and transforms the induction variables (and
10	// computations derived from them) into forms suitable for efficient execution
11	// on the target.
12	//
13	// This pass performs a strength reduction on array references inside loops that
14	// have as one or more of their components the loop induction variable, it
15	// rewrites expressions to take advantage of scaled-index addressing modes
16	// available on the target, and it performs a variety of other optimizations
17	// related to loop induction variables.
18	//
19	// Terminology note: this code has a lot of handling for "post-increment" or
20	// "post-inc" users. This is not talking about post-increment addressing modes;
21	// it is instead talking about code like this:
22	//
//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
//   ...
//   %i.next = add %i, 1
//   %c = icmp eq %i.next, %n
27	//
28	// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
29	// it's useful to think about these as the same register, with some uses using
30	// the value of the register before the add and some using it after. In this
31	// example, the icmp is a post-increment user, since it uses %i.next, which is
32	// the value of the induction variable after the increment. The other common
33	// case of post-increment users is users outside the loop.
34	//
35	// TODO: More sophistication in the way Formulae are generated and filtered.
36	//
37	// TODO: Handle multiple loops at a time.
38	//
39	// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
40	// of a GlobalValue?
41	//
42	// TODO: When truncation is free, truncate ICmp users' operands to make it a
43	// smaller encoding (on x86 at least).
44	//
// TODO: When a negated register is used by an add (such as in a list of
// multiple base registers, or as the increment expression in an addrec),
// we may not actually need both reg and (-1 * reg) in registers; the
// negation can be implemented by using a sub instead of an add. The
// lack of support for taking this into consideration when making
// register pressure decisions is partly worked around by the "Special"
// use kind.
52	//
53	//===----------------------------------------------------------------------===//
54
55	#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
56	#include "llvm/ADT/APInt.h"
57	#include "llvm/ADT/DenseMap.h"
58	#include "llvm/ADT/DenseSet.h"
59	#include "llvm/ADT/PointerIntPair.h"
60	#include "llvm/ADT/STLExtras.h"
61	#include "llvm/ADT/SetVector.h"
62	#include "llvm/ADT/SmallBitVector.h"
63	#include "llvm/ADT/SmallPtrSet.h"
64	#include "llvm/ADT/SmallSet.h"
65	#include "llvm/ADT/SmallVector.h"
66	#include "llvm/ADT/Statistic.h"
67	#include "llvm/ADT/iterator_range.h"
68	#include "llvm/Analysis/AssumptionCache.h"
69	#include "llvm/Analysis/DomTreeUpdater.h"
70	#include "llvm/Analysis/IVUsers.h"
71	#include "llvm/Analysis/LoopAnalysisManager.h"
72	#include "llvm/Analysis/LoopInfo.h"
73	#include "llvm/Analysis/LoopPass.h"
74	#include "llvm/Analysis/MemorySSA.h"
75	#include "llvm/Analysis/MemorySSAUpdater.h"
76	#include "llvm/Analysis/ScalarEvolution.h"
77	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
78	#include "llvm/Analysis/ScalarEvolutionNormalization.h"
79	#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
80	#include "llvm/Analysis/TargetLibraryInfo.h"
81	#include "llvm/Analysis/TargetTransformInfo.h"
82	#include "llvm/Analysis/ValueTracking.h"
83	#include "llvm/BinaryFormat/Dwarf.h"
84	#include "llvm/IR/BasicBlock.h"
85	#include "llvm/IR/Constant.h"
86	#include "llvm/IR/Constants.h"
87	#include "llvm/IR/DebugInfoMetadata.h"
88	#include "llvm/IR/DerivedTypes.h"
89	#include "llvm/IR/Dominators.h"
90	#include "llvm/IR/GlobalValue.h"
91	#include "llvm/IR/IRBuilder.h"
92	#include "llvm/IR/InstrTypes.h"
93	#include "llvm/IR/Instruction.h"
94	#include "llvm/IR/Instructions.h"
95	#include "llvm/IR/IntrinsicInst.h"
96	#include "llvm/IR/Module.h"
97	#include "llvm/IR/Operator.h"
98	#include "llvm/IR/Type.h"
99	#include "llvm/IR/Use.h"
100	#include "llvm/IR/User.h"
101	#include "llvm/IR/Value.h"
102	#include "llvm/IR/ValueHandle.h"
103	#include "llvm/InitializePasses.h"
104	#include "llvm/Pass.h"
105	#include "llvm/Support/Casting.h"
106	#include "llvm/Support/CommandLine.h"
107	#include "llvm/Support/Compiler.h"
108	#include "llvm/Support/Debug.h"
109	#include "llvm/Support/ErrorHandling.h"
110	#include "llvm/Support/MathExtras.h"
111	#include "llvm/Support/raw_ostream.h"
112	#include "llvm/Transforms/Scalar.h"
113	#include "llvm/Transforms/Utils.h"
114	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
115	#include "llvm/Transforms/Utils/Local.h"
116	#include "llvm/Transforms/Utils/LoopUtils.h"
117	#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
118	#include <algorithm>
119	#include <cassert>
120	#include <cstddef>
121	#include <cstdint>
122	#include <iterator>
123	#include <limits>
124	#include <map>
125	#include <numeric>
126	#include <optional>
127	#include <utility>
128
129	using namespace llvm;
130	using namespace SCEVPatternMatch;
131
132	#define DEBUG_TYPE "loop-reduce"
133
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

/// Limit the size of expression that SCEV-based salvaging will attempt to
/// translate into a DIExpression.
/// Choose a maximum size such that debuginfo is not excessively increased and
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;
145
146	// Cleanup congruent phis after LSR phi expansion.
147	static cl::opt<bool> EnablePhiElim(
148	"enable-lsr-phielim", cl::Hidden, cl::init(Val: true),
149	cl::desc ("Enable LSR phi elimination"));
150
151	// The flag adds instruction count to solutions cost comparison.
152	static cl::opt<bool> InsnsCost(
153	"lsr-insns-cost", cl::Hidden, cl::init(Val: true),
154	cl::desc ("Add instruction count to a LSR cost model"));
155
156	// Flag to choose how to narrow complex lsr solution
157	static cl::opt<bool> LSRExpNarrow(
158	"lsr-exp-narrow", cl::Hidden, cl::init(Val: false),
159	cl::desc ("Narrow LSR complex solution using"
160	" expectation of registers number"));
161
162	// Flag to narrow search space by filtering non-optimal formulae with
163	// the same ScaledReg and Scale.
164	static cl::opt<bool> FilterSameScaledReg(
165	"lsr-filter-same-scaled-reg", cl::Hidden, cl::init(Val: true),
166	cl::desc ("Narrow LSR search space by filtering non-optimal formulae"
167	" with the same ScaledReg and Scale"));
168
169	static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
170	"lsr-preferred-addressing-mode", cl::Hidden, cl::init(Val: TTI::AMK_None),
171	cl::desc ("A flag that overrides the target's preferred addressing mode."),
172	cl::values(clEnumValN(TTI::AMK_None,
173	"none",
174	"Don't prefer any addressing mode"),
175	clEnumValN(TTI::AMK_PreIndexed,
176	"preindexed",
177	"Prefer pre-indexed addressing mode"),
178	clEnumValN(TTI::AMK_PostIndexed,
179	"postindexed",
180	"Prefer post-indexed addressing mode")));
181
182	static cl::opt<unsigned> ComplexityLimit(
183	"lsr-complexity-limit", cl::Hidden,
184	cl::init(Val: std::numeric_limits<uint16_t>::max()),
185	cl::desc ("LSR search space complexity limit"));
186
187	static cl::opt<unsigned> SetupCostDepthLimit(
188	"lsr-setupcost-depth-limit", cl::Hidden, cl::init(Val: `7`),
189	cl::desc ("The limit on recursion depth for LSRs setup cost"));
190
191	static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
192	"lsr-drop-solution", cl::Hidden,
193	cl::desc ("Attempt to drop solution if it is less profitable"));
194
195	static cl::opt<bool> EnableVScaleImmediates(
196	"lsr-enable-vscale-immediates", cl::Hidden, cl::init(Val: true),
197	cl::desc ("Enable analysis of vscale-relative immediates in LSR"));
198
199	static cl::opt<bool> DropScaledForVScale(
200	"lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(Val: true),
201	cl::desc ("Avoid using scaled registers with vscale-relative addressing"));
202
203	#ifndef NDEBUG
204	// Stress test IV chain generation.
205	static cl::opt<bool> StressIVChain(
206	"stress-ivchain", cl::Hidden, cl::init(false),
207	cl::desc("Stress test LSR IV chains"));
208	#else
209	static bool StressIVChain = false;
210	#endif
211
212	namespace {
213
214	struct MemAccessTy {
215	/// Used in situations where the accessed memory type is unknown.
216	static const unsigned UnknownAddressSpace =
217	std::numeric_limits<unsigned>::max();
218
219	Type MemTy = nullptr*;
220	unsigned AddrSpace = UnknownAddressSpace;
221
222	MemAccessTy() = default;
223	MemAccessTy(Type Ty, unsigned* AS) : MemTy(Ty), AddrSpace(AS) {}
224
225	bool operator==(MemAccessTy Other) const {
226	return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
227	}
228
229	bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
230
231	static MemAccessTy getUnknown(LLVMContext &Ctx,
232	unsigned AS = UnknownAddressSpace) {
233	return MemAccessTy (Type::getVoidTy(C&: Ctx), AS);
234	}
235
236	Type getType() { return* MemTy; }
237	};
238
239	/// This class holds data which is used to order reuse candidates.
240	class RegSortData {
241	public:
242	/// This represents the set of LSRUse indices which reference
243	/// a particular register.
244	SmallBitVector UsedByIndices;
245
246	void print(raw_ostream &OS) const;
247	void dump() const;
248	};
249
250	// An offset from an address that is either scalable or fixed. Used for
251	// per-target optimizations of addressing modes.
252	class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
253	constexpr Immediate(ScalarTy MinVal, bool Scalable)
254	: FixedOrScalableQuantity (MinVal, Scalable) {}
255
256	constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
257	: FixedOrScalableQuantity (V) {}
258
259	public:
260	constexpr Immediate() = delete;
261
262	static constexpr Immediate getFixed(ScalarTy MinVal) {
263	return {MinVal, false};
264	}
265	static constexpr Immediate getScalable(ScalarTy MinVal) {
266	return {MinVal, true};
267	}
268	static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
269	return {MinVal, Scalable};
270	}
271	static constexpr Immediate getZero() { return {`0`, false}; }
272	static constexpr Immediate getFixedMin() {
273	return {std::numeric_limits<int64_t>::min(), false};
274	}
275	static constexpr Immediate getFixedMax() {
276	return {std::numeric_limits<int64_t>::max(), false};
277	}
278	static constexpr Immediate getScalableMin() {
279	return {std::numeric_limits<int64_t>::min(), true};
280	}
281	static constexpr Immediate getScalableMax() {
282	return {std::numeric_limits<int64_t>::max(), true};
283	}
284
285	constexpr bool isLessThanZero() const { return Quantity < `0`; }
286
287	constexpr bool isGreaterThanZero() const { return Quantity > `0`; }
288
289	constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
290	return isZero() \|\| Imm.isZero() \|\| Imm.Scalable == Scalable;
291	}
292
293	constexpr bool isMin() const {
294	return Quantity == std::numeric_limits<ScalarTy>::min();
295	}
296
297	constexpr bool isMax() const {
298	return Quantity == std::numeric_limits<ScalarTy>::max();
299	}
300
301	// Arithmetic 'operators' that cast to unsigned types first.
302	constexpr Immediate addUnsigned(const Immediate &RHS) const {
303	assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
304	ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
305	return {Value, Scalable \|\| RHS.isScalable()};
306	}
307
308	constexpr Immediate subUnsigned(const Immediate &RHS) const {
309	assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
310	ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
311	return {Value, Scalable \|\| RHS.isScalable()};
312	}
313
314	// Scale the quantity by a constant without caring about runtime scalability.
315	constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
316	ScalarTy Value = (uint64_t)Quantity * RHS;
317	return {Value, Scalable};
318	}
319
320	// Helpers for generating SCEVs with vscale terms where needed.
321	const SCEV getSCEV(ScalarEvolution &SE, Type Ty) const {
322	const SCEV *S = SE.getConstant(Ty, V: Quantity);
323	if (Scalable)
324	S = SE.getMulExpr(LHS: S, RHS: SE.getVScale(Ty: S->getType()));
325	return S;
326	}
327
328	const SCEV getNegativeSCEV(ScalarEvolution &SE, Type Ty) const {
329	const SCEV *NegS = SE.getConstant(Ty, V: -(uint64_t)Quantity);
330	if (Scalable)
331	NegS = SE.getMulExpr(LHS: NegS, RHS: SE.getVScale(Ty: NegS->getType()));
332	return NegS;
333	}
334
335	const SCEV getUnknownSCEV(ScalarEvolution &SE, Type Ty) const {
336	const SCEV *SU = SE.getUnknown(V: ConstantInt::getSigned(Ty, V: Quantity));
337	if (Scalable)
338	SU = SE.getMulExpr(LHS: SU, RHS: SE.getVScale(Ty: SU->getType()));
339	return SU;
340	}
341	};
342
343	// This is needed for the Compare type of std::map when Immediate is used
344	// as a key. We don't need it to be fully correct against any value of vscale,
345	// just to make sure that vscale-related terms in the map are considered against
346	// each other rather than being mixed up and potentially missing opportunities.
347	struct KeyOrderTargetImmediate {
348	bool operator()(const Immediate &LHS, const Immediate &RHS) const {
349	if (LHS.isScalable() && !RHS.isScalable())
350	return false;
351	if (!LHS.isScalable() && RHS.isScalable())
352	return true;
353	return LHS.getKnownMinValue() < RHS.getKnownMinValue();
354	}
355	};
356
357	// This would be nicer if we could be generic instead of directly using size_t,
358	// but there doesn't seem to be a type trait for is_orderable or
359	// is_lessthan_comparable or similar.
360	struct KeyOrderSizeTAndImmediate {
361	bool operator()(const std::pair<size_t, Immediate> &LHS,
362	const std::pair<size_t, Immediate> &RHS) const {
363	size_t LSize = LHS.first;
364	size_t RSize = RHS.first;
365	if (LSize != RSize)
366	return LSize < RSize;
367	return KeyOrderTargetImmediate ()(LHS.second, RHS.second);
368	}
369	};
370	} // end anonymous namespace
371
372	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
373	void RegSortData::print(raw_ostream &OS) const {
374	OS << "[NumUses=" << UsedByIndices.count() << `']'`;
375	}
376
377	LLVM_DUMP_METHOD void RegSortData::dump() const {
378	print(errs()); errs() << `'\n'`;
379	}
380	#endif
381
382	namespace {
383
384	/// Map register candidates to information about how they are used.
385	class RegUseTracker {
386	using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
387
388	RegUsesTy RegUsesMap;
389	SmallVector<const SCEV *, `16`> RegSequence;
390
391	public:
392	void countRegister(const SCEV *Reg, size_t LUIdx);
393	void dropRegister(const SCEV *Reg, size_t LUIdx);
394	void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
395
396	bool isRegUsedByUsesOtherThan(const SCEV Reg, size_t LUIdx) const*;
397
398	const SmallBitVector &getUsedByIndices(const SCEV Reg) const*;
399
400	void clear();
401
402	using iterator = SmallVectorImpl<const SCEV *>::iterator;
403	using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;
404
405	iterator begin() { return RegSequence.begin(); }
406	iterator end() { return RegSequence.end(); }
407	const_iterator begin() const { return RegSequence.begin(); }
408	const_iterator end() const { return RegSequence.end(); }
409	};
410
411	} // end anonymous namespace
412
413	void
414	RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
415	std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Key: Reg);
416	RegSortData &RSD = Pair.first ->second;
417	if (Pair.second)
418	RegSequence.push_back(Elt: Reg);
419	RSD.UsedByIndices.resize(N: std::max(a: RSD.UsedByIndices.size(), b: LUIdx + `1`));
420	RSD.UsedByIndices.set(LUIdx);
421	}
422
423	void
424	RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
425	RegUsesTy::iterator It = RegUsesMap.find(Val: Reg);
426	assert(It != RegUsesMap.end());
427	RegSortData &RSD = It ->second;
428	assert(RSD.UsedByIndices.size() > LUIdx);
429	RSD.UsedByIndices.reset(Idx: LUIdx);
430	}
431
432	void
433	RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
434	assert(LUIdx <= LastLUIdx);
435
436	// Update RegUses. The data structure is not optimized for this purpose;
437	// we must iterate through it and update each of the bit vectors.
438	for (auto &Pair : RegUsesMap) {
439	SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
440	if (LUIdx < UsedByIndices.size())
441	UsedByIndices [LUIdx] =
442	LastLUIdx < UsedByIndices.size() ? UsedByIndices [LastLUIdx] : false;
443	UsedByIndices.resize(N: std::min(a: UsedByIndices.size(), b: LastLUIdx));
444	}
445	}
446
447	bool
448	RegUseTracker::isRegUsedByUsesOtherThan(const SCEV Reg, size_t LUIdx) const* {
449	RegUsesTy::const_iterator I = RegUsesMap.find(Val: Reg);
450	if (I == RegUsesMap.end())
451	return false;
452	const SmallBitVector &UsedByIndices = I ->second.UsedByIndices;
453	int i = UsedByIndices.find_first();
454	if (i == -`1`) return false;
455	if ((size_t)i != LUIdx) return true;
456	return UsedByIndices.find_next(Prev: i) != -`1`;
457	}
458
459	const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV Reg) const* {
460	RegUsesTy::const_iterator I = RegUsesMap.find(Val: Reg);
461	assert(I != RegUsesMap.end() && "Unknown register!");
462	return I ->second.UsedByIndices;
463	}
464
465	void RegUseTracker::clear() {
466	RegUsesMap.clear();
467	RegSequence.clear();
468	}
469
470	namespace {
471
472	/// This class holds information that describes a formula for computing
473	/// satisfying a use. It may include broken-out immediates and scaled registers.
474	struct Formula {
475	/// Global base address used for complex addressing.
476	GlobalValue BaseGV = nullptr*;
477
478	/// Base offset for complex addressing.
479	Immediate BaseOffset = Immediate::getZero();
480
481	/// Whether any complex addressing has a base register.
482	bool HasBaseReg = false;
483
484	/// The scale of any complex addressing.
485	int64_t Scale = `0`;
486
487	/// The list of "base" registers for this use. When this is non-empty. The
488	/// canonical representation of a formula is
489	/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
490	/// 2. ScaledReg != NULL implies Scale != 1 \|\| !BaseRegs.empty().
491	/// 3. The reg containing recurrent expr related with currect loop in the
492	/// formula should be put in the ScaledReg.
493	/// #1 enforces that the scaled register is always used when at least two
494	/// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 reg2.*
495	/// #2 enforces that 1 reg is reg.*
496	/// #3 ensures invariant regs with respect to current loop can be combined
497	/// together in LSR codegen.
498	/// This invariant can be temporarily broken while building a formula.
499	/// However, every formula inserted into the LSRInstance must be in canonical
500	/// form.
501	SmallVector<const SCEV *, `4`> BaseRegs;
502
503	/// The 'scaled' register for this use. This should be non-null when Scale is
504	/// not zero.
505	const SCEV ScaledReg = nullptr*;
506
507	/// An additional constant offset which added near the use. This requires a
508	/// temporary register, but the offset itself can live in an add immediate
509	/// field rather than a register.
510	Immediate UnfoldedOffset = Immediate::getZero();
511
512	Formula() = default;
513
514	void initialMatch(const SCEV S, Loop L, ScalarEvolution &SE);
515
516	bool isCanonical(const Loop &L) const;
517
518	void canonicalize(const Loop &L);
519
520	bool unscale();
521
522	bool hasZeroEnd() const;
523
524	size_t getNumRegs() const;
525	Type getType() const*;
526
527	void deleteBaseReg(const SCEV *&S);
528
529	bool referencesReg(const SCEV S) const*;
530	bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
531	const RegUseTracker &RegUses) const;
532
533	void print(raw_ostream &OS) const;
534	void dump() const;
535	};
536
537	} // end anonymous namespace
538
539	/// Recursion helper for initialMatch.
540	static void DoInitialMatch(const SCEV S, Loop L,
541	SmallVectorImpl<const SCEV *> &Good,
542	SmallVectorImpl<const SCEV *> &Bad,
543	ScalarEvolution &SE) {
544	// Collect expressions which properly dominate the loop header.
545	if (SE.properlyDominates(S, BB: L->getHeader())) {
546	Good.push_back(Elt: S);
547	return;
548	}
549
550	// Look at add operands.
551	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
552	for (const SCEV *S : Add->operands())
553	DoInitialMatch(S, L, Good, Bad, SE);
554	return;
555	}
556
557	// Look at addrec operands.
558	const SCEV Start, Step;
559	const Loop *ARLoop;
560	if (match(S,
561	P: m_scev_AffineAddRec(Op0: m_SCEV(V&: Start), Op1: m_SCEV(V&: Step), L: m_Loop(L&: ARLoop))) &&
562	!Start->isZero()) {
563	DoInitialMatch(S: Start, L, Good, Bad, SE);
564	DoInitialMatch(S: SE.getAddRecExpr(Start: SE.getConstant(Ty: S->getType(), V: `0`), Step,
565	// FIXME: AR->getNoWrapFlags()
566	L: ARLoop, Flags: SCEV::FlagAnyWrap),
567	L, Good, Bad, SE);
568	return;
569	}
570
571	// Handle a multiplication by -1 (negation) if it didn't fold.
572	if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Val: S))
573	if (Mul->getOperand(i: `0`)->isAllOnesValue()) {
574	SmallVector<const SCEV *, `4`> Ops(drop_begin(RangeOrContainer: Mul->operands()));
575	const SCEV *NewMul = SE.getMulExpr(Ops);
576
577	SmallVector<const SCEV *, `4`> MyGood;
578	SmallVector<const SCEV *, `4`> MyBad;
579	DoInitialMatch(S: NewMul, L, Good&: MyGood, Bad&: MyBad, SE);
580	const SCEV *NegOne = SE.getSCEV(V: ConstantInt::getAllOnesValue(
581	Ty: SE.getEffectiveSCEVType(Ty: NewMul->getType())));
582	for (const SCEV *S : MyGood)
583	Good.push_back(Elt: SE.getMulExpr(LHS: NegOne, RHS: S));
584	for (const SCEV *S : MyBad)
585	Bad.push_back(Elt: SE.getMulExpr(LHS: NegOne, RHS: S));
586	return;
587	}
588
589	// Ok, we can't do anything interesting. Just stuff the whole thing into a
590	// register and hope for the best.
591	Bad.push_back(Elt: S);
592	}
593
594	/// Incorporate loop-variant parts of S into this Formula, attempting to keep
595	/// all loop-invariant and loop-computable values in a single base register.
596	void Formula::initialMatch(const SCEV S, Loop L, ScalarEvolution &SE) {
597	SmallVector<const SCEV *, `4`> Good;
598	SmallVector<const SCEV *, `4`> Bad;
599	DoInitialMatch(S, L, Good, Bad, SE);
600	if (!Good.empty()) {
601	const SCEV *Sum = SE.getAddExpr(Ops&: Good);
602	if (!Sum->isZero())
603	BaseRegs.push_back(Elt: Sum);
604	HasBaseReg = true;
605	}
606	if (!Bad.empty()) {
607	const SCEV *Sum = SE.getAddExpr(Ops&: Bad);
608	if (!Sum->isZero())
609	BaseRegs.push_back(Elt: Sum);
610	HasBaseReg = true;
611	}
612	canonicalize(L: *L);
613	}
614
615	static bool containsAddRecDependentOnLoop(const SCEV S, const* Loop &L) {
616	return SCEVExprContains(Root: S, Pred: [&L](const SCEV *S) {
617	return isa<SCEVAddRecExpr>(Val: S) && (cast<SCEVAddRecExpr>(Val: S)->getLoop() == &L);
618	});
619	}
620
621	/// Check whether or not this formula satisfies the canonical
622	/// representation.
623	/// \see Formula::BaseRegs.
624	bool Formula::isCanonical(const Loop &L) const {
625	assert((Scale == `0` \|\| ScaledReg) &&
626	"ScaledReg must be non-null if Scale is non-zero");
627
628	if (!ScaledReg)
629	return BaseRegs.size() <= `1`;
630
631	if (Scale != `1`)
632	return true;
633
634	if (Scale == `1` && BaseRegs.empty())
635	return false;
636
637	if (containsAddRecDependentOnLoop(S: ScaledReg, L))
638	return true;
639
640	// If ScaledReg is not a recurrent expr, or it is but its loop is not current
641	// loop, meanwhile BaseRegs contains a recurrent expr reg related with current
642	// loop, we want to swap the reg in BaseRegs with ScaledReg.
643	return none_of(Range: BaseRegs, P: [&L](const SCEV *S) {
644	return containsAddRecDependentOnLoop(S, L);
645	});
646	}
647
648	/// Helper method to morph a formula into its canonical representation.
649	/// \see Formula::BaseRegs.
650	/// Every formula having more than one base register, must use the ScaledReg
651	/// field. Otherwise, we would have to do special cases everywhere in LSR
652	/// to treat reg1 + reg2 + ... the same way as reg1 + 1reg2 + ...*
653	/// On the other hand, 1reg should be canonicalized into reg.*
654	void Formula::canonicalize(const Loop &L) {
655	if (isCanonical(L))
656	return;
657
658	if (BaseRegs.empty()) {
659	// No base reg? Use scale reg with scale = 1 as such.
660	assert(ScaledReg && "Expected 1*reg => reg");
661	assert(Scale == `1` && "Expected 1*reg => reg");
662	BaseRegs.push_back(Elt: ScaledReg);
663	Scale = `0`;
664	ScaledReg = nullptr;
665	return;
666	}
667
668	// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
669	if (!ScaledReg) {
670	ScaledReg = BaseRegs.pop_back_val();
671	Scale = `1`;
672	}
673
674	// If ScaledReg is an invariant with respect to L, find the reg from
675	// BaseRegs containing the recurrent expr related with Loop L. Swap the
676	// reg with ScaledReg.
677	if (!containsAddRecDependentOnLoop(S: ScaledReg, L)) {
678	auto I = find_if(Range&: BaseRegs, P: [&L](const SCEV *S) {
679	return containsAddRecDependentOnLoop(S, L);
680	});
681	if (I != BaseRegs.end())
682	std::swap(a&: ScaledReg, b&: *I);
683	}
684	assert(isCanonical(L) && "Failed to canonicalize?");
685	}
686
687	/// Get rid of the scale in the formula.
688	/// In other words, this method morphes reg1 + 1reg2 into reg1 + reg2.*
689	/// \return true if it was possible to get rid of the scale, false otherwise.
690	/// \note After this operation the formula may not be in the canonical form.
691	bool Formula::unscale() {
692	if (Scale != `1`)
693	return false;
694	Scale = `0`;
695	BaseRegs.push_back(Elt: ScaledReg);
696	ScaledReg = nullptr;
697	return true;
698	}
699
700	bool Formula::hasZeroEnd() const {
701	if (UnfoldedOffset \|\| BaseOffset)
702	return false;
703	if (BaseRegs.size() != `1` \|\| ScaledReg)
704	return false;
705	return true;
706	}
707
708	/// Return the total number of register operands used by this formula. This does
709	/// not include register uses implied by non-constant addrec strides.
710	size_t Formula::getNumRegs() const {
711	return !!ScaledReg + BaseRegs.size();
712	}
713
714	/// Return the type of this formula, if it has one, or null otherwise. This type
715	/// is meaningless except for the bit size.
716	Type Formula::getType() const* {
717	return !BaseRegs.empty() ? BaseRegs.front()->getType() :
718	ScaledReg ? ScaledReg->getType() :
719	BaseGV ? BaseGV->getType() :
720	nullptr;
721	}
722
723	/// Delete the given base reg from the BaseRegs list.
724	void Formula::deleteBaseReg(const SCEV *&S) {
725	if (&S != &BaseRegs.back())
726	std::swap(a&: S, b&: BaseRegs.back());
727	BaseRegs.pop_back();
728	}
729
730	/// Test if this formula references the given register.
731	bool Formula::referencesReg(const SCEV S) const* {
732	return S == ScaledReg \|\| is_contained(Range: BaseRegs, Element: S);
733	}
734
735	/// Test whether this formula uses registers which are used by uses other than
736	/// the use with the given index.
737	bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
738	const RegUseTracker &RegUses) const {
739	if (ScaledReg)
740	if (RegUses.isRegUsedByUsesOtherThan(Reg: ScaledReg, LUIdx))
741	return true;
742	for (const SCEV *BaseReg : BaseRegs)
743	if (RegUses.isRegUsedByUsesOtherThan(Reg: BaseReg, LUIdx))
744	return true;
745	return false;
746	}
747
748	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
749	void Formula::print(raw_ostream &OS) const {
750	bool First = true;
751	if (BaseGV) {
752	if (!First) OS << " + "; else First = false;
753	BaseGV->printAsOperand(OS, /PrintType=/false);
754	}
755	if (BaseOffset.isNonZero()) {
756	if (!First) OS << " + "; else First = false;
757	OS << BaseOffset;
758	}
759	for (const SCEV *BaseReg : BaseRegs) {
760	if (!First) OS << " + "; else First = false;
761	OS << "reg(" << *BaseReg << `')'`;
762	}
763	if (HasBaseReg && BaseRegs.empty()) {
764	if (!First) OS << " + "; else First = false;
765	OS << "error: HasBaseReg";
766	} else if (!HasBaseReg && !BaseRegs.empty()) {
767	if (!First) OS << " + "; else First = false;
768	OS << "error: !HasBaseReg";
769	}
770	if (Scale != `0`) {
771	if (!First) OS << " + "; else First = false;
772	OS << Scale << "*reg(";
773	if (ScaledReg)
774	OS << *ScaledReg;
775	else
776	OS << "<unknown>";
777	OS << `')'`;
778	}
779	if (UnfoldedOffset.isNonZero()) {
780	if (!First) OS << " + ";
781	OS << "imm(" << UnfoldedOffset << `')'`;
782	}
783	}
784
785	LLVM_DUMP_METHOD void Formula::dump() const {
786	print(errs()); errs() << `'\n'`;
787	}
788	#endif
789
790	/// Return true if the given addrec can be sign-extended without changing its
791	/// value.
792	static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
793	Type *WideTy =
794	IntegerType::get(C&: SE.getContext(), NumBits: SE.getTypeSizeInBits(Ty: AR->getType()) + `1`);
795	return isa<SCEVAddRecExpr>(Val: SE.getSignExtendExpr(Op: AR, Ty: WideTy));
796	}
797
798	/// Return true if the given add can be sign-extended without changing its
799	/// value.
800	static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
801	Type *WideTy =
802	IntegerType::get(C&: SE.getContext(), NumBits: SE.getTypeSizeInBits(Ty: A->getType()) + `1`);
803	return isa<SCEVAddExpr>(Val: SE.getSignExtendExpr(Op: A, Ty: WideTy));
804	}
805
806	/// Return true if the given mul can be sign-extended without changing its
807	/// value.
808	static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
809	Type *WideTy =
810	IntegerType::get(C&: SE.getContext(),
811	NumBits: SE.getTypeSizeInBits(Ty: M->getType()) * M->getNumOperands());
812	return isa<SCEVMulExpr>(Val: SE.getSignExtendExpr(Op: M, Ty: WideTy));
813	}
814
815	/// Return an expression for LHS /s RHS, if it can be determined and if the
816	/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
817	/// is true, expressions like (X Y) /s Y are simplified to X, ignoring that*
818	/// the multiplication may overflow, which is useful when the result will be
819	/// used in a context where the most significant bits are ignored.
820	static const SCEV getExactSDiv(const* SCEV LHS, const* SCEV *RHS,
821	ScalarEvolution &SE,
822	bool IgnoreSignificantBits = false) {
823	// Handle the trivial case, which works for any SCEV type.
824	if (LHS == RHS)
825	return SE.getConstant(Ty: LHS->getType(), V: `1`);
826
827	// Handle a few RHS special cases.
828	const SCEVConstant *RC = dyn_cast<SCEVConstant>(Val: RHS);
829	if (RC) {
830	const APInt &RA = RC->getAPInt();
831	// Handle x /s -1 as x -1, to give ScalarEvolution a chance to do*
832	// some folding.
833	if (RA.isAllOnes()) {
834	if (LHS->getType()->isPointerTy())
835	return nullptr;
836	return SE.getMulExpr(LHS, RHS: RC);
837	}
838	// Handle x /s 1 as x.
839	if (RA == `1`)
840	return LHS;
841	}
842
843	// Check for a division of a constant by a constant.
844	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: LHS)) {
845	if (!RC)
846	return nullptr;
847	const APInt &LA = C->getAPInt();
848	const APInt &RA = RC->getAPInt();
849	if (LA.srem(RHS: RA) != `0`)
850	return nullptr;
851	return SE.getConstant(Val: LA.sdiv(RHS: RA));
852	}
853
854	// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
855	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: LHS)) {
856	if ((IgnoreSignificantBits \|\| isAddRecSExtable(AR, SE)) && AR->isAffine()) {
857	const SCEV *Step = getExactSDiv(LHS: AR->getStepRecurrence(SE), RHS, SE,
858	IgnoreSignificantBits);
859	if (!Step) return nullptr;
860	const SCEV *Start = getExactSDiv(LHS: AR->getStart(), RHS, SE,
861	IgnoreSignificantBits);
862	if (!Start) return nullptr;
863	// FlagNW is independent of the start value, step direction, and is
864	// preserved with smaller magnitude steps.
865	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
866	return SE.getAddRecExpr(Start, Step, L: AR->getLoop(), Flags: SCEV::FlagAnyWrap);
867	}
868	return nullptr;
869	}
870
871	// Distribute the sdiv over add operands, if the add doesn't overflow.
872	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: LHS)) {
873	if (IgnoreSignificantBits \|\| isAddSExtable(A: Add, SE)) {
874	SmallVector<const SCEV *, `8`> Ops;
875	for (const SCEV *S : Add->operands()) {
876	const SCEV *Op = getExactSDiv(LHS: S, RHS, SE, IgnoreSignificantBits);
877	if (!Op) return nullptr;
878	Ops.push_back(Elt: Op);
879	}
880	return SE.getAddExpr(Ops);
881	}
882	return nullptr;
883	}
884
885	// Check for a multiply operand that we can pull RHS out of.
886	if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Val: LHS)) {
887	if (IgnoreSignificantBits \|\| isMulSExtable(M: Mul, SE)) {
888	// Handle special case C1XY /s C2XY.
889	if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(Val: RHS)) {
890	if (IgnoreSignificantBits \|\| isMulSExtable(M: MulRHS, SE)) {
891	const SCEVConstant *LC = dyn_cast<SCEVConstant>(Val: Mul->getOperand(i: `0`));
892	const SCEVConstant *RC =
893	dyn_cast<SCEVConstant>(Val: MulRHS->getOperand(i: `0`));
894	if (LC && RC) {
895	SmallVector<const SCEV *, `4`> LOps(drop_begin(RangeOrContainer: Mul->operands()));
896	SmallVector<const SCEV *, `4`> ROps(drop_begin(RangeOrContainer: MulRHS->operands()));
897	if (LOps == ROps)
898	return getExactSDiv(LHS: LC, RHS: RC, SE, IgnoreSignificantBits);
899	}
900	}
901	}
902
903	SmallVector<const SCEV *, `4`> Ops;
904	bool Found = false;
905	for (const SCEV *S : Mul->operands()) {
906	if (!Found)
907	if (const SCEV *Q = getExactSDiv(LHS: S, RHS, SE,
908	IgnoreSignificantBits)) {
909	S = Q;
910	Found = true;
911	}
912	Ops.push_back(Elt: S);
913	}
914	return Found ? SE.getMulExpr(Ops) : nullptr;
915	}
916	return nullptr;
917	}
918
919	// Otherwise we don't know.
920	return nullptr;
921	}
922
923	/// If S involves the addition of a constant integer value, return that integer
924	/// value, and mutate S to point to a new SCEV with that value excluded.
925	static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
926	const APInt *C;
927	if (match(S, P: m_scev_APInt(C))) {
928	if (C->getSignificantBits() <= `64`) {
929	S = SE.getConstant(Ty: S->getType(), V: `0`);
930	return Immediate::getFixed(MinVal: C->getSExtValue());
931	}
932	} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
933	SmallVector<const SCEV *, `8`> NewOps(Add->operands());
934	Immediate Result = ExtractImmediate(S&: NewOps.front(), SE);
935	if (Result.isNonZero())
936	S = SE.getAddExpr(Ops&: NewOps);
937	return Result;
938	} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
939	SmallVector<const SCEV *, `8`> NewOps(AR->operands());
940	Immediate Result = ExtractImmediate(S&: NewOps.front(), SE);
941	if (Result.isNonZero())
942	S = SE.getAddRecExpr(Operands&: NewOps, L: AR->getLoop(),
943	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
944	Flags: SCEV::FlagAnyWrap);
945	return Result;
946	} else if (EnableVScaleImmediates &&
947	match(S, P: m_scev_Mul(Op0: m_scev_APInt(C), Op1: m_SCEVVScale()))) {
948	S = SE.getConstant(Ty: S->getType(), V: `0`);
949	return Immediate::getScalable(MinVal: C->getSExtValue());
950	}
951	return Immediate::getZero();
952	}
953
954	/// If S involves the addition of a GlobalValue address, return that symbol, and
955	/// mutate S to point to a new SCEV with that value excluded.
956	static GlobalValue ExtractSymbol(const* SCEV *&S, ScalarEvolution &SE) {
957	if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S)) {
958	if (GlobalValue *GV = dyn_cast<GlobalValue>(Val: U->getValue())) {
959	S = SE.getConstant(Ty: GV->getType(), V: `0`);
960	return GV;
961	}
962	} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
963	SmallVector<const SCEV *, `8`> NewOps(Add->operands());
964	GlobalValue *Result = ExtractSymbol(S&: NewOps.back(), SE);
965	if (Result)
966	S = SE.getAddExpr(Ops&: NewOps);
967	return Result;
968	} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
969	SmallVector<const SCEV *, `8`> NewOps(AR->operands());
970	GlobalValue *Result = ExtractSymbol(S&: NewOps.front(), SE);
971	if (Result)
972	S = SE.getAddRecExpr(Operands&: NewOps, L: AR->getLoop(),
973	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
974	Flags: SCEV::FlagAnyWrap);
975	return Result;
976	}
977	return nullptr;
978	}
979
980	/// Returns true if the specified instruction is using the specified value as an
981	/// address.
982	static bool isAddressUse(const TargetTransformInfo &TTI,
983	Instruction Inst, Value OperandVal) {
984	bool isAddress = isa<LoadInst>(Val: Inst);
985	if (StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
986	if (SI->getPointerOperand() == OperandVal)
987	isAddress = true;
988	} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) {
989	// Addressing modes can also be folded into prefetches and a variety
990	// of intrinsics.
991	switch (II->getIntrinsicID()) {
992	case Intrinsic::memset:
993	case Intrinsic::prefetch:
994	case Intrinsic::masked_load:
995	if (II->getArgOperand(i: `0`) == OperandVal)
996	isAddress = true;
997	break;
998	case Intrinsic::masked_store:
999	if (II->getArgOperand(i: `1`) == OperandVal)
1000	isAddress = true;
1001	break;
1002	case Intrinsic::memmove:
1003	case Intrinsic::memcpy:
1004	if (II->getArgOperand(i: `0`) == OperandVal \|\|
1005	II->getArgOperand(i: `1`) == OperandVal)
1006	isAddress = true;
1007	break;
1008	default: {
1009	MemIntrinsicInfo IntrInfo;
1010	if (TTI.getTgtMemIntrinsic(Inst: II, Info&: IntrInfo)) {
1011	if (IntrInfo.PtrVal == OperandVal)
1012	isAddress = true;
1013	}
1014	}
1015	}
1016	} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: Inst)) {
1017	if (RMW->getPointerOperand() == OperandVal)
1018	isAddress = true;
1019	} else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst)) {
1020	if (CmpX->getPointerOperand() == OperandVal)
1021	isAddress = true;
1022	}
1023	return isAddress;
1024	}
1025
1026	/// Return the type of the memory being accessed.
1027	static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
1028	Instruction Inst, Value OperandVal) {
1029	MemAccessTy AccessTy = MemAccessTy::getUnknown(Ctx&: Inst->getContext());
1030
1031	// First get the type of memory being accessed.
1032	if (Type *Ty = Inst->getAccessType())
1033	AccessTy.MemTy = Ty;
1034
1035	// Then get the pointer address space.
1036	if (const StoreInst *SI = dyn_cast<StoreInst>(Val: Inst)) {
1037	AccessTy.AddrSpace = SI->getPointerAddressSpace();
1038	} else if (const LoadInst *LI = dyn_cast<LoadInst>(Val: Inst)) {
1039	AccessTy.AddrSpace = LI->getPointerAddressSpace();
1040	} else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: Inst)) {
1041	AccessTy.AddrSpace = RMW->getPointerAddressSpace();
1042	} else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst)) {
1043	AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
1044	} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: Inst)) {
1045	switch (II->getIntrinsicID()) {
1046	case Intrinsic::prefetch:
1047	case Intrinsic::memset:
1048	AccessTy.AddrSpace = II->getArgOperand(i: `0`)->getType()->getPointerAddressSpace();
1049	AccessTy.MemTy = OperandVal->getType();
1050	break;
1051	case Intrinsic::memmove:
1052	case Intrinsic::memcpy:
1053	AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
1054	AccessTy.MemTy = OperandVal->getType();
1055	break;
1056	case Intrinsic::masked_load:
1057	AccessTy.AddrSpace =
1058	II->getArgOperand(i: `0`)->getType()->getPointerAddressSpace();
1059	break;
1060	case Intrinsic::masked_store:
1061	AccessTy.AddrSpace =
1062	II->getArgOperand(i: `1`)->getType()->getPointerAddressSpace();
1063	break;
1064	default: {
1065	MemIntrinsicInfo IntrInfo;
1066	if (TTI.getTgtMemIntrinsic(Inst: II, Info&: IntrInfo) && IntrInfo.PtrVal) {
1067	AccessTy.AddrSpace
1068	= IntrInfo.PtrVal->getType()->getPointerAddressSpace();
1069	}
1070
1071	break;
1072	}
1073	}
1074	}
1075
1076	return AccessTy;
1077	}
1078
1079	/// Return true if this AddRec is already a phi in its loop.
1080	static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
1081	for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
1082	if (SE.isSCEVable(Ty: PN.getType()) &&
1083	(SE.getEffectiveSCEVType(Ty: PN.getType()) ==
1084	SE.getEffectiveSCEVType(Ty: AR->getType())) &&
1085	SE.getSCEV(V: &PN) == AR)
1086	return true;
1087	}
1088	return false;
1089	}
1090
1091	/// Check if expanding this expression is likely to incur significant cost. This
1092	/// is tricky because SCEV doesn't track which expressions are actually computed
1093	/// by the current IR.
1094	///
1095	/// We currently allow expansion of IV increments that involve adds,
1096	/// multiplication by constants, and AddRecs from existing phis.
1097	///
1098	/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
1099	/// obvious multiple of the UDivExpr.
1100	static bool isHighCostExpansion(const SCEV *S,
1101	SmallPtrSetImpl<const SCEV*> &Processed,
1102	ScalarEvolution &SE) {
1103	// Zero/One operand expressions
1104	switch (S->getSCEVType()) {
1105	case scUnknown:
1106	case scConstant:
1107	case scVScale:
1108	return false;
1109	case scTruncate:
1110	return isHighCostExpansion(S: cast<SCEVTruncateExpr>(Val: S)->getOperand(),
1111	Processed, SE);
1112	case scZeroExtend:
1113	return isHighCostExpansion(S: cast<SCEVZeroExtendExpr>(Val: S)->getOperand(),
1114	Processed, SE);
1115	case scSignExtend:
1116	return isHighCostExpansion(S: cast<SCEVSignExtendExpr>(Val: S)->getOperand(),
1117	Processed, SE);
1118	default:
1119	break;
1120	}
1121
1122	if (!Processed.insert(Ptr: S).second)
1123	return false;
1124
1125	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
1126	for (const SCEV *S : Add->operands()) {
1127	if (isHighCostExpansion(S, Processed, SE))
1128	return true;
1129	}
1130	return false;
1131	}
1132
1133	const SCEV Op0, Op1;
1134	if (match(S, P: m_scev_Mul(Op0: m_SCEV(V&: Op0), Op1: m_SCEV(V&: Op1)))) {
1135	// Multiplication by a constant is ok
1136	if (isa<SCEVConstant>(Val: Op0))
1137	return isHighCostExpansion(S: Op1, Processed, SE);
1138
1139	// If we have the value of one operand, check if an existing
1140	// multiplication already generates this expression.
1141	if (const auto *U = dyn_cast<SCEVUnknown>(Val: Op1)) {
1142	Value *UVal = U->getValue();
1143	for (User *UR : UVal->users()) {
1144	// If U is a constant, it may be used by a ConstantExpr.
1145	Instruction *UI = dyn_cast<Instruction>(Val: UR);
1146	if (UI && UI->getOpcode() == Instruction::Mul &&
1147	SE.isSCEVable(Ty: UI->getType())) {
1148	return SE.getSCEV(V: UI) == S;
1149	}
1150	}
1151	}
1152	}
1153
1154	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
1155	if (isExistingPhi(AR, SE))
1156	return false;
1157	}
1158
1159	// Fow now, consider any other type of expression (div/mul/min/max) high cost.
1160	return true;
1161	}
1162
1163	namespace {
1164
1165	class LSRUse;
1166
1167	} // end anonymous namespace
1168
1169	/// Check if the addressing mode defined by \p F is completely
1170	/// folded in \p LU at isel time.
1171	/// This includes address-mode folding and special icmp tricks.
1172	/// This function returns true if \p LU can accommodate what \p F
1173	/// defines and up to 1 base + 1 scaled + offset.
1174	/// In other words, if \p F has several base registers, this function may
1175	/// still return true. Therefore, users still need to account for
1176	/// additional base registers and/or unfolded offsets to derive an
1177	/// accurate cost model.
1178	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1179	const LSRUse &LU, const Formula &F);
1180
1181	// Get the cost of the scaling factor used in F for LU.
1182	static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
1183	const LSRUse &LU, const Formula &F,
1184	const Loop &L);
1185
1186	namespace {
1187
1188	/// This class is used to measure and compare candidate formulae.
1189	class Cost {
1190	const Loop L = nullptr*;
1191	ScalarEvolution SE = nullptr*;
1192	const TargetTransformInfo TTI = nullptr*;
1193	TargetTransformInfo::LSRCost C;
1194	TTI::AddressingModeKind AMK = TTI::AMK_None;
1195
1196	public:
1197	Cost() = delete;
1198	Cost(const Loop L, ScalarEvolution &SE, const* TargetTransformInfo &TTI,
1199	TTI::AddressingModeKind AMK) :
1200	L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
1201	C.Insns = `0`;
1202	C.NumRegs = `0`;
1203	C.AddRecCost = `0`;
1204	C.NumIVMuls = `0`;
1205	C.NumBaseAdds = `0`;
1206	C.ImmCost = `0`;
1207	C.SetupCost = `0`;
1208	C.ScaleCost = `0`;
1209	}
1210
1211	bool isLess(const Cost &Other) const;
1212
1213	void Lose();
1214
1215	#ifndef NDEBUG
1216	// Once any of the metrics loses, they must all remain losers.
1217	bool isValid() {
1218	return ((C.Insns \| C.NumRegs \| C.AddRecCost \| C.NumIVMuls \| C.NumBaseAdds
1219	\| C.ImmCost \| C.SetupCost \| C.ScaleCost) != ~`0u`)
1220	\|\| ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
1221	& C.ImmCost & C.SetupCost & C.ScaleCost) == ~`0u`);
1222	}
1223	#endif
1224
1225	bool isLoser() {
1226	assert(isValid() && "invalid cost");
1227	return C.NumRegs == ~`0u`;
1228	}
1229
1230	void RateFormula(const Formula &F,
1231	SmallPtrSetImpl<const SCEV *> &Regs,
1232	const DenseSet<const SCEV *> &VisitedRegs,
1233	const LSRUse &LU,
1234	SmallPtrSetImpl<const SCEV > LoserRegs = nullptr);
1235
1236	void print(raw_ostream &OS) const;
1237	void dump() const;
1238
1239	private:
1240	void RateRegister(const Formula &F, const SCEV *Reg,
1241	SmallPtrSetImpl<const SCEV *> &Regs);
1242	void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1243	SmallPtrSetImpl<const SCEV *> &Regs,
1244	SmallPtrSetImpl<const SCEV > LoserRegs);
1245	};
1246
1247	/// An operand value in an instruction which is to be replaced with some
1248	/// equivalent, possibly strength-reduced, replacement.
1249	struct LSRFixup {
1250	/// The instruction which will be updated.
1251	Instruction UserInst = nullptr*;
1252
1253	/// The operand of the instruction which will be replaced. The operand may be
1254	/// used more than once; every instance will be replaced.
1255	Value OperandValToReplace = nullptr*;
1256
1257	/// If this user is to use the post-incremented value of an induction
1258	/// variable, this set is non-empty and holds the loops associated with the
1259	/// induction variable.
1260	PostIncLoopSet PostIncLoops;
1261
1262	/// A constant offset to be added to the LSRUse expression. This allows
1263	/// multiple fixups to share the same LSRUse with different offsets, for
1264	/// example in an unrolled loop.
1265	Immediate Offset = Immediate::getZero();
1266
1267	LSRFixup() = default;
1268
1269	bool isUseFullyOutsideLoop(const Loop L) const*;
1270
1271	void print(raw_ostream &OS) const;
1272	void dump() const;
1273	};
1274
1275	/// This class holds the state that LSR keeps for each use in IVUsers, as well
1276	/// as uses invented by LSR itself. It includes information about what kinds of
1277	/// things can be folded into the user, information about the user itself, and
1278	/// information about how the use may be satisfied. TODO: Represent multiple
1279	/// users of the same expression in common?
1280	class LSRUse {
1281	DenseSet<SmallVector<const SCEV *, `4`>> Uniquifier;
1282
1283	public:
1284	/// An enum for a kind of use, indicating what types of scaled and immediate
1285	/// operands it might support.
1286	enum KindType {
1287	Basic, ///< A normal use, with no folding.
1288	Special, ///< A special case of basic, allowing -1 scales.
1289	Address, ///< An address use; folding according to TargetLowering
1290	ICmpZero ///< An equality icmp with both operands folded into one.
1291	// TODO: Add a generic icmp too?
1292	};
1293
1294	using SCEVUseKindPair = PointerIntPair<const SCEV *, `2`, KindType>;
1295
1296	KindType Kind;
1297	MemAccessTy AccessTy;
1298
1299	/// The list of operands which are to be replaced.
1300	SmallVector<LSRFixup, `8`> Fixups;
1301
1302	/// Keep track of the min and max offsets of the fixups.
1303	Immediate MinOffset = Immediate::getFixedMax();
1304	Immediate MaxOffset = Immediate::getFixedMin();
1305
1306	/// This records whether all of the fixups using this LSRUse are outside of
1307	/// the loop, in which case some special-case heuristics may be used.
1308	bool AllFixupsOutsideLoop = true;
1309
1310	/// RigidFormula is set to true to guarantee that this use will be associated
1311	/// with a single formula--the one that initially matched. Some SCEV
1312	/// expressions cannot be expanded. This allows LSR to consider the registers
1313	/// used by those expressions without the need to expand them later after
1314	/// changing the formula.
1315	bool RigidFormula = false;
1316
1317	/// This records the widest use type for any fixup using this
1318	/// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
1319	/// fixup widths to be equivalent, because the narrower one may be relying on
1320	/// the implicit truncation to truncate away bogus bits.
1321	Type WidestFixupType = nullptr*;
1322
1323	/// A list of ways to build a value that can satisfy this user. After the
1324	/// list is populated, one of these is selected heuristically and used to
1325	/// formulate a replacement for OperandValToReplace in UserInst.
1326	SmallVector<Formula, `12`> Formulae;
1327
1328	/// The set of register candidates used by all formulae in this LSRUse.
1329	SmallPtrSet<const SCEV *, `4`> Regs;
1330
1331	LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy (AT) {}
1332
1333	LSRFixup &getNewFixup() {
1334	Fixups.push_back(Elt: LSRFixup ());
1335	return Fixups.back();
1336	}
1337
1338	void pushFixup(LSRFixup &f) {
1339	Fixups.push_back(Elt: f);
1340	if (Immediate::isKnownGT(LHS: f.Offset, RHS: MaxOffset))
1341	MaxOffset = f.Offset;
1342	if (Immediate::isKnownLT(LHS: f.Offset, RHS: MinOffset))
1343	MinOffset = f.Offset;
1344	}
1345
1346	bool HasFormulaWithSameRegs(const Formula &F) const;
1347	float getNotSelectedProbability(const SCEV Reg) const*;
1348	bool InsertFormula(const Formula &F, const Loop &L);
1349	void DeleteFormula(Formula &F);
1350	void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1351
1352	void print(raw_ostream &OS) const;
1353	void dump() const;
1354	};
1355
1356	} // end anonymous namespace
1357
1358	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1359	LSRUse::KindType Kind, MemAccessTy AccessTy,
1360	GlobalValue *BaseGV, Immediate BaseOffset,
1361	bool HasBaseReg, int64_t Scale,
1362	Instruction Fixup = nullptr*);
1363
1364	static unsigned getSetupCost(const SCEV Reg, unsigned* Depth) {
1365	if (isa<SCEVUnknown>(Val: Reg) \|\| isa<SCEVConstant>(Val: Reg))
1366	return `1`;
1367	if (Depth == `0`)
1368	return `0`;
1369	if (const auto *S = dyn_cast<SCEVAddRecExpr>(Val: Reg))
1370	return getSetupCost(Reg: S->getStart(), Depth: Depth - `1`);
1371	if (auto S = dyn_cast<SCEVIntegralCastExpr>(Val: Reg))
1372	return getSetupCost(Reg: S->getOperand(), Depth: Depth - `1`);
1373	if (auto S = dyn_cast<SCEVNAryExpr>(Val: Reg))
1374	return std::accumulate(first: S->operands().begin(), last: S->operands().end(), init: `0`,
1375	binary_op: [&](unsigned i, const SCEV *Reg) {
1376	return i + getSetupCost(Reg, Depth: Depth - `1`);
1377	});
1378	if (auto S = dyn_cast<SCEVUDivExpr>(Val: Reg))
1379	return getSetupCost(Reg: S->getLHS(), Depth: Depth - `1`) +
1380	getSetupCost(Reg: S->getRHS(), Depth: Depth - `1`);
1381	return `0`;
1382	}
1383
1384	/// Tally up interesting quantities from the given register.
1385	void Cost::RateRegister(const Formula &F, const SCEV *Reg,
1386	SmallPtrSetImpl<const SCEV *> &Regs) {
1387	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Reg)) {
1388	// If this is an addrec for another loop, it should be an invariant
1389	// with respect to L since L is the innermost loop (at least
1390	// for now LSR only handles innermost loops).
1391	if (AR->getLoop() != L) {
1392	// If the AddRec exists, consider it's register free and leave it alone.
1393	if (isExistingPhi(AR, SE&: *SE) && AMK != TTI::AMK_PostIndexed)
1394	return;
1395
1396	// It is bad to allow LSR for current loop to add induction variables
1397	// for its sibling loops.
1398	if (!AR->getLoop()->contains(L)) {
1399	Lose();
1400	return;
1401	}
1402
1403	// Otherwise, it will be an invariant with respect to Loop L.
1404	++C.NumRegs;
1405	return;
1406	}
1407
1408	unsigned LoopCost = `1`;
1409	if (TTI->isIndexedLoadLegal(Mode: TTI->MIM_PostInc, Ty: AR->getType()) \|\|
1410	TTI->isIndexedStoreLegal(Mode: TTI->MIM_PostInc, Ty: AR->getType())) {
1411	const SCEV *Start;
1412	const SCEVConstant *Step;
1413	if (match(S: AR, P: m_scev_AffineAddRec(Op0: m_SCEV(V&: Start), Op1: m_SCEVConstant(V&: Step))))
1414	// If the step size matches the base offset, we could use pre-indexed
1415	// addressing.
1416	if ((AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed() &&
1417	Step->getAPInt() == F.BaseOffset.getFixedValue()) \|\|
1418	(AMK == TTI::AMK_PostIndexed && !isa<SCEVConstant>(Val: Start) &&
1419	SE->isLoopInvariant(S: Start, L)))
1420	LoopCost = `0`;
1421	}
1422	C.AddRecCost += LoopCost;
1423
1424	// Add the step value register, if it needs one.
1425	// TODO: The non-affine case isn't precisely modeled here.
1426	if (!AR->isAffine() \|\| !isa<SCEVConstant>(Val: AR->getOperand(i: `1`))) {
1427	if (!Regs.count(Ptr: AR->getOperand(i: `1`))) {
1428	RateRegister(F, Reg: AR->getOperand(i: `1`), Regs);
1429	if (isLoser())
1430	return;
1431	}
1432	}
1433	}
1434	++C.NumRegs;
1435
1436	// Rough heuristic; favor registers which don't require extra setup
1437	// instructions in the preheader.
1438	C.SetupCost += getSetupCost(Reg, Depth: SetupCostDepthLimit);
1439	// Ensure we don't, even with the recusion limit, produce invalid costs.
1440	C.SetupCost = std::min<unsigned>(a: C.SetupCost, b: `1` << `16`);
1441
1442	C.NumIVMuls += isa<SCEVMulExpr>(Val: Reg) &&
1443	SE->hasComputableLoopEvolution(S: Reg, L);
1444	}
1445
1446	/// Record this register in the set. If we haven't seen it before, rate
1447	/// it. Optional LoserRegs provides a way to declare any formula that refers to
1448	/// one of those regs an instant loser.
1449	void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1450	SmallPtrSetImpl<const SCEV *> &Regs,
1451	SmallPtrSetImpl<const SCEV > LoserRegs) {
1452	if (LoserRegs && LoserRegs->count(Ptr: Reg)) {
1453	Lose();
1454	return;
1455	}
1456	if (Regs.insert(Ptr: Reg).second) {
1457	RateRegister(F, Reg, Regs);
1458	if (LoserRegs && isLoser())
1459	LoserRegs->insert(Ptr: Reg);
1460	}
1461	}
1462
1463	void Cost::RateFormula(const Formula &F,
1464	SmallPtrSetImpl<const SCEV *> &Regs,
1465	const DenseSet<const SCEV *> &VisitedRegs,
1466	const LSRUse &LU,
1467	SmallPtrSetImpl<const SCEV > LoserRegs) {
1468	if (isLoser())
1469	return;
1470	assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
1471	// Tally up the registers.
1472	unsigned PrevAddRecCost = C.AddRecCost;
1473	unsigned PrevNumRegs = C.NumRegs;
1474	unsigned PrevNumBaseAdds = C.NumBaseAdds;
1475	if (const SCEV *ScaledReg = F.ScaledReg) {
1476	if (VisitedRegs.count(V: ScaledReg)) {
1477	Lose();
1478	return;
1479	}
1480	RatePrimaryRegister(F, Reg: ScaledReg, Regs, LoserRegs);
1481	if (isLoser())
1482	return;
1483	}
1484	for (const SCEV *BaseReg : F.BaseRegs) {
1485	if (VisitedRegs.count(V: BaseReg)) {
1486	Lose();
1487	return;
1488	}
1489	RatePrimaryRegister(F, Reg: BaseReg, Regs, LoserRegs);
1490	if (isLoser())
1491	return;
1492	}
1493
1494	// Determine how many (unfolded) adds we'll need inside the loop.
1495	size_t NumBaseParts = F.getNumRegs();
1496	if (NumBaseParts > `1`)
1497	// Do not count the base and a possible second register if the target
1498	// allows to fold 2 registers.
1499	C.NumBaseAdds +=
1500	NumBaseParts - (`1` + (F.Scale && isAMCompletelyFolded(TTI: *TTI, LU, F)));
1501	C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());
1502
1503	// Accumulate non-free scaling amounts.
1504	C.ScaleCost += getScalingFactorCost(TTI: TTI, LU, F, L: L).getValue();
1505
1506	// Tally up the non-zero immediates.
1507	for (const LSRFixup &Fixup : LU.Fixups) {
1508	if (Fixup.Offset.isCompatibleImmediate(Imm: F.BaseOffset)) {
1509	Immediate Offset = Fixup.Offset.addUnsigned(RHS: F.BaseOffset);
1510	if (F.BaseGV)
1511	C.ImmCost += `64`; // Handle symbolic values conservatively.
1512	// TODO: This should probably be the pointer size.
1513	else if (Offset.isNonZero())
1514	C.ImmCost +=
1515	APInt (`64`, Offset.getKnownMinValue(), true).getSignificantBits();
1516
1517	// Check with target if this offset with this instruction is
1518	// specifically not supported.
1519	if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
1520	!isAMCompletelyFolded(TTI: *TTI, Kind: LSRUse::Address, AccessTy: LU.AccessTy, BaseGV: F.BaseGV,
1521	BaseOffset: Offset, HasBaseReg: F.HasBaseReg, Scale: F.Scale, Fixup: Fixup.UserInst))
1522	C.NumBaseAdds++;
1523	} else {
1524	// Incompatible immediate type, increase cost to avoid using
1525	C.ImmCost += `2048`;
1526	}
1527	}
1528
1529	// If we don't count instruction cost exit here.
1530	if (!InsnsCost) {
1531	assert(isValid() && "invalid cost");
1532	return;
1533	}
1534
1535	// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
1536	// additional instruction (at least fill).
1537	// TODO: Need distinguish register class?
1538	unsigned TTIRegNum = TTI->getNumberOfRegisters(
1539	ClassID: TTI->getRegisterClassForType(Vector: false, Ty: F.getType())) - `1`;
1540	if (C.NumRegs > TTIRegNum) {
1541	// Cost already exceeded TTIRegNum, then only newly added register can add
1542	// new instructions.
1543	if (PrevNumRegs > TTIRegNum)
1544	C.Insns += (C.NumRegs - PrevNumRegs);
1545	else
1546	C.Insns += (C.NumRegs - TTIRegNum);
1547	}
1548
1549	// If ICmpZero formula ends with not 0, it could not be replaced by
1550	// just add or sub. We'll need to compare final result of AddRec.
1551	// That means we'll need an additional instruction. But if the target can
1552	// macro-fuse a compare with a branch, don't count this extra instruction.
1553	// For -10 + {0, +, 1}:
1554	// i = i + 1;
1555	// cmp i, 10
1556	//
1557	// For {-10, +, 1}:
1558	// i = i + 1;
1559	if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
1560	!TTI->canMacroFuseCmp())
1561	C.Insns++;
1562	// Each new AddRec adds 1 instruction to calculation.
1563	C.Insns += (C.AddRecCost - PrevAddRecCost);
1564
1565	// BaseAdds adds instructions for unfolded registers.
1566	if (LU.Kind != LSRUse::ICmpZero)
1567	C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
1568	assert(isValid() && "invalid cost");
1569	}
1570
1571	/// Set this cost to a losing value.
1572	void Cost::Lose() {
1573	C.Insns = std::numeric_limits<unsigned>::max();
1574	C.NumRegs = std::numeric_limits<unsigned>::max();
1575	C.AddRecCost = std::numeric_limits<unsigned>::max();
1576	C.NumIVMuls = std::numeric_limits<unsigned>::max();
1577	C.NumBaseAdds = std::numeric_limits<unsigned>::max();
1578	C.ImmCost = std::numeric_limits<unsigned>::max();
1579	C.SetupCost = std::numeric_limits<unsigned>::max();
1580	C.ScaleCost = std::numeric_limits<unsigned>::max();
1581	}
1582
1583	/// Choose the lower cost.
1584	bool Cost::isLess(const Cost &Other) const {
1585	if (InsnsCost.getNumOccurrences() > `0` && InsnsCost &&
1586	C.Insns != Other.C.Insns)
1587	return C.Insns < Other.C.Insns;
1588	return TTI->isLSRCostLess(C1: C, C2: Other.C);
1589	}
1590
1591	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1592	void Cost::print(raw_ostream &OS) const {
1593	if (InsnsCost)
1594	OS << C.Insns << " instruction" << (C.Insns == `1` ? " " : "s ");
1595	OS << C.NumRegs << " reg" << (C.NumRegs == `1` ? "" : "s");
1596	if (C.AddRecCost != `0`)
1597	OS << ", with addrec cost " << C.AddRecCost;
1598	if (C.NumIVMuls != `0`)
1599	OS << ", plus " << C.NumIVMuls << " IV mul"
1600	<< (C.NumIVMuls == `1` ? "" : "s");
1601	if (C.NumBaseAdds != `0`)
1602	OS << ", plus " << C.NumBaseAdds << " base add"
1603	<< (C.NumBaseAdds == `1` ? "" : "s");
1604	if (C.ScaleCost != `0`)
1605	OS << ", plus " << C.ScaleCost << " scale cost";
1606	if (C.ImmCost != `0`)
1607	OS << ", plus " << C.ImmCost << " imm cost";
1608	if (C.SetupCost != `0`)
1609	OS << ", plus " << C.SetupCost << " setup cost";
1610	}
1611
1612	LLVM_DUMP_METHOD void Cost::dump() const {
1613	print(errs()); errs() << `'\n'`;
1614	}
1615	#endif
1616
1617	/// Test whether this fixup always uses its value outside of the given loop.
1618	bool LSRFixup::isUseFullyOutsideLoop(const Loop L) const* {
1619	// PHI nodes use their value in their incoming blocks.
1620	if (const PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) {
1621	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i)
1622	if (PN->getIncomingValue(i) == OperandValToReplace &&
1623	L->contains(BB: PN->getIncomingBlock(i)))
1624	return false;
1625	return true;
1626	}
1627
1628	return !L->contains(Inst: UserInst);
1629	}
1630
1631	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1632	void LSRFixup::print(raw_ostream &OS) const {
1633	OS << "UserInst=";
1634	// Store is common and interesting enough to be worth special-casing.
1635	if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
1636	OS << "store ";
1637	Store->getOperand(`0`)->printAsOperand(OS, /PrintType=/false);
1638	} else if (UserInst->getType()->isVoidTy())
1639	OS << UserInst->getOpcodeName();
1640	else
1641	UserInst->printAsOperand(OS, /PrintType=/false);
1642
1643	OS << ", OperandValToReplace=";
1644	OperandValToReplace->printAsOperand(OS, /PrintType=/false);
1645
1646	for (const Loop *PIL : PostIncLoops) {
1647	OS << ", PostIncLoop=";
1648	PIL->getHeader()->printAsOperand(OS, /PrintType=/false);
1649	}
1650
1651	if (Offset.isNonZero())
1652	OS << ", Offset=" << Offset;
1653	}
1654
1655	LLVM_DUMP_METHOD void LSRFixup::dump() const {
1656	print(errs()); errs() << `'\n'`;
1657	}
1658	#endif
1659
1660	/// Test whether this use as a formula which has the same registers as the given
1661	/// formula.
1662	bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
1663	SmallVector<const SCEV *, `4`> Key = F.BaseRegs;
1664	if (F.ScaledReg) Key.push_back(Elt: F.ScaledReg);
1665	// Unstable sort by host order ok, because this is only used for uniquifying.
1666	llvm::sort(C&: Key);
1667	return Uniquifier.count(V: Key);
1668	}
1669
1670	/// The function returns a probability of selecting formula without Reg.
1671	float LSRUse::getNotSelectedProbability(const SCEV Reg) const* {
1672	unsigned FNum = `0`;
1673	for (const Formula &F : Formulae)
1674	if (F.referencesReg(S: Reg))
1675	FNum++;
1676	return ((float)(Formulae.size() - FNum)) / Formulae.size();
1677	}
1678
1679	/// If the given formula has not yet been inserted, add it to the list, and
1680	/// return true. Return false otherwise. The formula must be in canonical form.
1681	bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
1682	assert(F.isCanonical(L) && "Invalid canonical representation");
1683
1684	if (!Formulae.empty() && RigidFormula)
1685	return false;
1686
1687	SmallVector<const SCEV *, `4`> Key = F.BaseRegs;
1688	if (F.ScaledReg) Key.push_back(Elt: F.ScaledReg);
1689	// Unstable sort by host order ok, because this is only used for uniquifying.
1690	llvm::sort(C&: Key);
1691
1692	if (!Uniquifier.insert(V: Key).second)
1693	return false;
1694
1695	// Using a register to hold the value of 0 is not profitable.
1696	assert((!F.ScaledReg \|\| !F.ScaledReg->isZero()) &&
1697	"Zero allocated in a scaled register!");
1698	#ifndef NDEBUG
1699	for (const SCEV *BaseReg : F.BaseRegs)
1700	assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1701	#endif
1702
1703	// Add the formula to the list.
1704	Formulae.push_back(Elt: F);
1705
1706	// Record registers now being used by this use.
1707	Regs.insert_range(R: F.BaseRegs);
1708	if (F.ScaledReg)
1709	Regs.insert(Ptr: F.ScaledReg);
1710
1711	return true;
1712	}
1713
1714	/// Remove the given formula from this use's list.
1715	void LSRUse::DeleteFormula(Formula &F) {
1716	if (&F != &Formulae.back())
1717	std::swap(a&: F, b&: Formulae.back());
1718	Formulae.pop_back();
1719	}
1720
1721	/// Recompute the Regs field, and update RegUses.
1722	void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1723	// Now that we've filtered out some formulae, recompute the Regs set.
1724	SmallPtrSet<const SCEV *, `4`> OldRegs = std::move(Regs);
1725	Regs.clear();
1726	for (const Formula &F : Formulae) {
1727	if (F.ScaledReg) Regs.insert(Ptr: F.ScaledReg);
1728	Regs.insert_range(R: F.BaseRegs);
1729	}
1730
1731	// Update the RegTracker.
1732	for (const SCEV *S : OldRegs)
1733	if (!Regs.count(Ptr: S))
1734	RegUses.dropRegister(Reg: S, LUIdx);
1735	}
1736
1737	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
1738	void LSRUse::print(raw_ostream &OS) const {
1739	OS << "LSR Use: Kind=";
1740	switch (Kind) {
1741	case Basic: OS << "Basic"; break;
1742	case Special: OS << "Special"; break;
1743	case ICmpZero: OS << "ICmpZero"; break;
1744	case Address:
1745	OS << "Address of ";
1746	if (AccessTy.MemTy->isPointerTy())
1747	OS << "pointer"; // the full pointer type could be really verbose
1748	else {
1749	OS << *AccessTy.MemTy;
1750	}
1751
1752	OS << " in addrspace(" << AccessTy.AddrSpace << `')'`;
1753	}
1754
1755	OS << ", Offsets={";
1756	bool NeedComma = false;
1757	for (const LSRFixup &Fixup : Fixups) {
1758	if (NeedComma) OS << `','`;
1759	OS << Fixup.Offset;
1760	NeedComma = true;
1761	}
1762	OS << `'}'`;
1763
1764	if (AllFixupsOutsideLoop)
1765	OS << ", all-fixups-outside-loop";
1766
1767	if (WidestFixupType)
1768	OS << ", widest fixup type: " << *WidestFixupType;
1769	}
1770
1771	LLVM_DUMP_METHOD void LSRUse::dump() const {
1772	print(errs()); errs() << `'\n'`;
1773	}
1774	#endif
1775
1776	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1777	LSRUse::KindType Kind, MemAccessTy AccessTy,
1778	GlobalValue *BaseGV, Immediate BaseOffset,
1779	bool HasBaseReg, int64_t Scale,
1780	Instruction Fixup /* = nullptr /) {
1781	switch (Kind) {
1782	case LSRUse::Address: {
1783	int64_t FixedOffset =
1784	BaseOffset.isScalable() ? `0` : BaseOffset.getFixedValue();
1785	int64_t ScalableOffset =
1786	BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : `0`;
1787	return TTI.isLegalAddressingMode(Ty: AccessTy.MemTy, BaseGV, BaseOffset: FixedOffset,
1788	HasBaseReg, Scale, AddrSpace: AccessTy.AddrSpace,
1789	I: Fixup, ScalableOffset);
1790	}
1791	case LSRUse::ICmpZero:
1792	// There's not even a target hook for querying whether it would be legal to
1793	// fold a GV into an ICmp.
1794	if (BaseGV)
1795	return false;
1796
1797	// ICmp only has two operands; don't allow more than two non-trivial parts.
1798	if (Scale != `0` && HasBaseReg && BaseOffset.isNonZero())
1799	return false;
1800
1801	// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
1802	// putting the scaled register in the other operand of the icmp.
1803	if (Scale != `0` && Scale != -`1`)
1804	return false;
1805
1806	// If we have low-level target information, ask the target if it can fold an
1807	// integer immediate on an icmp.
1808	if (BaseOffset.isNonZero()) {
1809	// We don't have an interface to query whether the target supports
1810	// icmpzero against scalable quantities yet.
1811	if (BaseOffset.isScalable())
1812	return false;
1813
1814	// We have one of:
1815	// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
1816	// ICmpZero -1ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset*
1817	// Offs is the ICmp immediate.
1818	if (Scale == `0`)
1819	// The cast does the right thing with
1820	// std::numeric_limits<int64_t>::min().
1821	BaseOffset = BaseOffset.getFixed(MinVal: -(uint64_t)BaseOffset.getFixedValue());
1822	return TTI.isLegalICmpImmediate(Imm: BaseOffset.getFixedValue());
1823	}
1824
1825	// ICmpZero BaseReg + -1ScaleReg => ICmp BaseReg, ScaleReg*
1826	return true;
1827
1828	case LSRUse::Basic:
1829	// Only handle single-register values.
1830	return !BaseGV && Scale == `0` && BaseOffset.isZero();
1831
1832	case LSRUse::Special:
1833	// Special case Basic to handle -1 scales.
1834	return !BaseGV && (Scale == `0` \|\| Scale == -`1`) && BaseOffset.isZero();
1835	}
1836
1837	llvm_unreachable("Invalid LSRUse Kind!");
1838	}
1839
1840	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1841	Immediate MinOffset, Immediate MaxOffset,
1842	LSRUse::KindType Kind, MemAccessTy AccessTy,
1843	GlobalValue *BaseGV, Immediate BaseOffset,
1844	bool HasBaseReg, int64_t Scale) {
1845	if (BaseOffset.isNonZero() &&
1846	(BaseOffset.isScalable() != MinOffset.isScalable() \|\|
1847	BaseOffset.isScalable() != MaxOffset.isScalable()))
1848	return false;
1849	// Check for overflow.
1850	int64_t Base = BaseOffset.getKnownMinValue();
1851	int64_t Min = MinOffset.getKnownMinValue();
1852	int64_t Max = MaxOffset.getKnownMinValue();
1853	if (((int64_t)((uint64_t)Base + Min) > Base) != (Min > `0`))
1854	return false;
1855	MinOffset = Immediate::get(MinVal: (uint64_t)Base + Min, Scalable: MinOffset.isScalable());
1856	if (((int64_t)((uint64_t)Base + Max) > Base) != (Max > `0`))
1857	return false;
1858	MaxOffset = Immediate::get(MinVal: (uint64_t)Base + Max, Scalable: MaxOffset.isScalable());
1859
1860	return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset: MinOffset,
1861	HasBaseReg, Scale) &&
1862	isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset: MaxOffset,
1863	HasBaseReg, Scale);
1864	}
1865
1866	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1867	Immediate MinOffset, Immediate MaxOffset,
1868	LSRUse::KindType Kind, MemAccessTy AccessTy,
1869	const Formula &F, const Loop &L) {
1870	// For the purpose of isAMCompletelyFolded either having a canonical formula
1871	// or a scale not equal to zero is correct.
1872	// Problems may arise from non canonical formulae having a scale == 0.
1873	// Strictly speaking it would best to just rely on canonical formulae.
1874	// However, when we generate the scaled formulae, we first check that the
1875	// scaling factor is profitable before computing the actual ScaledReg for
1876	// compile time sake.
1877	assert((F.isCanonical(L) \|\| F.Scale != `0`));
1878	return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1879	BaseGV: F.BaseGV, BaseOffset: F.BaseOffset, HasBaseReg: F.HasBaseReg, Scale: F.Scale);
1880	}
1881
1882	/// Test whether we know how to expand the current formula.
1883	static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
1884	Immediate MaxOffset, LSRUse::KindType Kind,
1885	MemAccessTy AccessTy, GlobalValue *BaseGV,
1886	Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
1887	// We know how to expand completely foldable formulae.
1888	return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
1889	BaseOffset, HasBaseReg, Scale) \|\|
1890	// Or formulae that use a base register produced by a sum of base
1891	// registers.
1892	(Scale == `1` &&
1893	isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
1894	BaseGV, BaseOffset, HasBaseReg: true, Scale: `0`));
1895	}
1896
1897	static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
1898	Immediate MaxOffset, LSRUse::KindType Kind,
1899	MemAccessTy AccessTy, const Formula &F) {
1900	return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV: F.BaseGV,
1901	BaseOffset: F.BaseOffset, HasBaseReg: F.HasBaseReg, Scale: F.Scale);
1902	}
1903
1904	static bool isLegalAddImmediate(const TargetTransformInfo &TTI,
1905	Immediate Offset) {
1906	if (Offset.isScalable())
1907	return TTI.isLegalAddScalableImmediate(Imm: Offset.getKnownMinValue());
1908
1909	return TTI.isLegalAddImmediate(Imm: Offset.getFixedValue());
1910	}
1911
1912	static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
1913	const LSRUse &LU, const Formula &F) {
1914	// Target may want to look at the user instructions.
1915	if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
1916	for (const LSRFixup &Fixup : LU.Fixups)
1917	if (!isAMCompletelyFolded(TTI, Kind: LSRUse::Address, AccessTy: LU.AccessTy, BaseGV: F.BaseGV,
1918	BaseOffset: (F.BaseOffset + Fixup.Offset), HasBaseReg: F.HasBaseReg,
1919	Scale: F.Scale, Fixup: Fixup.UserInst))
1920	return false;
1921	return true;
1922	}
1923
1924	return isAMCompletelyFolded(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
1925	AccessTy: LU.AccessTy, BaseGV: F.BaseGV, BaseOffset: F.BaseOffset, HasBaseReg: F.HasBaseReg,
1926	Scale: F.Scale);
1927	}
1928
1929	static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
1930	const LSRUse &LU, const Formula &F,
1931	const Loop &L) {
1932	if (!F.Scale)
1933	return `0`;
1934
1935	// If the use is not completely folded in that instruction, we will have to
1936	// pay an extra cost only for scale != 1.
1937	if (!isAMCompletelyFolded(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
1938	AccessTy: LU.AccessTy, F, L))
1939	return F.Scale != `1`;
1940
1941	switch (LU.Kind) {
1942	case LSRUse::Address: {
1943	// Check the scaling factor cost with both the min and max offsets.
1944	int64_t ScalableMin = `0`, ScalableMax = `0`, FixedMin = `0`, FixedMax = `0`;
1945	if (F.BaseOffset.isScalable()) {
1946	ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
1947	ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1948	} else {
1949	FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
1950	FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
1951	}
1952	InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(
1953	Ty: LU.AccessTy.MemTy, BaseGV: F.BaseGV, BaseOffset: StackOffset::get(Fixed: FixedMin, Scalable: ScalableMin),
1954	HasBaseReg: F.HasBaseReg, Scale: F.Scale, AddrSpace: LU.AccessTy.AddrSpace);
1955	InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(
1956	Ty: LU.AccessTy.MemTy, BaseGV: F.BaseGV, BaseOffset: StackOffset::get(Fixed: FixedMax, Scalable: ScalableMax),
1957	HasBaseReg: F.HasBaseReg, Scale: F.Scale, AddrSpace: LU.AccessTy.AddrSpace);
1958
1959	assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
1960	"Legal addressing mode has an illegal cost!");
1961	return std::max(a: ScaleCostMinOffset, b: ScaleCostMaxOffset);
1962	}
1963	case LSRUse::ICmpZero:
1964	case LSRUse::Basic:
1965	case LSRUse::Special:
1966	// The use is completely folded, i.e., everything is folded into the
1967	// instruction.
1968	return `0`;
1969	}
1970
1971	llvm_unreachable("Invalid LSRUse Kind!");
1972	}
1973
1974	static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
1975	LSRUse::KindType Kind, MemAccessTy AccessTy,
1976	GlobalValue *BaseGV, Immediate BaseOffset,
1977	bool HasBaseReg) {
1978	// Fast-path: zero is always foldable.
1979	if (BaseOffset.isZero() && !BaseGV)
1980	return true;
1981
1982	// Conservatively, create an address with an immediate and a
1983	// base and a scale.
1984	int64_t Scale = Kind == LSRUse::ICmpZero ? -`1` : `1`;
1985
1986	// Canonicalize a scale of 1 to a base register if the formula doesn't
1987	// already have a base register.
1988	if (!HasBaseReg && Scale == `1`) {
1989	Scale = `0`;
1990	HasBaseReg = true;
1991	}
1992
1993	// FIXME: Try with + without a scale? Maybe based on TTI?
1994	// I think basereg + scaledreg + immediateoffset isn't a good 'conservative'
1995	// default for many architectures, not just AArch64 SVE. More investigation
1996	// needed later to determine if this should be used more widely than just
1997	// on scalable types.
1998	if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
1999	AccessTy.MemTy && AccessTy.MemTy->isScalableTy() && DropScaledForVScale)
2000	Scale = `0`;
2001
2002	return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
2003	HasBaseReg, Scale);
2004	}
2005
2006	static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
2007	ScalarEvolution &SE, Immediate MinOffset,
2008	Immediate MaxOffset, LSRUse::KindType Kind,
2009	MemAccessTy AccessTy, const SCEV *S,
2010	bool HasBaseReg) {
2011	// Fast-path: zero is always foldable.
2012	if (S->isZero()) return true;
2013
2014	// Conservatively, create an address with an immediate and a
2015	// base and a scale.
2016	Immediate BaseOffset = ExtractImmediate(S, SE);
2017	GlobalValue *BaseGV = ExtractSymbol(S, SE);
2018
2019	// If there's anything else involved, it's not foldable.
2020	if (!S->isZero()) return false;
2021
2022	// Fast-path: zero is always foldable.
2023	if (BaseOffset.isZero() && !BaseGV)
2024	return true;
2025
2026	if (BaseOffset.isScalable())
2027	return false;
2028
2029	// Conservatively, create an address with an immediate and a
2030	// base and a scale.
2031	int64_t Scale = Kind == LSRUse::ICmpZero ? -`1` : `1`;
2032
2033	return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
2034	BaseOffset, HasBaseReg, Scale);
2035	}
2036
2037	namespace {
2038
2039	/// An individual increment in a Chain of IV increments. Relate an IV user to
2040	/// an expression that computes the IV it uses from the IV used by the previous
2041	/// link in the Chain.
2042	///
2043	/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
2044	/// original IVOperand. The head of the chain's IVOperand is only valid during
2045	/// chain collection, before LSR replaces IV users. During chain generation,
2046	/// IncExpr can be used to find the new IVOperand that computes the same
2047	/// expression.
2048	struct IVInc {
2049	Instruction *UserInst;
2050	Value* IVOperand;
2051	const SCEV *IncExpr;
2052
2053	IVInc(Instruction U, Value O, const SCEV *E)
2054	: UserInst(U), IVOperand(O), IncExpr(E) {}
2055	};
2056
2057	// The list of IV increments in program order. We typically add the head of a
2058	// chain without finding subsequent links.
2059	struct IVChain {
2060	SmallVector<IVInc, `1`> Incs;
2061	const SCEV ExprBase = nullptr*;
2062
2063	IVChain() = default;
2064	IVChain(const IVInc &Head, const SCEV *Base)
2065	: Incs (`1`, Head), ExprBase(Base) {}
2066
2067	using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
2068
2069	// Return the first increment in the chain.
2070	const_iterator begin() const {
2071	assert(!Incs.empty());
2072	return std::next(x: Incs.begin());
2073	}
2074	const_iterator end() const {
2075	return Incs.end();
2076	}
2077
2078	// Returns true if this chain contains any increments.
2079	bool hasIncs() const { return Incs.size() >= `2`; }
2080
2081	// Add an IVInc to the end of this chain.
2082	void add(const IVInc &X) { Incs.push_back(Elt: X); }
2083
2084	// Returns the last UserInst in the chain.
2085	Instruction tailUserInst() const* { return Incs.back().UserInst; }
2086
2087	// Returns true if IncExpr can be profitably added to this chain.
2088	bool isProfitableIncrement(const SCEV *OperExpr,
2089	const SCEV *IncExpr,
2090	ScalarEvolution&);
2091	};
2092
2093	/// Helper for CollectChains to track multiple IV increment uses. Distinguish
2094	/// between FarUsers that definitely cross IV increments and NearUsers that may
2095	/// be used between IV increments.
2096	struct ChainUsers {
2097	SmallPtrSet<Instruction*, `4`> FarUsers;
2098	SmallPtrSet<Instruction*, `4`> NearUsers;
2099	};
2100
2101	/// This class holds state for the main loop strength reduction logic.
2102	class LSRInstance {
2103	IVUsers &IU;
2104	ScalarEvolution &SE;
2105	DominatorTree &DT;
2106	LoopInfo &LI;
2107	AssumptionCache &AC;
2108	TargetLibraryInfo &TLI;
2109	const TargetTransformInfo &TTI;
2110	Loop *const L;
2111	MemorySSAUpdater *MSSAU;
2112	TTI::AddressingModeKind AMK;
2113	mutable SCEVExpander Rewriter;
2114	bool Changed = false;
2115
2116	/// This is the insert position that the current loop's induction variable
2117	/// increment should be placed. In simple loops, this is the latch block's
2118	/// terminator. But in more complicated cases, this is a position which will
2119	/// dominate all the in-loop post-increment users.
2120	Instruction IVIncInsertPos = nullptr*;
2121
2122	/// Interesting factors between use strides.
2123	///
2124	/// We explicitly use a SetVector which contains a SmallSet, instead of the
2125	/// default, a SmallDenseSet, because we need to use the full range of
2126	/// int64_ts, and there's currently no good way of doing that with
2127	/// SmallDenseSet.
2128	SetVector<int64_t, SmallVector<int64_t, `8`>, SmallSet<int64_t, `8`>> Factors;
2129
2130	/// The cost of the current SCEV, the best solution by LSR will be dropped if
2131	/// the solution is not profitable.
2132	Cost BaselineCost;
2133
2134	/// Interesting use types, to facilitate truncation reuse.
2135	SmallSetVector<Type *, `4`> Types;
2136
2137	/// The list of interesting uses.
2138	mutable SmallVector<LSRUse, `16`> Uses;
2139
2140	/// Track which uses use which register candidates.
2141	RegUseTracker RegUses;
2142
2143	// Limit the number of chains to avoid quadratic behavior. We don't expect to
2144	// have more than a few IV increment chains in a loop. Missing a Chain falls
2145	// back to normal LSR behavior for those uses.
2146	static const unsigned MaxChains = `8`;
2147
2148	/// IV users can form a chain of IV increments.
2149	SmallVector<IVChain, MaxChains> IVChainVec;
2150
2151	/// IV users that belong to profitable IVChains.
2152	SmallPtrSet<Use*, MaxChains> IVIncSet;
2153
2154	/// Induction variables that were generated and inserted by the SCEV Expander.
2155	SmallVector<llvm::WeakVH, `2`> ScalarEvolutionIVs;
2156
2157	// Inserting instructions in the loop and using them as PHI's input could
2158	// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
2159	// corresponding incoming block is not loop exiting). So collect all such
2160	// instructions to form LCSSA for them later.
2161	SmallSetVector<Instruction *, `4`> InsertedNonLCSSAInsts;
2162
2163	void OptimizeShadowIV();
2164	bool FindIVUserForCond(ICmpInst Cond, IVStrideUse &CondUse);
2165	ICmpInst OptimizeMax(ICmpInst Cond, IVStrideUse* &CondUse);
2166	void OptimizeLoopTermCond();
2167
2168	void ChainInstruction(Instruction UserInst, Instruction IVOper,
2169	SmallVectorImpl<ChainUsers> &ChainUsersVec);
2170	void FinalizeChain(IVChain &Chain);
2171	void CollectChains();
2172	void GenerateIVChain(const IVChain &Chain,
2173	SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2174
2175	void CollectInterestingTypesAndFactors();
2176	void CollectFixupsAndInitialFormulae();
2177
2178	// Support for sharing of LSRUses between LSRFixups.
2179	using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2180	UseMapTy UseMap;
2181
2182	bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
2183	LSRUse::KindType Kind, MemAccessTy AccessTy);
2184
2185	std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
2186	MemAccessTy AccessTy);
2187
2188	void DeleteUse(LSRUse &LU, size_t LUIdx);
2189
2190	LSRUse FindUseWithSimilarFormula(const* Formula &F, const LSRUse &OrigLU);
2191
2192	void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2193	void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2194	void CountRegisters(const Formula &F, size_t LUIdx);
2195	bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
2196
2197	void CollectLoopInvariantFixupsAndFormulae();
2198
2199	void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
2200	unsigned Depth = `0`);
2201
2202	void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
2203	const Formula &Base, unsigned Depth,
2204	size_t Idx, bool IsScaledReg = false);
2205	void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
2206	void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2207	const Formula &Base, size_t Idx,
2208	bool IsScaledReg = false);
2209	void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2210	void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2211	const Formula &Base,
2212	const SmallVectorImpl<Immediate> &Worklist,
2213	size_t Idx, bool IsScaledReg = false);
2214	void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2215	void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2216	void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2217	void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
2218	void GenerateCrossUseConstantOffsets();
2219	void GenerateAllReuseFormulae();
2220
2221	void FilterOutUndesirableDedicatedRegisters();
2222
2223	size_t EstimateSearchSpaceComplexity() const;
2224	void NarrowSearchSpaceByDetectingSupersets();
2225	void NarrowSearchSpaceByCollapsingUnrolledCode();
2226	void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2227	void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2228	void NarrowSearchSpaceByFilterPostInc();
2229	void NarrowSearchSpaceByDeletingCostlyFormulas();
2230	void NarrowSearchSpaceByPickingWinnerRegs();
2231	void NarrowSearchSpaceUsingHeuristics();
2232
2233	void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2234	Cost &SolutionCost,
2235	SmallVectorImpl<const Formula *> &Workspace,
2236	const Cost &CurCost,
2237	const SmallPtrSet<const SCEV *, `16`> &CurRegs,
2238	DenseSet<const SCEV > &VisitedRegs) const*;
2239	void Solve(SmallVectorImpl<const Formula > &Solution) const*;
2240
2241	BasicBlock::iterator
2242	HoistInsertPosition(BasicBlock::iterator IP,
2243	const SmallVectorImpl<Instruction > &Inputs) const*;
2244	BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
2245	const LSRFixup &LF,
2246	const LSRUse &LU) const;
2247
2248	Value Expand(const* LSRUse &LU, const LSRFixup &LF, const Formula &F,
2249	BasicBlock::iterator IP,
2250	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2251	void RewriteForPHI(PHINode PN, const* LSRUse &LU, const LSRFixup &LF,
2252	const Formula &F,
2253	SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2254	void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2255	SmallVectorImpl<WeakTrackingVH> &DeadInsts);
2256	void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
2257
2258	public:
2259	LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2260	LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
2261	TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2262
2263	bool getChanged() const { return Changed; }
2264	const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
2265	return ScalarEvolutionIVs;
2266	}
2267
2268	void print_factors_and_types(raw_ostream &OS) const;
2269	void print_fixups(raw_ostream &OS) const;
2270	void print_uses(raw_ostream &OS) const;
2271	void print(raw_ostream &OS) const;
2272	void dump() const;
2273	};
2274
2275	} // end anonymous namespace
2276
2277	/// If IV is used in a int-to-float cast inside the loop then try to eliminate
2278	/// the cast operation.
2279	void LSRInstance::OptimizeShadowIV() {
2280	const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2281	if (isa<SCEVCouldNotCompute>(Val: BackedgeTakenCount))
2282	return;
2283
2284	for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
2285	UI != E; / empty /) {
2286	IVUsers::const_iterator CandidateUI = UI;
2287	++UI;
2288	Instruction *ShadowUse = CandidateUI ->getUser();
2289	Type DestTy = nullptr*;
2290	bool IsSigned = false;
2291
2292	/ If shadow use is a int->float cast then insert a second IV*
2293	to eliminate this cast.
2294
2295	for (unsigned i = 0; i < n; ++i)
2296	foo((double)i);
2297
2298	is transformed into
2299
2300	double d = 0.0;
2301	for (unsigned i = 0; i < n; ++i, ++d)
2302	foo(d);
2303	*/
2304	if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(Val: CandidateUI ->getUser())) {
2305	IsSigned = false;
2306	DestTy = UCast->getDestTy();
2307	}
2308	else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(Val: CandidateUI ->getUser())) {
2309	IsSigned = true;
2310	DestTy = SCast->getDestTy();
2311	}
2312	if (!DestTy) continue;
2313
2314	// If target does not support DestTy natively then do not apply
2315	// this transformation.
2316	if (!TTI.isTypeLegal(Ty: DestTy)) continue;
2317
2318	PHINode *PH = dyn_cast<PHINode>(Val: ShadowUse->getOperand(i: `0`));
2319	if (!PH) continue;
2320	if (PH->getNumIncomingValues() != `2`) continue;
2321
2322	// If the calculation in integers overflows, the result in FP type will
2323	// differ. So we only can do this transformation if we are guaranteed to not
2324	// deal with overflowing values
2325	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: PH));
2326	if (!AR) continue;
2327	if (IsSigned && !AR->hasNoSignedWrap()) continue;
2328	if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
2329
2330	Type *SrcTy = PH->getType();
2331	int Mantissa = DestTy->getFPMantissaWidth();
2332	if (Mantissa == -`1`) continue;
2333	if ((int)SE.getTypeSizeInBits(Ty: SrcTy) > Mantissa)
2334	continue;
2335
2336	unsigned Entry, Latch;
2337	if (PH->getIncomingBlock(i: `0`) == L->getLoopPreheader()) {
2338	Entry = `0`;
2339	Latch = `1`;
2340	} else {
2341	Entry = `1`;
2342	Latch = `0`;
2343	}
2344
2345	ConstantInt *Init = dyn_cast<ConstantInt>(Val: PH->getIncomingValue(i: Entry));
2346	if (!Init) continue;
2347	Constant *NewInit = ConstantFP::get(Ty: DestTy, V: IsSigned ?
2348	(double)Init->getSExtValue() :
2349	(double)Init->getZExtValue());
2350
2351	BinaryOperator *Incr =
2352	dyn_cast<BinaryOperator>(Val: PH->getIncomingValue(i: Latch));
2353	if (!Incr) continue;
2354	if (Incr->getOpcode() != Instruction::Add
2355	&& Incr->getOpcode() != Instruction::Sub)
2356	continue;
2357
2358	/ Initialize new IV, double d = 0.0 in above example. /
2359	ConstantInt C = nullptr*;
2360	if (Incr->getOperand(i_nocapture: `0`) == PH)
2361	C = dyn_cast<ConstantInt>(Val: Incr->getOperand(i_nocapture: `1`));
2362	else if (Incr->getOperand(i_nocapture: `1`) == PH)
2363	C = dyn_cast<ConstantInt>(Val: Incr->getOperand(i_nocapture: `0`));
2364	else
2365	continue;
2366
2367	if (!C) continue;
2368
2369	// Ignore negative constants, as the code below doesn't handle them
2370	// correctly. TODO: Remove this restriction.
2371	if (!C->getValue().isStrictlyPositive())
2372	continue;
2373
2374	/ Add new PHINode. /
2375	PHINode *NewPH = PHINode::Create(Ty: DestTy, NumReservedValues: `2`, NameStr: "IV.S.", InsertBefore: PH->getIterator());
2376	NewPH->setDebugLoc(PH->getDebugLoc());
2377
2378	/ create new increment. '++d' in above example. /
2379	Constant *CFP = ConstantFP::get(Ty: DestTy, V: C->getZExtValue());
2380	BinaryOperator *NewIncr = BinaryOperator::Create(
2381	Op: Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
2382	: Instruction::FSub,
2383	S1: NewPH, S2: CFP, Name: "IV.S.next.", InsertBefore: Incr->getIterator());
2384	NewIncr->setDebugLoc(Incr->getDebugLoc());
2385
2386	NewPH->addIncoming(V: NewInit, BB: PH->getIncomingBlock(i: Entry));
2387	NewPH->addIncoming(V: NewIncr, BB: PH->getIncomingBlock(i: Latch));
2388
2389	/ Remove cast operation /
2390	ShadowUse->replaceAllUsesWith(V: NewPH);
2391	ShadowUse->eraseFromParent();
2392	Changed = true;
2393	break;
2394	}
2395	}
2396
2397	/// If Cond has an operand that is an expression of an IV, set the IV user and
2398	/// stride information and return true, otherwise return false.
2399	bool LSRInstance::FindIVUserForCond(ICmpInst Cond, IVStrideUse &CondUse) {
2400	for (IVStrideUse &U : IU)
2401	if (U.getUser() == Cond) {
2402	// NOTE: we could handle setcc instructions with multiple uses here, but
2403	// InstCombine does it as well for simple uses, it's not clear that it
2404	// occurs enough in real life to handle.
2405	CondUse = &U;
2406	return true;
2407	}
2408	return false;
2409	}
2410
2411	/// Rewrite the loop's terminating condition if it uses a max computation.
2412	///
2413	/// This is a narrow solution to a specific, but acute, problem. For loops
2414	/// like this:
2415	///
2416	/// i = 0;
2417	/// do {
2418	/// p[i] = 0.0;
2419	/// } while (++i < n);
2420	///
2421	/// the trip count isn't just 'n', because 'n' might not be positive. And
2422	/// unfortunately this can come up even for loops where the user didn't use
2423	/// a C do-while loop. For example, seemingly well-behaved top-test loops
2424	/// will commonly be lowered like this:
2425	///
2426	/// if (n > 0) {
2427	/// i = 0;
2428	/// do {
2429	/// p[i] = 0.0;
2430	/// } while (++i < n);
2431	/// }
2432	///
2433	/// and then it's possible for subsequent optimization to obscure the if
2434	/// test in such a way that indvars can't find it.
2435	///
2436	/// When indvars can't find the if test in loops like this, it creates a
2437	/// max expression, which allows it to give the loop a canonical
2438	/// induction variable:
2439	///
2440	/// i = 0;
2441	/// max = n < 1 ? 1 : n;
2442	/// do {
2443	/// p[i] = 0.0;
2444	/// } while (++i != max);
2445	///
2446	/// Canonical induction variables are necessary because the loop passes
2447	/// are designed around them. The most obvious example of this is the
2448	/// LoopInfo analysis, which doesn't remember trip count values. It
2449	/// expects to be able to rediscover the trip count each time it is
2450	/// needed, and it does this using a simple analysis that only succeeds if
2451	/// the loop has a canonical induction variable.
2452	///
2453	/// However, when it comes time to generate code, the maximum operation
2454	/// can be quite costly, especially if it's inside of an outer loop.
2455	///
2456	/// This function solves this problem by detecting this type of loop and
2457	/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
2458	/// the instructions for the maximum computation.
2459	ICmpInst LSRInstance::OptimizeMax(ICmpInst Cond, IVStrideUse* &CondUse) {
2460	// Check that the loop matches the pattern we're looking for.
2461	if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
2462	Cond->getPredicate() != CmpInst::ICMP_NE)
2463	return Cond;
2464
2465	SelectInst *Sel = dyn_cast<SelectInst>(Val: Cond->getOperand(i_nocapture: `1`));
2466	if (!Sel \|\| !Sel->hasOneUse()) return Cond;
2467
2468	const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
2469	if (isa<SCEVCouldNotCompute>(Val: BackedgeTakenCount))
2470	return Cond;
2471	const SCEV *One = SE.getConstant(Ty: BackedgeTakenCount->getType(), V: `1`);
2472
2473	// Add one to the backedge-taken count to get the trip count.
2474	const SCEV *IterationCount = SE.getAddExpr(LHS: One, RHS: BackedgeTakenCount);
2475	if (IterationCount != SE.getSCEV(V: Sel)) return Cond;
2476
2477	// Check for a max calculation that matches the pattern. There's no check
2478	// for ICMP_ULE here because the comparison would be with zero, which
2479	// isn't interesting.
2480	CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
2481	const SCEVNAryExpr Max = nullptr*;
2482	if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(Val: BackedgeTakenCount)) {
2483	Pred = ICmpInst::ICMP_SLE;
2484	Max = S;
2485	} else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(Val: IterationCount)) {
2486	Pred = ICmpInst::ICMP_SLT;
2487	Max = S;
2488	} else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(Val: IterationCount)) {
2489	Pred = ICmpInst::ICMP_ULT;
2490	Max = U;
2491	} else {
2492	// No match; bail.
2493	return Cond;
2494	}
2495
2496	// To handle a max with more than two operands, this optimization would
2497	// require additional checking and setup.
2498	if (Max->getNumOperands() != `2`)
2499	return Cond;
2500
2501	const SCEV *MaxLHS = Max->getOperand(i: `0`);
2502	const SCEV *MaxRHS = Max->getOperand(i: `1`);
2503
2504	// ScalarEvolution canonicalizes constants to the left. For < and >, look
2505	// for a comparison with 1. For <= and >=, a comparison with zero.
2506	if (!MaxLHS \|\|
2507	(ICmpInst::isTrueWhenEqual(predicate: Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2508	return Cond;
2509
2510	// Check the relevant induction variable for conformance to
2511	// the pattern.
2512	const SCEV *IV = SE.getSCEV(V: Cond->getOperand(i_nocapture: `0`));
2513	if (!match(S: IV,
2514	P: m_scev_AffineAddRec(Op0: m_scev_SpecificInt(V: `1`), Op1: m_scev_SpecificInt(V: `1`))))
2515	return Cond;
2516
2517	assert(cast<SCEVAddRecExpr>(IV)->getLoop() == L &&
2518	"Loop condition operand is an addrec in a different loop!");
2519
2520	// Check the right operand of the select, and remember it, as it will
2521	// be used in the new comparison instruction.
2522	Value NewRHS = nullptr*;
2523	if (ICmpInst::isTrueWhenEqual(predicate: Pred)) {
2524	// Look for n+1, and grab n.
2525	if (AddOperator *BO = dyn_cast<AddOperator>(Val: Sel->getOperand(i_nocapture: `1`)))
2526	if (ConstantInt *BO1 = dyn_cast<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`)))
2527	if (BO1->isOne() && SE.getSCEV(V: BO->getOperand(i_nocapture: `0`)) == MaxRHS)
2528	NewRHS = BO->getOperand(i_nocapture: `0`);
2529	if (AddOperator *BO = dyn_cast<AddOperator>(Val: Sel->getOperand(i_nocapture: `2`)))
2530	if (ConstantInt *BO1 = dyn_cast<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`)))
2531	if (BO1->isOne() && SE.getSCEV(V: BO->getOperand(i_nocapture: `0`)) == MaxRHS)
2532	NewRHS = BO->getOperand(i_nocapture: `0`);
2533	if (!NewRHS)
2534	return Cond;
2535	} else if (SE.getSCEV(V: Sel->getOperand(i_nocapture: `1`)) == MaxRHS)
2536	NewRHS = Sel->getOperand(i_nocapture: `1`);
2537	else if (SE.getSCEV(V: Sel->getOperand(i_nocapture: `2`)) == MaxRHS)
2538	NewRHS = Sel->getOperand(i_nocapture: `2`);
2539	else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Val: MaxRHS))
2540	NewRHS = SU->getValue();
2541	else
2542	// Max doesn't match expected pattern.
2543	return Cond;
2544
2545	// Determine the new comparison opcode. It may be signed or unsigned,
2546	// and the original comparison may be either equality or inequality.
2547	if (Cond->getPredicate() == CmpInst::ICMP_EQ)
2548	Pred = CmpInst::getInversePredicate(pred: Pred);
2549
2550	// Ok, everything looks ok to change the condition into an SLT or SGE and
2551	// delete the max calculation.
2552	ICmpInst NewCond = new* ICmpInst (Cond->getIterator(), Pred,
2553	Cond->getOperand(i_nocapture: `0`), NewRHS, "scmp");
2554
2555	// Delete the max calculation instructions.
2556	NewCond->setDebugLoc(Cond->getDebugLoc());
2557	Cond->replaceAllUsesWith(V: NewCond);
2558	CondUse->setUser(NewCond);
2559	Instruction *Cmp = cast<Instruction>(Val: Sel->getOperand(i_nocapture: `0`));
2560	Cond->eraseFromParent();
2561	Sel->eraseFromParent();
2562	if (Cmp->use_empty())
2563	Cmp->eraseFromParent();
2564	return NewCond;
2565	}
2566
2567	/// Change loop terminating condition to use the postinc iv when possible.
2568	void
2569	LSRInstance::OptimizeLoopTermCond() {
2570	SmallPtrSet<Instruction *, `4`> PostIncs;
2571
2572	// We need a different set of heuristics for rotated and non-rotated loops.
2573	// If a loop is rotated then the latch is also the backedge, so inserting
2574	// post-inc expressions just before the latch is ideal. To reduce live ranges
2575	// it also makes sense to rewrite terminating conditions to use post-inc
2576	// expressions.
2577	//
2578	// If the loop is not rotated then the latch is not a backedge; the latch
2579	// check is done in the loop head. Adding post-inc expressions before the
2580	// latch will cause overlapping live-ranges of pre-inc and post-inc expressions
2581	// in the loop body. In this case we do not* want to use post-inc expressions*
2582	// in the latch check, and we want to insert post-inc expressions before
2583	// the backedge.
2584	BasicBlock *LatchBlock = L->getLoopLatch();
2585	SmallVector<BasicBlock*, `8`> ExitingBlocks;
2586	L->getExitingBlocks(ExitingBlocks);
2587	if (!llvm::is_contained(Range&: ExitingBlocks, Element: LatchBlock)) {
2588	// The backedge doesn't exit the loop; treat this as a head-tested loop.
2589	IVIncInsertPos = LatchBlock->getTerminator();
2590	return;
2591	}
2592
2593	// Otherwise treat this as a rotated loop.
2594	for (BasicBlock *ExitingBlock : ExitingBlocks) {
2595	// Get the terminating condition for the loop if possible. If we
2596	// can, we want to change it to use a post-incremented version of its
2597	// induction variable, to allow coalescing the live ranges for the IV into
2598	// one register value.
2599
2600	BranchInst *TermBr = dyn_cast<BranchInst>(Val: ExitingBlock->getTerminator());
2601	if (!TermBr)
2602	continue;
2603	// FIXME: Overly conservative, termination condition could be an 'or' etc..
2604	if (TermBr->isUnconditional() \|\| !isa<ICmpInst>(Val: TermBr->getCondition()))
2605	continue;
2606
2607	// Search IVUsesByStride to find Cond's IVUse if there is one.
2608	IVStrideUse CondUse = nullptr*;
2609	ICmpInst *Cond = cast<ICmpInst>(Val: TermBr->getCondition());
2610	if (!FindIVUserForCond(Cond, CondUse))
2611	continue;
2612
2613	// If the trip count is computed in terms of a max (due to ScalarEvolution
2614	// being unable to find a sufficient guard, for example), change the loop
2615	// comparison to use SLT or ULT instead of NE.
2616	// One consequence of doing this now is that it disrupts the count-down
2617	// optimization. That's not always a bad thing though, because in such
2618	// cases it may still be worthwhile to avoid a max.
2619	Cond = OptimizeMax(Cond, CondUse);
2620
2621	// If this exiting block dominates the latch block, it may also use
2622	// the post-inc value if it won't be shared with other uses.
2623	// Check for dominance.
2624	if (!DT.dominates(A: ExitingBlock, B: LatchBlock))
2625	continue;
2626
2627	// Conservatively avoid trying to use the post-inc value in non-latch
2628	// exits if there may be pre-inc users in intervening blocks.
2629	if (LatchBlock != ExitingBlock)
2630	for (const IVStrideUse &UI : IU)
2631	// Test if the use is reachable from the exiting block. This dominator
2632	// query is a conservative approximation of reachability.
2633	if (&UI != CondUse &&
2634	!DT.properlyDominates(A: UI.getUser()->getParent(), B: ExitingBlock)) {
2635	// Conservatively assume there may be reuse if the quotient of their
2636	// strides could be a legal scale.
2637	const SCEV A = IU.getStride(IU: CondUse, L);
2638	const SCEV *B = IU.getStride(IU: UI, L);
2639	if (!A \|\| !B) continue;
2640	if (SE.getTypeSizeInBits(Ty: A->getType()) !=
2641	SE.getTypeSizeInBits(Ty: B->getType())) {
2642	if (SE.getTypeSizeInBits(Ty: A->getType()) >
2643	SE.getTypeSizeInBits(Ty: B->getType()))
2644	B = SE.getSignExtendExpr(Op: B, Ty: A->getType());
2645	else
2646	A = SE.getSignExtendExpr(Op: A, Ty: B->getType());
2647	}
2648	if (const SCEVConstant *D =
2649	dyn_cast_or_null<SCEVConstant>(Val: getExactSDiv(LHS: B, RHS: A, SE))) {
2650	const ConstantInt *C = D->getValue();
2651	// Stride of one or negative one can have reuse with non-addresses.
2652	if (C->isOne() \|\| C->isMinusOne())
2653	goto decline_post_inc;
2654	// Avoid weird situations.
2655	if (C->getValue().getSignificantBits() >= `64` \|\|
2656	C->getValue().isMinSignedValue())
2657	goto decline_post_inc;
2658	// Check for possible scaled-address reuse.
2659	if (isAddressUse(TTI, Inst: UI.getUser(), OperandVal: UI.getOperandValToReplace())) {
2660	MemAccessTy AccessTy =
2661	getAccessType(TTI, Inst: UI.getUser(), OperandVal: UI.getOperandValToReplace());
2662	int64_t Scale = C->getSExtValue();
2663	if (TTI.isLegalAddressingMode(Ty: AccessTy.MemTy, /BaseGV=/nullptr,
2664	/BaseOffset=/`0`,
2665	/HasBaseReg=/true, Scale,
2666	AddrSpace: AccessTy.AddrSpace))
2667	goto decline_post_inc;
2668	Scale = -Scale;
2669	if (TTI.isLegalAddressingMode(Ty: AccessTy.MemTy, /BaseGV=/nullptr,
2670	/BaseOffset=/`0`,
2671	/HasBaseReg=/true, Scale,
2672	AddrSpace: AccessTy.AddrSpace))
2673	goto decline_post_inc;
2674	}
2675	}
2676	}
2677
2678	LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
2679	<< *Cond << `'\n'`);
2680
2681	// It's possible for the setcc instruction to be anywhere in the loop, and
2682	// possible for it to have multiple users. If it is not immediately before
2683	// the exiting block branch, move it.
2684	if (Cond->getNextNonDebugInstruction() != TermBr) {
2685	if (Cond->hasOneUse()) {
2686	Cond->moveBefore(InsertPos: TermBr->getIterator());
2687	} else {
2688	// Clone the terminating condition and insert into the loopend.
2689	ICmpInst *OldCond = Cond;
2690	Cond = cast<ICmpInst>(Val: Cond->clone());
2691	Cond->setName(L->getHeader()->getName() + ".termcond");
2692	Cond->insertInto(ParentBB: ExitingBlock, It: TermBr->getIterator());
2693
2694	// Clone the IVUse, as the old use still exists!
2695	CondUse = &IU.AddUser(User: Cond, Operand: CondUse->getOperandValToReplace());
2696	TermBr->replaceUsesOfWith(From: OldCond, To: Cond);
2697	}
2698	}
2699
2700	// If we get to here, we know that we can transform the setcc instruction to
2701	// use the post-incremented version of the IV, allowing us to coalesce the
2702	// live ranges for the IV correctly.
2703	CondUse->transformToPostInc(L);
2704	Changed = true;
2705
2706	PostIncs.insert(Ptr: Cond);
2707	decline_post_inc:;
2708	}
2709
2710	// Determine an insertion point for the loop induction variable increment. It
2711	// must dominate all the post-inc comparisons we just set up, and it must
2712	// dominate the loop latch edge.
2713	IVIncInsertPos = L->getLoopLatch()->getTerminator();
2714	for (Instruction *Inst : PostIncs)
2715	IVIncInsertPos = DT.findNearestCommonDominator(I1: IVIncInsertPos, I2: Inst);
2716	}
2717
2718	/// Determine if the given use can accommodate a fixup at the given offset and
2719	/// other details. If so, update the use and return true.
2720	bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2721	bool HasBaseReg, LSRUse::KindType Kind,
2722	MemAccessTy AccessTy) {
2723	Immediate NewMinOffset = LU.MinOffset;
2724	Immediate NewMaxOffset = LU.MaxOffset;
2725	MemAccessTy NewAccessTy = AccessTy;
2726
2727	// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
2728	// something conservative, however this can pessimize in the case that one of
2729	// the uses will have all its uses outside the loop, for example.
2730	if (LU.Kind != Kind)
2731	return false;
2732
2733	// Check for a mismatched access type, and fall back conservatively as needed.
2734	// TODO: Be less conservative when the type is similar and can use the same
2735	// addressing modes.
2736	if (Kind == LSRUse::Address) {
2737	if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2738	NewAccessTy = MemAccessTy::getUnknown(Ctx&: AccessTy.MemTy->getContext(),
2739	AS: AccessTy.AddrSpace);
2740	}
2741	}
2742
2743	// Conservatively assume HasBaseReg is true for now.
2744	if (Immediate::isKnownLT(LHS: NewOffset, RHS: LU.MinOffset)) {
2745	if (!isAlwaysFoldable(TTI, Kind, AccessTy: NewAccessTy, /BaseGV=/nullptr,
2746	BaseOffset: LU.MaxOffset - NewOffset, HasBaseReg))
2747	return false;
2748	NewMinOffset = NewOffset;
2749	} else if (Immediate::isKnownGT(LHS: NewOffset, RHS: LU.MaxOffset)) {
2750	if (!isAlwaysFoldable(TTI, Kind, AccessTy: NewAccessTy, /BaseGV=/nullptr,
2751	BaseOffset: NewOffset - LU.MinOffset, HasBaseReg))
2752	return false;
2753	NewMaxOffset = NewOffset;
2754	}
2755
2756	// FIXME: We should be able to handle some level of scalable offset support
2757	// for 'void', but in order to get basic support up and running this is
2758	// being left out.
2759	if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
2760	(NewMinOffset.isScalable() \|\| NewMaxOffset.isScalable()))
2761	return false;
2762
2763	// Update the use.
2764	LU.MinOffset = NewMinOffset;
2765	LU.MaxOffset = NewMaxOffset;
2766	LU.AccessTy = NewAccessTy;
2767	return true;
2768	}
2769
2770	/// Return an LSRUse index and an offset value for a fixup which needs the given
2771	/// expression, with the given kind and optional access type. Either reuse an
2772	/// existing use or create a new one, as needed.
2773	std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
2774	LSRUse::KindType Kind,
2775	MemAccessTy AccessTy) {
2776	const SCEV *Copy = Expr;
2777	Immediate Offset = ExtractImmediate(S&: Expr, SE);
2778
2779	// Basic uses can't accept any offset, for example.
2780	if (!isAlwaysFoldable(TTI, Kind, AccessTy, /BaseGV=/ nullptr,
2781	BaseOffset: Offset, /HasBaseReg=/ true)) {
2782	Expr = Copy;
2783	Offset = Immediate::getFixed(MinVal: `0`);
2784	}
2785
2786	std::pair<UseMapTy::iterator, bool> P =
2787	UseMap.try_emplace(Key: LSRUse::SCEVUseKindPair (Expr, Kind));
2788	if (!P.second) {
2789	// A use already existed with this base.
2790	size_t LUIdx = P.first ->second;
2791	LSRUse &LU = Uses [LUIdx];
2792	if (reconcileNewOffset(LU, NewOffset: Offset, /HasBaseReg=/true, Kind, AccessTy))
2793	// Reuse this use.
2794	return std::make_pair(x&: LUIdx, y&: Offset);
2795	}
2796
2797	// Create a new use.
2798	size_t LUIdx = Uses.size();
2799	P.first ->second = LUIdx;
2800	Uses.push_back(Elt: LSRUse (Kind, AccessTy));
2801	LSRUse &LU = Uses [LUIdx];
2802
2803	LU.MinOffset = Offset;
2804	LU.MaxOffset = Offset;
2805	return std::make_pair(x&: LUIdx, y&: Offset);
2806	}
2807
2808	/// Delete the given use from the Uses list.
2809	void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2810	if (&LU != &Uses.back())
2811	std::swap(a&: LU, b&: Uses.back());
2812	Uses.pop_back();
2813
2814	// Update RegUses.
2815	RegUses.swapAndDropUse(LUIdx, LastLUIdx: Uses.size());
2816	}
2817
2818	/// Look for a use distinct from OrigLU which is has a formula that has the same
2819	/// registers as the given formula.
2820	LSRUse *
2821	LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2822	const LSRUse &OrigLU) {
2823	// Search all uses for the formula. This could be more clever.
2824	for (LSRUse &LU : Uses) {
2825	// Check whether this use is close enough to OrigLU, to see whether it's
2826	// worthwhile looking through its formulae.
2827	// Ignore ICmpZero uses because they may contain formulae generated by
2828	// GenerateICmpZeroScales, in which case adding fixup offsets may
2829	// be invalid.
2830	if (&LU != &OrigLU &&
2831	LU.Kind != LSRUse::ICmpZero &&
2832	LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2833	LU.WidestFixupType == OrigLU.WidestFixupType &&
2834	LU.HasFormulaWithSameRegs(F: OrigF)) {
2835	// Scan through this use's formulae.
2836	for (const Formula &F : LU.Formulae) {
2837	// Check to see if this formula has the same registers and symbols
2838	// as OrigF.
2839	if (F.BaseRegs == OrigF.BaseRegs &&
2840	F.ScaledReg == OrigF.ScaledReg &&
2841	F.BaseGV == OrigF.BaseGV &&
2842	F.Scale == OrigF.Scale &&
2843	F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2844	if (F.BaseOffset.isZero())
2845	return &LU;
2846	// This is the formula where all the registers and symbols matched;
2847	// there aren't going to be any others. Since we declined it, we
2848	// can skip the rest of the formulae and proceed to the next LSRUse.
2849	break;
2850	}
2851	}
2852	}
2853	}
2854
2855	// Nothing looked good.
2856	return nullptr;
2857	}
2858
2859	void LSRInstance::CollectInterestingTypesAndFactors() {
2860	SmallSetVector<const SCEV *, `4`> Strides;
2861
2862	// Collect interesting types and strides.
2863	SmallVector<const SCEV *, `4`> Worklist;
2864	for (const IVStrideUse &U : IU) {
2865	const SCEV *Expr = IU.getExpr(IU: U);
2866	if (!Expr)
2867	continue;
2868
2869	// Collect interesting types.
2870	Types.insert(X: SE.getEffectiveSCEVType(Ty: Expr->getType()));
2871
2872	// Add strides for mentioned loops.
2873	Worklist.push_back(Elt: Expr);
2874	do {
2875	const SCEV *S = Worklist.pop_back_val();
2876	if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) {
2877	if (AR->getLoop() == L)
2878	Strides.insert(X: AR->getStepRecurrence(SE));
2879	Worklist.push_back(Elt: AR->getStart());
2880	} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
2881	append_range(C&: Worklist, R: Add->operands());
2882	}
2883	} while (!Worklist.empty());
2884	}
2885
2886	// Compute interesting factors from the set of interesting strides.
2887	for (SmallSetVector<const SCEV *, `4`>::const_iterator
2888	I = Strides.begin(), E = Strides.end(); I != E; ++I)
2889	for (SmallSetVector<const SCEV *, `4`>::const_iterator NewStrideIter =
2890	std::next(x: I); NewStrideIter != E; ++NewStrideIter) {
2891	const SCEV OldStride = I;
2892	const SCEV NewStride = NewStrideIter;
2893
2894	if (SE.getTypeSizeInBits(Ty: OldStride->getType()) !=
2895	SE.getTypeSizeInBits(Ty: NewStride->getType())) {
2896	if (SE.getTypeSizeInBits(Ty: OldStride->getType()) >
2897	SE.getTypeSizeInBits(Ty: NewStride->getType()))
2898	NewStride = SE.getSignExtendExpr(Op: NewStride, Ty: OldStride->getType());
2899	else
2900	OldStride = SE.getSignExtendExpr(Op: OldStride, Ty: NewStride->getType());
2901	}
2902	if (const SCEVConstant *Factor =
2903	dyn_cast_or_null<SCEVConstant>(Val: getExactSDiv(LHS: NewStride, RHS: OldStride,
2904	SE, IgnoreSignificantBits: true))) {
2905	if (Factor->getAPInt().getSignificantBits() <= `64` && !Factor->isZero())
2906	Factors.insert(X: Factor->getAPInt().getSExtValue());
2907	} else if (const SCEVConstant *Factor =
2908	dyn_cast_or_null<SCEVConstant>(Val: getExactSDiv(LHS: OldStride,
2909	RHS: NewStride,
2910	SE, IgnoreSignificantBits: true))) {
2911	if (Factor->getAPInt().getSignificantBits() <= `64` && !Factor->isZero())
2912	Factors.insert(X: Factor->getAPInt().getSExtValue());
2913	}
2914	}
2915
2916	// If all uses use the same type, don't bother looking for truncation-based
2917	// reuse.
2918	if (Types.size() == `1`)
2919	Types.clear();
2920
2921	LLVM_DEBUG(print_factors_and_types(dbgs()));
2922	}
2923
2924	/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
2925	/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
2926	/// IVStrideUses, we could partially skip this.
2927	static User::op_iterator
2928	findIVOperand(User::op_iterator OI, User::op_iterator OE,
2929	Loop *L, ScalarEvolution &SE) {
2930	for(; OI != OE; ++OI) {
2931	if (Instruction Oper = dyn_cast<Instruction>(Val&: OI)) {
2932	if (!SE.isSCEVable(Ty: Oper->getType()))
2933	continue;
2934
2935	if (const SCEVAddRecExpr *AR =
2936	dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: Oper))) {
2937	if (AR->getLoop() == L)
2938	break;
2939	}
2940	}
2941	}
2942	return OI;
2943	}
2944
2945	/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
2946	/// a convenient helper.
2947	static Value getWideOperand(Value Oper) {
2948	if (TruncInst *Trunc = dyn_cast<TruncInst>(Val: Oper))
2949	return Trunc->getOperand(i_nocapture: `0`);
2950	return Oper;
2951	}
2952
2953	/// Return an approximation of this SCEV expression's "base", or NULL for any
2954	/// constant. Returning the expression itself is conservative. Returning a
2955	/// deeper subexpression is more precise and valid as long as it isn't less
2956	/// complex than another subexpression. For expressions involving multiple
2957	/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
2958	/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
2959	/// IVInc==b-a.
2960	///
2961	/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
2962	/// SCEVUnknown, we simply return the rightmost SCEV operand.
2963	static const SCEV getExprBase(const* SCEV *S) {
2964	switch (S->getSCEVType()) {
2965	default: // including scUnknown.
2966	return S;
2967	case scConstant:
2968	case scVScale:
2969	return nullptr;
2970	case scTruncate:
2971	return getExprBase(S: cast<SCEVTruncateExpr>(Val: S)->getOperand());
2972	case scZeroExtend:
2973	return getExprBase(S: cast<SCEVZeroExtendExpr>(Val: S)->getOperand());
2974	case scSignExtend:
2975	return getExprBase(S: cast<SCEVSignExtendExpr>(Val: S)->getOperand());
2976	case scAddExpr: {
2977	// Skip over scaled operands (scMulExpr) to follow add operands as long as
2978	// there's nothing more complex.
2979	// FIXME: not sure if we want to recognize negation.
2980	const SCEVAddExpr *Add = cast<SCEVAddExpr>(Val: S);
2981	for (const SCEV *SubExpr : reverse(C: Add->operands())) {
2982	if (SubExpr->getSCEVType() == scAddExpr)
2983	return getExprBase(S: SubExpr);
2984
2985	if (SubExpr->getSCEVType() != scMulExpr)
2986	return SubExpr;
2987	}
2988	return S; // all operands are scaled, be conservative.
2989	}
2990	case scAddRecExpr:
2991	return getExprBase(S: cast<SCEVAddRecExpr>(Val: S)->getStart());
2992	}
2993	llvm_unreachable("Unknown SCEV kind!");
2994	}
2995
2996	/// Return true if the chain increment is profitable to expand into a loop
2997	/// invariant value, which may require its own register. A profitable chain
2998	/// increment will be an offset relative to the same base. We allow such offsets
2999	/// to potentially be used as chain increment as long as it's not obviously
3000	/// expensive to expand using real instructions.
3001	bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
3002	const SCEV *IncExpr,
3003	ScalarEvolution &SE) {
3004	// Aggressively form chains when -stress-ivchain.
3005	if (StressIVChain)
3006	return true;
3007
3008	// Do not replace a constant offset from IV head with a nonconstant IV
3009	// increment.
3010	if (!isa<SCEVConstant>(Val: IncExpr)) {
3011	const SCEV *HeadExpr = SE.getSCEV(V: getWideOperand(Oper: Incs [`0`].IVOperand));
3012	if (isa<SCEVConstant>(Val: SE.getMinusSCEV(LHS: OperExpr, RHS: HeadExpr)))
3013	return false;
3014	}
3015
3016	SmallPtrSet<const SCEV*, `8`> Processed;
3017	return !isHighCostExpansion(S: IncExpr, Processed, SE);
3018	}
3019
3020	/// Return true if the number of registers needed for the chain is estimated to
3021	/// be less than the number required for the individual IV users. First prohibit
3022	/// any IV users that keep the IV live across increments (the Users set should
3023	/// be empty). Next count the number and type of increments in the chain.
3024	///
3025	/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
3026	/// effectively use postinc addressing modes. Only consider it profitable it the
3027	/// increments can be computed in fewer registers when chained.
3028	///
3029	/// TODO: Consider IVInc free if it's already used in another chains.
3030	static bool isProfitableChain(IVChain &Chain,
3031	SmallPtrSetImpl<Instruction *> &Users,
3032	ScalarEvolution &SE,
3033	const TargetTransformInfo &TTI) {
3034	if (StressIVChain)
3035	return true;
3036
3037	if (!Chain.hasIncs())
3038	return false;
3039
3040	if (!Users.empty()) {
3041	LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[`0`].UserInst << " users:\n";
3042	for (Instruction *Inst
3043	: Users) { dbgs() << " " << *Inst << "\n"; });
3044	return false;
3045	}
3046	assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
3047
3048	// The chain itself may require a register, so intialize cost to 1.
3049	int cost = `1`;
3050
3051	// A complete chain likely eliminates the need for keeping the original IV in
3052	// a register. LSR does not currently know how to form a complete chain unless
3053	// the header phi already exists.
3054	if (isa<PHINode>(Val: Chain.tailUserInst())
3055	&& SE.getSCEV(V: Chain.tailUserInst()) == Chain.Incs [`0`].IncExpr) {
3056	--cost;
3057	}
3058	const SCEV LastIncExpr = nullptr*;
3059	unsigned NumConstIncrements = `0`;
3060	unsigned NumVarIncrements = `0`;
3061	unsigned NumReusedIncrements = `0`;
3062
3063	if (TTI.isProfitableLSRChainElement(I: Chain.Incs [`0`].UserInst))
3064	return true;
3065
3066	for (const IVInc &Inc : Chain) {
3067	if (TTI.isProfitableLSRChainElement(I: Inc.UserInst))
3068	return true;
3069	if (Inc.IncExpr->isZero())
3070	continue;
3071
3072	// Incrementing by zero or some constant is neutral. We assume constants can
3073	// be folded into an addressing mode or an add's immediate operand.
3074	if (isa<SCEVConstant>(Val: Inc.IncExpr)) {
3075	++NumConstIncrements;
3076	continue;
3077	}
3078
3079	if (Inc.IncExpr == LastIncExpr)
3080	++NumReusedIncrements;
3081	else
3082	++NumVarIncrements;
3083
3084	LastIncExpr = Inc.IncExpr;
3085	}
3086	// An IV chain with a single increment is handled by LSR's postinc
3087	// uses. However, a chain with multiple increments requires keeping the IV's
3088	// value live longer than it needs to be if chained.
3089	if (NumConstIncrements > `1`)
3090	--cost;
3091
3092	// Materializing increment expressions in the preheader that didn't exist in
3093	// the original code may cost a register. For example, sign-extended array
3094	// indices can produce ridiculous increments like this:
3095	// IV + ((sext i32 (2 %s) to i64) + (-1 * (sext i32 %s to i64)))*
3096	cost += NumVarIncrements;
3097
3098	// Reusing variable increments likely saves a register to hold the multiple of
3099	// the stride.
3100	cost -= NumReusedIncrements;
3101
3102	LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[`0`].UserInst << " Cost: " << cost
3103	<< "\n");
3104
3105	return cost < `0`;
3106	}
3107
3108	/// Add this IV user to an existing chain or make it the head of a new chain.
3109	void LSRInstance::ChainInstruction(Instruction UserInst, Instruction IVOper,
3110	SmallVectorImpl<ChainUsers> &ChainUsersVec) {
3111	// When IVs are used as types of varying widths, they are generally converted
3112	// to a wider type with some uses remaining narrow under a (free) trunc.
3113	Value *const NextIV = getWideOperand(Oper: IVOper);
3114	const SCEV *const OperExpr = SE.getSCEV(V: NextIV);
3115	const SCEV *const OperExprBase = getExprBase(S: OperExpr);
3116
3117	// Visit all existing chains. Check if its IVOper can be computed as a
3118	// profitable loop invariant increment from the last link in the Chain.
3119	unsigned ChainIdx = `0`, NChains = IVChainVec.size();
3120	const SCEV LastIncExpr = nullptr*;
3121	for (; ChainIdx < NChains; ++ChainIdx) {
3122	IVChain &Chain = IVChainVec [ChainIdx];
3123
3124	// Prune the solution space aggressively by checking that both IV operands
3125	// are expressions that operate on the same unscaled SCEVUnknown. This
3126	// "base" will be canceled by the subsequent getMinusSCEV call. Checking
3127	// first avoids creating extra SCEV expressions.
3128	if (!StressIVChain && Chain.ExprBase != OperExprBase)
3129	continue;
3130
3131	Value *PrevIV = getWideOperand(Oper: Chain.Incs.back().IVOperand);
3132	if (PrevIV->getType() != NextIV->getType())
3133	continue;
3134
3135	// A phi node terminates a chain.
3136	if (isa<PHINode>(Val: UserInst) && isa<PHINode>(Val: Chain.tailUserInst()))
3137	continue;
3138
3139	// The increment must be loop-invariant so it can be kept in a register.
3140	const SCEV *PrevExpr = SE.getSCEV(V: PrevIV);
3141	const SCEV *IncExpr = SE.getMinusSCEV(LHS: OperExpr, RHS: PrevExpr);
3142	if (isa<SCEVCouldNotCompute>(Val: IncExpr) \|\| !SE.isLoopInvariant(S: IncExpr, L))
3143	continue;
3144
3145	if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3146	LastIncExpr = IncExpr;
3147	break;
3148	}
3149	}
3150	// If we haven't found a chain, create a new one, unless we hit the max. Don't
3151	// bother for phi nodes, because they must be last in the chain.
3152	if (ChainIdx == NChains) {
3153	if (isa<PHINode>(Val: UserInst))
3154	return;
3155	if (NChains >= MaxChains && !StressIVChain) {
3156	LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
3157	return;
3158	}
3159	LastIncExpr = OperExpr;
3160	// IVUsers may have skipped over sign/zero extensions. We don't currently
3161	// attempt to form chains involving extensions unless they can be hoisted
3162	// into this loop's AddRec.
3163	if (!isa<SCEVAddRecExpr>(Val: LastIncExpr))
3164	return;
3165	++NChains;
3166	IVChainVec.push_back(Elt: IVChain (IVInc (UserInst, IVOper, LastIncExpr),
3167	OperExprBase));
3168	ChainUsersVec.resize(N: NChains);
3169	LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
3170	<< ") IV=" << *LastIncExpr << "\n");
3171	} else {
3172	LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
3173	<< ") IV+" << *LastIncExpr << "\n");
3174	// Add this IV user to the end of the chain.
3175	IVChainVec [ChainIdx].add(X: IVInc (UserInst, IVOper, LastIncExpr));
3176	}
3177	IVChain &Chain = IVChainVec [ChainIdx];
3178
3179	SmallPtrSet<Instruction*,`4`> &NearUsers = ChainUsersVec [ChainIdx].NearUsers;
3180	// This chain's NearUsers become FarUsers.
3181	if (!LastIncExpr->isZero()) {
3182	ChainUsersVec [ChainIdx].FarUsers.insert_range(R&: NearUsers);
3183	NearUsers.clear();
3184	}
3185
3186	// All other uses of IVOperand become near uses of the chain.
3187	// We currently ignore intermediate values within SCEV expressions, assuming
3188	// they will eventually be used be the current chain, or can be computed
3189	// from one of the chain increments. To be more precise we could
3190	// transitively follow its user and only add leaf IV users to the set.
3191	for (User *U : IVOper->users()) {
3192	Instruction *OtherUse = dyn_cast<Instruction>(Val: U);
3193	if (!OtherUse)
3194	continue;
3195	// Uses in the chain will no longer be uses if the chain is formed.
3196	// Include the head of the chain in this iteration (not Chain.begin()).
3197	IVChain::const_iterator IncIter = Chain.Incs.begin();
3198	IVChain::const_iterator IncEnd = Chain.Incs.end();
3199	for( ; IncIter != IncEnd; ++IncIter) {
3200	if (IncIter->UserInst == OtherUse)
3201	break;
3202	}
3203	if (IncIter != IncEnd)
3204	continue;
3205
3206	if (SE.isSCEVable(Ty: OtherUse->getType())
3207	&& !isa<SCEVUnknown>(Val: SE.getSCEV(V: OtherUse))
3208	&& IU.isIVUserOrOperand(Inst: OtherUse)) {
3209	continue;
3210	}
3211	NearUsers.insert(Ptr: OtherUse);
3212	}
3213
3214	// Since this user is part of the chain, it's no longer considered a use
3215	// of the chain.
3216	ChainUsersVec [ChainIdx].FarUsers.erase(Ptr: UserInst);
3217	}
3218
3219	/// Populate the vector of Chains.
3220	///
3221	/// This decreases ILP at the architecture level. Targets with ample registers,
3222	/// multiple memory ports, and no register renaming probably don't want
3223	/// this. However, such targets should probably disable LSR altogether.
3224	///
3225	/// The job of LSR is to make a reasonable choice of induction variables across
3226	/// the loop. Subsequent passes can easily "unchain" computation exposing more
3227	/// ILP within the loop* if the target wants it.*
3228	///
3229	/// Finding the best IV chain is potentially a scheduling problem. Since LSR
3230	/// will not reorder memory operations, it will recognize this as a chain, but
3231	/// will generate redundant IV increments. Ideally this would be corrected later
3232	/// by a smart scheduler:
3233	/// = A[i]
3234	/// = A[i+x]
3235	/// A[i] =
3236	/// A[i+x] =
3237	///
3238	/// TODO: Walk the entire domtree within this loop, not just the path to the
3239	/// loop latch. This will discover chains on side paths, but requires
3240	/// maintaining multiple copies of the Chains state.
3241	void LSRInstance::CollectChains() {
3242	LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
3243	SmallVector<ChainUsers, `8`> ChainUsersVec;
3244
3245	SmallVector<BasicBlock *,`8`> LatchPath;
3246	BasicBlock *LoopHeader = L->getHeader();
3247	for (DomTreeNode *Rung = DT.getNode(BB: L->getLoopLatch());
3248	Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3249	LatchPath.push_back(Elt: Rung->getBlock());
3250	}
3251	LatchPath.push_back(Elt: LoopHeader);
3252
3253	// Walk the instruction stream from the loop header to the loop latch.
3254	for (BasicBlock *BB : reverse(C&: LatchPath)) {
3255	for (Instruction &I : *BB) {
3256	// Skip instructions that weren't seen by IVUsers analysis.
3257	if (isa<PHINode>(Val: I) \|\| !IU.isIVUserOrOperand(Inst: &I))
3258	continue;
3259
3260	// Ignore users that are part of a SCEV expression. This way we only
3261	// consider leaf IV Users. This effectively rediscovers a portion of
3262	// IVUsers analysis but in program order this time.
3263	if (SE.isSCEVable(Ty: I.getType()) && !isa<SCEVUnknown>(Val: SE.getSCEV(V: &I)))
3264	continue;
3265
3266	// Remove this instruction from any NearUsers set it may be in.
3267	for (unsigned ChainIdx = `0`, NChains = IVChainVec.size();
3268	ChainIdx < NChains; ++ChainIdx) {
3269	ChainUsersVec [ChainIdx].NearUsers.erase(Ptr: &I);
3270	}
3271	// Search for operands that can be chained.
3272	SmallPtrSet<Instruction*, `4`> UniqueOperands;
3273	User::op_iterator IVOpEnd = I.op_end();
3274	User::op_iterator IVOpIter = findIVOperand(OI: I.op_begin(), OE: IVOpEnd, L, SE);
3275	while (IVOpIter != IVOpEnd) {
3276	Instruction IVOpInst = cast<Instruction>(Val&: IVOpIter);
3277	if (UniqueOperands.insert(Ptr: IVOpInst).second)
3278	ChainInstruction(UserInst: &I, IVOper: IVOpInst, ChainUsersVec);
3279	IVOpIter = findIVOperand(OI: std::next(x: IVOpIter), OE: IVOpEnd, L, SE);
3280	}
3281	} // Continue walking down the instructions.
3282	} // Continue walking down the domtree.
3283	// Visit phi backedges to determine if the chain can generate the IV postinc.
3284	for (PHINode &PN : L->getHeader()->phis()) {
3285	if (!SE.isSCEVable(Ty: PN.getType()))
3286	continue;
3287
3288	Instruction *IncV =
3289	dyn_cast<Instruction>(Val: PN.getIncomingValueForBlock(BB: L->getLoopLatch()));
3290	if (IncV)
3291	ChainInstruction(UserInst: &PN, IVOper: IncV, ChainUsersVec);
3292	}
3293	// Remove any unprofitable chains.
3294	unsigned ChainIdx = `0`;
3295	for (unsigned UsersIdx = `0`, NChains = IVChainVec.size();
3296	UsersIdx < NChains; ++UsersIdx) {
3297	if (!isProfitableChain(Chain&: IVChainVec [UsersIdx],
3298	Users&: ChainUsersVec [UsersIdx].FarUsers, SE, TTI))
3299	continue;
3300	// Preserve the chain at UsesIdx.
3301	if (ChainIdx != UsersIdx)
3302	IVChainVec [ChainIdx] = IVChainVec [UsersIdx];
3303	FinalizeChain(Chain&: IVChainVec [ChainIdx]);
3304	++ChainIdx;
3305	}
3306	IVChainVec.resize(N: ChainIdx);
3307	}
3308
3309	void LSRInstance::FinalizeChain(IVChain &Chain) {
3310	assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
3311	LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[`0`].UserInst << "\n");
3312
3313	for (const IVInc &Inc : Chain) {
3314	LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
3315	auto UseI = find(Range: Inc.UserInst->operands(), Val: Inc.IVOperand);
3316	assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
3317	IVIncSet.insert(Ptr: UseI);
3318	}
3319	}
3320
3321	/// Return true if the IVInc can be folded into an addressing mode.
3322	static bool canFoldIVIncExpr(const SCEV IncExpr, Instruction UserInst,
3323	Value Operand, const* TargetTransformInfo &TTI) {
3324	const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(Val: IncExpr);
3325	Immediate IncOffset = Immediate::getZero();
3326	if (IncConst) {
3327	if (IncConst && IncConst->getAPInt().getSignificantBits() > `64`)
3328	return false;
3329	IncOffset = Immediate::getFixed(MinVal: IncConst->getValue()->getSExtValue());
3330	} else {
3331	// Look for mul(vscale, constant), to detect a scalable offset.
3332	const APInt *C;
3333	if (!match(S: IncExpr, P: m_scev_Mul(Op0: m_scev_APInt(C), Op1: m_SCEVVScale())) \|\|
3334	C->getSignificantBits() > `64`)
3335	return false;
3336	IncOffset = Immediate::getScalable(MinVal: C->getSExtValue());
3337	}
3338
3339	if (!isAddressUse(TTI, Inst: UserInst, OperandVal: Operand))
3340	return false;
3341
3342	MemAccessTy AccessTy = getAccessType(TTI, Inst: UserInst, OperandVal: Operand);
3343	if (!isAlwaysFoldable(TTI, Kind: LSRUse::Address, AccessTy, /BaseGV=/nullptr,
3344	BaseOffset: IncOffset, /HasBaseReg=/false))
3345	return false;
3346
3347	return true;
3348	}
3349
3350	/// Generate an add or subtract for each IVInc in a chain to materialize the IV
3351	/// user's operand from the previous IV user's operand.
3352	void LSRInstance::GenerateIVChain(const IVChain &Chain,
3353	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3354	// Find the new IVOperand for the head of the chain. It may have been replaced
3355	// by LSR.
3356	const IVInc &Head = Chain.Incs [`0`];
3357	User::op_iterator IVOpEnd = Head.UserInst->op_end();
3358	// findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
3359	User::op_iterator IVOpIter = findIVOperand(OI: Head.UserInst->op_begin(),
3360	OE: IVOpEnd, L, SE);
3361	Value IVSrc = nullptr*;
3362	while (IVOpIter != IVOpEnd) {
3363	IVSrc = getWideOperand(Oper: *IVOpIter);
3364
3365	// If this operand computes the expression that the chain needs, we may use
3366	// it. (Check this after setting IVSrc which is used below.)
3367	//
3368	// Note that if Head.IncExpr is wider than IVSrc, then this phi is too
3369	// narrow for the chain, so we can no longer use it. We do allow using a
3370	// wider phi, assuming the LSR checked for free truncation. In that case we
3371	// should already have a truncate on this operand such that
3372	// getSCEV(IVSrc) == IncExpr.
3373	if (SE.getSCEV(V: *IVOpIter) == Head.IncExpr
3374	\|\| SE.getSCEV(V: IVSrc) == Head.IncExpr) {
3375	break;
3376	}
3377	IVOpIter = findIVOperand(OI: std::next(x: IVOpIter), OE: IVOpEnd, L, SE);
3378	}
3379	if (IVOpIter == IVOpEnd) {
3380	// Gracefully give up on this chain.
3381	LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
3382	return;
3383	}
3384	assert(IVSrc && "Failed to find IV chain source");
3385
3386	LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
3387	Type *IVTy = IVSrc->getType();
3388	Type *IntTy = SE.getEffectiveSCEVType(Ty: IVTy);
3389	const SCEV LeftOverExpr = nullptr*;
3390	const SCEV *Accum = SE.getZero(Ty: IntTy);
3391	SmallVector<std::pair<const SCEV , Value >> Bases;
3392	Bases.emplace_back(Args&: Accum, Args&: IVSrc);
3393
3394	for (const IVInc &Inc : Chain) {
3395	Instruction *InsertPt = Inc.UserInst;
3396	if (isa<PHINode>(Val: InsertPt))
3397	InsertPt = L->getLoopLatch()->getTerminator();
3398
3399	// IVOper will replace the current IV User's operand. IVSrc is the IV
3400	// value currently held in a register.
3401	Value *IVOper = IVSrc;
3402	if (!Inc.IncExpr->isZero()) {
3403	// IncExpr was the result of subtraction of two narrow values, so must
3404	// be signed.
3405	const SCEV *IncExpr = SE.getNoopOrSignExtend(V: Inc.IncExpr, Ty: IntTy);
3406	Accum = SE.getAddExpr(LHS: Accum, RHS: IncExpr);
3407	LeftOverExpr = LeftOverExpr ?
3408	SE.getAddExpr(LHS: LeftOverExpr, RHS: IncExpr) : IncExpr;
3409	}
3410
3411	// Look through each base to see if any can produce a nice addressing mode.
3412	bool FoundBase = false;
3413	for (auto [MapScev, MapIVOper] : reverse(C&: Bases)) {
3414	const SCEV *Remainder = SE.getMinusSCEV(LHS: Accum, RHS: MapScev);
3415	if (canFoldIVIncExpr(IncExpr: Remainder, UserInst: Inc.UserInst, Operand: Inc.IVOperand, TTI)) {
3416	if (!Remainder->isZero()) {
3417	Rewriter.clearPostInc();
3418	Value *IncV = Rewriter.expandCodeFor(SH: Remainder, Ty: IntTy, I: InsertPt);
3419	const SCEV *IVOperExpr =
3420	SE.getAddExpr(LHS: SE.getUnknown(V: MapIVOper), RHS: SE.getUnknown(V: IncV));
3421	IVOper = Rewriter.expandCodeFor(SH: IVOperExpr, Ty: IVTy, I: InsertPt);
3422	} else {
3423	IVOper = MapIVOper;
3424	}
3425
3426	FoundBase = true;
3427	break;
3428	}
3429	}
3430	if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
3431	// Expand the IV increment.
3432	Rewriter.clearPostInc();
3433	Value *IncV = Rewriter.expandCodeFor(SH: LeftOverExpr, Ty: IntTy, I: InsertPt);
3434	const SCEV *IVOperExpr = SE.getAddExpr(LHS: SE.getUnknown(V: IVSrc),
3435	RHS: SE.getUnknown(V: IncV));
3436	IVOper = Rewriter.expandCodeFor(SH: IVOperExpr, Ty: IVTy, I: InsertPt);
3437
3438	// If an IV increment can't be folded, use it as the next IV value.
3439	if (!canFoldIVIncExpr(IncExpr: LeftOverExpr, UserInst: Inc.UserInst, Operand: Inc.IVOperand, TTI)) {
3440	assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
3441	Bases.emplace_back(Args&: Accum, Args&: IVOper);
3442	IVSrc = IVOper;
3443	LeftOverExpr = nullptr;
3444	}
3445	}
3446	Type *OperTy = Inc.IVOperand->getType();
3447	if (IVTy != OperTy) {
3448	assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
3449	"cannot extend a chained IV");
3450	IRBuilder<> Builder(InsertPt);
3451	IVOper = Builder.CreateTruncOrBitCast(V: IVOper, DestTy: OperTy, Name: "lsr.chain");
3452	}
3453	Inc.UserInst->replaceUsesOfWith(From: Inc.IVOperand, To: IVOper);
3454	if (auto *OperandIsInstr = dyn_cast<Instruction>(Val: Inc.IVOperand))
3455	DeadInsts.emplace_back(Args&: OperandIsInstr);
3456	}
3457	// If LSR created a new, wider phi, we may also replace its postinc. We only
3458	// do this if we also found a wide value for the head of the chain.
3459	if (isa<PHINode>(Val: Chain.tailUserInst())) {
3460	for (PHINode &Phi : L->getHeader()->phis()) {
3461	if (Phi.getType() != IVSrc->getType())
3462	continue;
3463	Instruction *PostIncV = dyn_cast<Instruction>(
3464	Val: Phi.getIncomingValueForBlock(BB: L->getLoopLatch()));
3465	if (!PostIncV \|\| (SE.getSCEV(V: PostIncV) != SE.getSCEV(V: IVSrc)))
3466	continue;
3467	Value *IVOper = IVSrc;
3468	Type *PostIncTy = PostIncV->getType();
3469	if (IVTy != PostIncTy) {
3470	assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
3471	IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
3472	Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
3473	IVOper = Builder.CreatePointerCast(V: IVSrc, DestTy: PostIncTy, Name: "lsr.chain");
3474	}
3475	Phi.replaceUsesOfWith(From: PostIncV, To: IVOper);
3476	DeadInsts.emplace_back(Args&: PostIncV);
3477	}
3478	}
3479	}
3480
3481	void LSRInstance::CollectFixupsAndInitialFormulae() {
3482	BranchInst ExitBranch = nullptr*;
3483	bool SaveCmp = TTI.canSaveCmp(L, BI: &ExitBranch, SE: &SE, LI: &LI, DT: &DT, AC: &AC, LibInfo: &TLI);
3484
3485	// For calculating baseline cost
3486	SmallPtrSet<const SCEV *, `16`> Regs;
3487	DenseSet<const SCEV *> VisitedRegs;
3488	DenseSet<size_t> VisitedLSRUse;
3489
3490	for (const IVStrideUse &U : IU) {
3491	Instruction *UserInst = U.getUser();
3492	// Skip IV users that are part of profitable IV Chains.
3493	User::op_iterator UseI =
3494	find(Range: UserInst->operands(), Val: U.getOperandValToReplace());
3495	assert(UseI != UserInst->op_end() && "cannot find IV operand");
3496	if (IVIncSet.count(Ptr: UseI)) {
3497	LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << `'\n'`);
3498	continue;
3499	}
3500
3501	LSRUse::KindType Kind = LSRUse::Basic;
3502	MemAccessTy AccessTy;
3503	if (isAddressUse(TTI, Inst: UserInst, OperandVal: U.getOperandValToReplace())) {
3504	Kind = LSRUse::Address;
3505	AccessTy = getAccessType(TTI, Inst: UserInst, OperandVal: U.getOperandValToReplace());
3506	}
3507
3508	const SCEV *S = IU.getExpr(IU: U);
3509	if (!S)
3510	continue;
3511	PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
3512
3513	// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
3514	// (N - i == 0), and this allows (N - i) to be the expression that we work
3515	// with rather than just N or i, so we can consider the register
3516	// requirements for both N and i at the same time. Limiting this code to
3517	// equality icmps is not a problem because all interesting loops use
3518	// equality icmps, thanks to IndVarSimplify.
3519	if (ICmpInst *CI = dyn_cast<ICmpInst>(Val: UserInst)) {
3520	// If CI can be saved in some target, like replaced inside hardware loop
3521	// in PowerPC, no need to generate initial formulae for it.
3522	if (SaveCmp && CI == dyn_cast<ICmpInst>(Val: ExitBranch->getCondition()))
3523	continue;
3524	if (CI->isEquality()) {
3525	// Swap the operands if needed to put the OperandValToReplace on the
3526	// left, for consistency.
3527	Value *NV = CI->getOperand(i_nocapture: `1`);
3528	if (NV == U.getOperandValToReplace()) {
3529	CI->setOperand(i_nocapture: `1`, Val_nocapture: CI->getOperand(i_nocapture: `0`));
3530	CI->setOperand(i_nocapture: `0`, Val_nocapture: NV);
3531	NV = CI->getOperand(i_nocapture: `1`);
3532	Changed = true;
3533	}
3534
3535	// x == y --> x - y == 0
3536	const SCEV *N = SE.getSCEV(V: NV);
3537	if (SE.isLoopInvariant(S: N, L) && Rewriter.isSafeToExpand(S: N) &&
3538	(!NV->getType()->isPointerTy() \|\|
3539	SE.getPointerBase(V: N) == SE.getPointerBase(V: S))) {
3540	// S is normalized, so normalize N before folding it into S
3541	// to keep the result normalized.
3542	N = normalizeForPostIncUse(S: N, Loops: TmpPostIncLoops, SE);
3543	if (!N)
3544	continue;
3545	Kind = LSRUse::ICmpZero;
3546	S = SE.getMinusSCEV(LHS: N, RHS: S);
3547	} else if (L->isLoopInvariant(V: NV) &&
3548	(!isa<Instruction>(Val: NV) \|\|
3549	DT.dominates(Def: cast<Instruction>(Val: NV), BB: L->getHeader())) &&
3550	!NV->getType()->isPointerTy()) {
3551	// If we can't generally expand the expression (e.g. it contains
3552	// a divide), but it is already at a loop invariant point before the
3553	// loop, wrap it in an unknown (to prevent the expander from trying
3554	// to re-expand in a potentially unsafe way.) The restriction to
3555	// integer types is required because the unknown hides the base, and
3556	// SCEV can't compute the difference of two unknown pointers.
3557	N = SE.getUnknown(V: NV);
3558	N = normalizeForPostIncUse(S: N, Loops: TmpPostIncLoops, SE);
3559	if (!N)
3560	continue;
3561	Kind = LSRUse::ICmpZero;
3562	S = SE.getMinusSCEV(LHS: N, RHS: S);
3563	assert(!isa<SCEVCouldNotCompute>(S));
3564	}
3565
3566	// -1 and the negations of all interesting strides (except the negation
3567	// of -1) are now also interesting.
3568	for (size_t i = `0`, e = Factors.size(); i != e; ++i)
3569	if (Factors [i] != -`1`)
3570	Factors.insert(X: -(uint64_t)Factors [i]);
3571	Factors.insert(X: -`1`);
3572	}
3573	}
3574
3575	// Get or create an LSRUse.
3576	std::pair<size_t, Immediate> P = getUse(Expr&: S, Kind, AccessTy);
3577	size_t LUIdx = P.first;
3578	Immediate Offset = P.second;
3579	LSRUse &LU = Uses [LUIdx];
3580
3581	// Record the fixup.
3582	LSRFixup &LF = LU.getNewFixup();
3583	LF.UserInst = UserInst;
3584	LF.OperandValToReplace = U.getOperandValToReplace();
3585	LF.PostIncLoops = TmpPostIncLoops;
3586	LF.Offset = Offset;
3587	LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3588
3589	// Create SCEV as Formula for calculating baseline cost
3590	if (!VisitedLSRUse.count(V: LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3591	Formula F;
3592	F.initialMatch(S, L, SE);
3593	BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
3594	VisitedLSRUse.insert(V: LUIdx);
3595	}
3596
3597	if (!LU.WidestFixupType \|\|
3598	SE.getTypeSizeInBits(Ty: LU.WidestFixupType) <
3599	SE.getTypeSizeInBits(Ty: LF.OperandValToReplace->getType()))
3600	LU.WidestFixupType = LF.OperandValToReplace->getType();
3601
3602	// If this is the first use of this LSRUse, give it a formula.
3603	if (LU.Formulae.empty()) {
3604	InsertInitialFormula(S, LU, LUIdx);
3605	CountRegisters(F: LU.Formulae.back(), LUIdx);
3606	}
3607	}
3608
3609	LLVM_DEBUG(print_fixups(dbgs()));
3610	}
3611
3612	/// Insert a formula for the given expression into the given use, separating out
3613	/// loop-variant portions from loop-invariant and loop-computable portions.
3614	void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
3615	size_t LUIdx) {
3616	// Mark uses whose expressions cannot be expanded.
3617	if (!Rewriter.isSafeToExpand(S))
3618	LU.RigidFormula = true;
3619
3620	Formula F;
3621	F.initialMatch(S, L, SE);
3622	bool Inserted = InsertFormula(LU, LUIdx, F);
3623	assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3624	}
3625
3626	/// Insert a simple single-register formula for the given expression into the
3627	/// given use.
3628	void
3629	LSRInstance::InsertSupplementalFormula(const SCEV *S,
3630	LSRUse &LU, size_t LUIdx) {
3631	Formula F;
3632	F.BaseRegs.push_back(Elt: S);
3633	F.HasBaseReg = true;
3634	bool Inserted = InsertFormula(LU, LUIdx, F);
3635	assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3636	}
3637
3638	/// Note which registers are used by the given formula, updating RegUses.
3639	void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3640	if (F.ScaledReg)
3641	RegUses.countRegister(Reg: F.ScaledReg, LUIdx);
3642	for (const SCEV *BaseReg : F.BaseRegs)
3643	RegUses.countRegister(Reg: BaseReg, LUIdx);
3644	}
3645
3646	/// If the given formula has not yet been inserted, add it to the list, and
3647	/// return true. Return false otherwise.
3648	bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3649	// Do not insert formula that we will not be able to expand.
3650	assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3651	"Formula is illegal");
3652
3653	if (!LU.InsertFormula(F, L: *L))
3654	return false;
3655
3656	CountRegisters(F, LUIdx);
3657	return true;
3658	}
3659
3660	/// Check for other uses of loop-invariant values which we're tracking. These
3661	/// other uses will pin these values in registers, making them less profitable
3662	/// for elimination.
3663	/// TODO: This currently misses non-constant addrec step registers.
3664	/// TODO: Should this give more weight to users inside the loop?
3665	void
3666	LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3667	SmallVector<const SCEV *, `8`> Worklist(RegUses.begin(), RegUses.end());
3668	SmallPtrSet<const SCEV *, `32`> Visited;
3669
3670	// Don't collect outside uses if we are favoring postinc - the instructions in
3671	// the loop are more important than the ones outside of it.
3672	if (AMK == TTI::AMK_PostIndexed)
3673	return;
3674
3675	while (!Worklist.empty()) {
3676	const SCEV *S = Worklist.pop_back_val();
3677
3678	// Don't process the same SCEV twice
3679	if (!Visited.insert(Ptr: S).second)
3680	continue;
3681
3682	if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(Val: S))
3683	append_range(C&: Worklist, R: N->operands());
3684	else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(Val: S))
3685	Worklist.push_back(Elt: C->getOperand());
3686	else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(Val: S)) {
3687	Worklist.push_back(Elt: D->getLHS());
3688	Worklist.push_back(Elt: D->getRHS());
3689	} else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(Val: S)) {
3690	const Value *V = US->getValue();
3691	if (const Instruction *Inst = dyn_cast<Instruction>(Val: V)) {
3692	// Look for instructions defined outside the loop.
3693	if (L->contains(Inst)) continue;
3694	} else if (isa<Constant>(Val: V))
3695	// Constants can be re-materialized.
3696	continue;
3697	for (const Use &U : V->uses()) {
3698	const Instruction *UserInst = dyn_cast<Instruction>(Val: U.getUser());
3699	// Ignore non-instructions.
3700	if (!UserInst)
3701	continue;
3702	// Don't bother if the instruction is an EHPad.
3703	if (UserInst->isEHPad())
3704	continue;
3705	// Ignore instructions in other functions (as can happen with
3706	// Constants).
3707	if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3708	continue;
3709	// Ignore instructions not dominated by the loop.
3710	const BasicBlock *UseBB = !isa<PHINode>(Val: UserInst) ?
3711	UserInst->getParent() :
3712	cast<PHINode>(Val: UserInst)->getIncomingBlock(
3713	i: PHINode::getIncomingValueNumForOperand(i: U.getOperandNo()));
3714	if (!DT.dominates(A: L->getHeader(), B: UseBB))
3715	continue;
3716	// Don't bother if the instruction is in a BB which ends in an EHPad.
3717	if (UseBB->getTerminator()->isEHPad())
3718	continue;
3719
3720	// Ignore cases in which the currently-examined value could come from
3721	// a basic block terminated with an EHPad. This checks all incoming
3722	// blocks of the phi node since it is possible that the same incoming
3723	// value comes from multiple basic blocks, only some of which may end
3724	// in an EHPad. If any of them do, a subsequent rewrite attempt by this
3725	// pass would try to insert instructions into an EHPad, hitting an
3726	// assertion.
3727	if (isa<PHINode>(Val: UserInst)) {
3728	const auto *PhiNode = cast<PHINode>(Val: UserInst);
3729	bool HasIncompatibleEHPTerminatedBlock = false;
3730	llvm::Value *ExpectedValue = U;
3731	for (unsigned int I = `0`; I < PhiNode->getNumIncomingValues(); I++) {
3732	if (PhiNode->getIncomingValue(i: I) == ExpectedValue) {
3733	if (PhiNode->getIncomingBlock(i: I)->getTerminator()->isEHPad()) {
3734	HasIncompatibleEHPTerminatedBlock = true;
3735	break;
3736	}
3737	}
3738	}
3739	if (HasIncompatibleEHPTerminatedBlock) {
3740	continue;
3741	}
3742	}
3743
3744	// Don't bother rewriting PHIs in catchswitch blocks.
3745	if (isa<CatchSwitchInst>(Val: UserInst->getParent()->getTerminator()))
3746	continue;
3747	// Ignore uses which are part of other SCEV expressions, to avoid
3748	// analyzing them multiple times.
3749	if (SE.isSCEVable(Ty: UserInst->getType())) {
3750	const SCEV UserS = SE.getSCEV(V: const_cast<Instruction >(UserInst));
3751	// If the user is a no-op, look through to its uses.
3752	if (!isa<SCEVUnknown>(Val: UserS))
3753	continue;
3754	if (UserS == US) {
3755	Worklist.push_back(
3756	Elt: SE.getUnknown(V: const_cast<Instruction *>(UserInst)));
3757	continue;
3758	}
3759	}
3760	// Ignore icmp instructions which are already being analyzed.
3761	if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Val: UserInst)) {
3762	unsigned OtherIdx = !U.getOperandNo();
3763	Value OtherOp = const_cast<Value >(ICI->getOperand(i_nocapture: OtherIdx));
3764	if (SE.hasComputableLoopEvolution(S: SE.getSCEV(V: OtherOp), L))
3765	continue;
3766	}
3767
3768	std::pair<size_t, Immediate> P =
3769	getUse(Expr&: S, Kind: LSRUse::Basic, AccessTy: MemAccessTy ());
3770	size_t LUIdx = P.first;
3771	Immediate Offset = P.second;
3772	LSRUse &LU = Uses [LUIdx];
3773	LSRFixup &LF = LU.getNewFixup();
3774	LF.UserInst = const_cast<Instruction *>(UserInst);
3775	LF.OperandValToReplace = U;
3776	LF.Offset = Offset;
3777	LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3778	if (!LU.WidestFixupType \|\|
3779	SE.getTypeSizeInBits(Ty: LU.WidestFixupType) <
3780	SE.getTypeSizeInBits(Ty: LF.OperandValToReplace->getType()))
3781	LU.WidestFixupType = LF.OperandValToReplace->getType();
3782	InsertSupplementalFormula(S: US, LU, LUIdx);
3783	CountRegisters(F: LU.Formulae.back(), LUIdx: Uses.size() - `1`);
3784	break;
3785	}
3786	}
3787	}
3788	}
3789
3790	/// Split S into subexpressions which can be pulled out into separate
3791	/// registers. If C is non-null, multiply each subexpression by C.
3792	///
3793	/// Return remainder expression after factoring the subexpressions captured by
3794	/// Ops. If Ops is complete, return NULL.
3795	static const SCEV CollectSubexprs(const* SCEV S, const* SCEVConstant *C,
3796	SmallVectorImpl<const SCEV *> &Ops,
3797	const Loop *L,
3798	ScalarEvolution &SE,
3799	unsigned Depth = `0`) {
3800	// Arbitrarily cap recursion to protect compile time.
3801	if (Depth >= `3`)
3802	return S;
3803
3804	if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Val: S)) {
3805	// Break out add operands.
3806	for (const SCEV *S : Add->operands()) {
3807	const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth: Depth+`1`);
3808	if (Remainder)
3809	Ops.push_back(Elt: C ? SE.getMulExpr(LHS: C, RHS: Remainder) : Remainder);
3810	}
3811	return nullptr;
3812	}
3813	const SCEV Start, Step;
3814	const SCEVConstant *Op0;
3815	const SCEV *Op1;
3816	if (match(S, P: m_scev_AffineAddRec(Op0: m_SCEV(V&: Start), Op1: m_SCEV(V&: Step)))) {
3817	// Split a non-zero base out of an addrec.
3818	if (Start->isZero())
3819	return S;
3820
3821	const SCEV *Remainder = CollectSubexprs(S: Start, C, Ops, L, SE, Depth: Depth + `1`);
3822	// Split the non-zero AddRec unless it is part of a nested recurrence that
3823	// does not pertain to this loop.
3824	if (Remainder && (cast<SCEVAddRecExpr>(Val: S)->getLoop() == L \|\|
3825	!isa<SCEVAddRecExpr>(Val: Remainder))) {
3826	Ops.push_back(Elt: C ? SE.getMulExpr(LHS: C, RHS: Remainder) : Remainder);
3827	Remainder = nullptr;
3828	}
3829	if (Remainder != Start) {
3830	if (!Remainder)
3831	Remainder = SE.getConstant(Ty: S->getType(), V: `0`);
3832	return SE.getAddRecExpr(Start: Remainder, Step,
3833	L: cast<SCEVAddRecExpr>(Val: S)->getLoop(),
3834	// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
3835	Flags: SCEV::FlagAnyWrap);
3836	}
3837	} else if (match(S, P: m_scev_Mul(Op0: m_SCEVConstant(V&: Op0), Op1: m_SCEV(V&: Op1)))) {
3838	// Break (C (a + b + c)) into Ca + Cb + Cc.
3839	C = C ? cast<SCEVConstant>(Val: SE.getMulExpr(LHS: C, RHS: Op0)) : Op0;
3840	const SCEV *Remainder = CollectSubexprs(S: Op1, C, Ops, L, SE, Depth: Depth + `1`);
3841	if (Remainder)
3842	Ops.push_back(Elt: SE.getMulExpr(LHS: C, RHS: Remainder));
3843	return nullptr;
3844	}
3845	return S;
3846	}
3847
3848	/// Return true if the SCEV represents a value that may end up as a
3849	/// post-increment operation.
3850	static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
3851	LSRUse &LU, const SCEV S, const* Loop *L,
3852	ScalarEvolution &SE) {
3853	if (LU.Kind != LSRUse::Address \|\|
3854	!LU.AccessTy.getType()->isIntOrIntVectorTy())
3855	return false;
3856	const SCEV *Start;
3857	if (!match(S, P: m_scev_AffineAddRec(Op0: m_SCEV(V&: Start), Op1: m_SCEVConstant())))
3858	return false;
3859	// Check if a post-indexed load/store can be used.
3860	if (TTI.isIndexedLoadLegal(Mode: TTI.MIM_PostInc, Ty: S->getType()) \|\|
3861	TTI.isIndexedStoreLegal(Mode: TTI.MIM_PostInc, Ty: S->getType())) {
3862	if (!isa<SCEVConstant>(Val: Start) && SE.isLoopInvariant(S: Start, L))
3863	return true;
3864	}
3865	return false;
3866	}
3867
3868	/// Helper function for LSRInstance::GenerateReassociations.
3869	void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3870	const Formula &Base,
3871	unsigned Depth, size_t Idx,
3872	bool IsScaledReg) {
3873	const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs [Idx];
3874	// Don't generate reassociations for the base register of a value that
3875	// may generate a post-increment operator. The reason is that the
3876	// reassociations cause extra base+register formula to be created,
3877	// and possibly chosen, but the post-increment is more efficient.
3878	if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, S: BaseReg, L, SE))
3879	return;
3880	SmallVector<const SCEV *, `8`> AddOps;
3881	const SCEV Remainder = CollectSubexprs(S: BaseReg, C: nullptr*, Ops&: AddOps, L, SE);
3882	if (Remainder)
3883	AddOps.push_back(Elt: Remainder);
3884
3885	if (AddOps.size() == `1`)
3886	return;
3887
3888	for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
3889	JE = AddOps.end();
3890	J != JE; ++J) {
3891	// Loop-variant "unknown" values are uninteresting; we won't be able to
3892	// do anything meaningful with them.
3893	if (isa<SCEVUnknown>(Val: J) && !SE.isLoopInvariant(S: J, L))
3894	continue;
3895
3896	// Don't pull a constant into a register if the constant could be folded
3897	// into an immediate field.
3898	if (isAlwaysFoldable(TTI, SE, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
3899	AccessTy: LU.AccessTy, S: *J, HasBaseReg: Base.getNumRegs() > `1`))
3900	continue;
3901
3902	// Collect all operands except J.*
3903	SmallVector<const SCEV *, `8`> InnerAddOps(std::as_const(t&: AddOps).begin(), J);
3904	InnerAddOps.append(in_start: std::next(x: J), in_end: std::as_const(t&: AddOps).end());
3905
3906	// Don't leave just a constant behind in a register if the constant could
3907	// be folded into an immediate field.
3908	if (InnerAddOps.size() == `1` &&
3909	isAlwaysFoldable(TTI, SE, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind,
3910	AccessTy: LU.AccessTy, S: InnerAddOps [`0`], HasBaseReg: Base.getNumRegs() > `1`))
3911	continue;
3912
3913	const SCEV *InnerSum = SE.getAddExpr(Ops&: InnerAddOps);
3914	if (InnerSum->isZero())
3915	continue;
3916	Formula F = Base;
3917
3918	if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
3919	continue;
3920
3921	// Add the remaining pieces of the add back into the new formula.
3922	const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(Val: InnerSum);
3923	if (InnerSumSC && SE.getTypeSizeInBits(Ty: InnerSumSC->getType()) <= `64` &&
3924	TTI.isLegalAddImmediate(Imm: (uint64_t)F.UnfoldedOffset.getFixedValue() +
3925	InnerSumSC->getValue()->getZExtValue())) {
3926	F.UnfoldedOffset =
3927	Immediate::getFixed(MinVal: (uint64_t)F.UnfoldedOffset.getFixedValue() +
3928	InnerSumSC->getValue()->getZExtValue());
3929	if (IsScaledReg) {
3930	F.ScaledReg = nullptr;
3931	F.Scale = `0`;
3932	} else
3933	F.BaseRegs.erase(CI: F.BaseRegs.begin() + Idx);
3934	} else if (IsScaledReg)
3935	F.ScaledReg = InnerSum;
3936	else
3937	F.BaseRegs [Idx] = InnerSum;
3938
3939	// Add J as its own register, or an unfolded immediate.
3940	const SCEVConstant SC = dyn_cast<SCEVConstant>(Val: J);
3941	if (SC && SE.getTypeSizeInBits(Ty: SC->getType()) <= `64` &&
3942	TTI.isLegalAddImmediate(Imm: (uint64_t)F.UnfoldedOffset.getFixedValue() +
3943	SC->getValue()->getZExtValue()))
3944	F.UnfoldedOffset =
3945	Immediate::getFixed(MinVal: (uint64_t)F.UnfoldedOffset.getFixedValue() +
3946	SC->getValue()->getZExtValue());
3947	else
3948	F.BaseRegs.push_back(Elt: *J);
3949	// We may have changed the number of register in base regs, adjust the
3950	// formula accordingly.
3951	F.canonicalize(L: *L);
3952
3953	if (InsertFormula(LU, LUIdx, F))
3954	// If that formula hadn't been seen before, recurse to find more like
3955	// it.
3956	// Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2)
3957	// Because just Depth is not enough to bound compile time.
3958	// This means that every time AddOps.size() is greater 16^x we will add
3959	// x to Depth.
3960	GenerateReassociations(LU, LUIdx, Base: LU.Formulae.back(),
3961	Depth: Depth + `1` + (Log2_32(Value: AddOps.size()) >> `2`));
3962	}
3963	}
3964
3965	/// Split out subexpressions from adds and the bases of addrecs.
3966	void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
3967	Formula Base, unsigned Depth) {
3968	assert(Base.isCanonical(*L) && "Input must be in the canonical form");
3969	// Arbitrarily cap recursion to protect compile time.
3970	if (Depth >= `3`)
3971	return;
3972
3973	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i)
3974	GenerateReassociationsImpl(LU, LUIdx, Base, Depth, Idx: i);
3975
3976	if (Base.Scale == `1`)
3977	GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
3978	/ Idx / -`1`, / IsScaledReg / true);
3979	}
3980
3981	/// Generate a formula consisting of all of the loop-dominating registers added
3982	/// into a single register.
3983	void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
3984	Formula Base) {
3985	// This method is only interesting on a plurality of registers.
3986	if (Base.BaseRegs.size() + (Base.Scale == `1`) +
3987	(Base.UnfoldedOffset.isNonZero()) <=
3988	`1`)
3989	return;
3990
3991	// Flatten the representation, i.e., reg1 + 1reg2 => reg1 + reg2, before*
3992	// processing the formula.
3993	Base.unscale();
3994	SmallVector<const SCEV *, `4`> Ops;
3995	Formula NewBase = Base;
3996	NewBase.BaseRegs.clear();
3997	Type CombinedIntegerType = nullptr*;
3998	for (const SCEV *BaseReg : Base.BaseRegs) {
3999	if (SE.properlyDominates(S: BaseReg, BB: L->getHeader()) &&
4000	!SE.hasComputableLoopEvolution(S: BaseReg, L)) {
4001	if (!CombinedIntegerType)
4002	CombinedIntegerType = SE.getEffectiveSCEVType(Ty: BaseReg->getType());
4003	Ops.push_back(Elt: BaseReg);
4004	}
4005	else
4006	NewBase.BaseRegs.push_back(Elt: BaseReg);
4007	}
4008
4009	// If no register is relevant, we're done.
4010	if (Ops.size() == `0`)
4011	return;
4012
4013	// Utility function for generating the required variants of the combined
4014	// registers.
4015	auto GenerateFormula = [&](const SCEV *Sum) {
4016	Formula F = NewBase;
4017
4018	// TODO: If Sum is zero, it probably means ScalarEvolution missed an
4019	// opportunity to fold something. For now, just ignore such cases
4020	// rather than proceed with zero in a register.
4021	if (Sum->isZero())
4022	return;
4023
4024	F.BaseRegs.push_back(Elt: Sum);
4025	F.canonicalize(L: *L);
4026	(void)InsertFormula(LU, LUIdx, F);
4027	};
4028
4029	// If we collected at least two registers, generate a formula combining them.
4030	if (Ops.size() > `1`) {
4031	SmallVector<const SCEV , `4`> OpsCopy(Ops); // Don't let SE modify Ops.*
4032	GenerateFormula (SE.getAddExpr(Ops&: OpsCopy));
4033	}
4034
4035	// If we have an unfolded offset, generate a formula combining it with the
4036	// registers collected.
4037	if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4038	assert(CombinedIntegerType && "Missing a type for the unfolded offset");
4039	Ops.push_back(Elt: SE.getConstant(Ty: CombinedIntegerType,
4040	V: NewBase.UnfoldedOffset.getFixedValue(), isSigned: true));
4041	NewBase.UnfoldedOffset = Immediate::getFixed(MinVal: `0`);
4042	GenerateFormula (SE.getAddExpr(Ops));
4043	}
4044	}
4045
4046	/// Helper function for LSRInstance::GenerateSymbolicOffsets.
4047	void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
4048	const Formula &Base, size_t Idx,
4049	bool IsScaledReg) {
4050	const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs [Idx];
4051	GlobalValue *GV = ExtractSymbol(S&: G, SE);
4052	if (G->isZero() \|\| !GV)
4053	return;
4054	Formula F = Base;
4055	F.BaseGV = GV;
4056	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy, F))
4057	return;
4058	if (IsScaledReg)
4059	F.ScaledReg = G;
4060	else
4061	F.BaseRegs [Idx] = G;
4062	(void)InsertFormula(LU, LUIdx, F);
4063	}
4064
4065	/// Generate reuse formulae using symbolic offsets.
4066	void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
4067	Formula Base) {
4068	// We can't add a symbolic offset if the address already contains one.
4069	if (Base.BaseGV) return;
4070
4071	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i)
4072	GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, Idx: i);
4073	if (Base.Scale == `1`)
4074	GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, / Idx / -`1`,
4075	/ IsScaledReg / true);
4076	}
4077
4078	/// Helper function for LSRInstance::GenerateConstantOffsets.
4079	void LSRInstance::GenerateConstantOffsetsImpl(
4080	LSRUse &LU, unsigned LUIdx, const Formula &Base,
4081	const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {
4082
4083	auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
4084	Formula F = Base;
4085	if (!Base.BaseOffset.isCompatibleImmediate(Imm: Offset))
4086	return;
4087	F.BaseOffset = Base.BaseOffset.subUnsigned(RHS: Offset);
4088
4089	if (isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy, F)) {
4090	// Add the offset to the base register.
4091	const SCEV *NewOffset = Offset.getSCEV(SE, Ty: G->getType());
4092	const SCEV *NewG = SE.getAddExpr(LHS: NewOffset, RHS: G);
4093	// If it cancelled out, drop the base register, otherwise update it.
4094	if (NewG->isZero()) {
4095	if (IsScaledReg) {
4096	F.Scale = `0`;
4097	F.ScaledReg = nullptr;
4098	} else
4099	F.deleteBaseReg(S&: F.BaseRegs [Idx]);
4100	F.canonicalize(L: *L);
4101	} else if (IsScaledReg)
4102	F.ScaledReg = NewG;
4103	else
4104	F.BaseRegs [Idx] = NewG;
4105
4106	(void)InsertFormula(LU, LUIdx, F);
4107	}
4108	};
4109
4110	const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs [Idx];
4111
4112	// With constant offsets and constant steps, we can generate pre-inc
4113	// accesses by having the offset equal the step. So, for access #0 with a
4114	// step of 8, we generate a G - 8 base which would require the first access
4115	// to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
4116	// for itself and hopefully becomes the base for other accesses. This means
4117	// means that a single pre-indexed access can be generated to become the new
4118	// base pointer for each iteration of the loop, resulting in no extra add/sub
4119	// instructions for pointer updating.
4120	if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
4121	const APInt *StepInt;
4122	if (match(S: G, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_scev_APInt(C&: StepInt)))) {
4123	int64_t Step = StepInt->isNegative() ? StepInt->getSExtValue()
4124	: StepInt->getZExtValue();
4125
4126	for (Immediate Offset : Worklist) {
4127	if (Offset.isFixed()) {
4128	Offset = Immediate::getFixed(MinVal: Offset.getFixedValue() - Step);
4129	GenerateOffset (G, Offset);
4130	}
4131	}
4132	}
4133	}
4134	for (Immediate Offset : Worklist)
4135	GenerateOffset (G, Offset);
4136
4137	Immediate Imm = ExtractImmediate(S&: G, SE);
4138	if (G->isZero() \|\| Imm.isZero() \|\|
4139	!Base.BaseOffset.isCompatibleImmediate(Imm))
4140	return;
4141	Formula F = Base;
4142	F.BaseOffset = F.BaseOffset.addUnsigned(RHS: Imm);
4143	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy, F))
4144	return;
4145	if (IsScaledReg) {
4146	F.ScaledReg = G;
4147	} else {
4148	F.BaseRegs [Idx] = G;
4149	// We may generate non canonical Formula if G is a recurrent expr reg
4150	// related with current loop while F.ScaledReg is not.
4151	F.canonicalize(L: *L);
4152	}
4153	(void)InsertFormula(LU, LUIdx, F);
4154	}
4155
4156	/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
4157	void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
4158	Formula Base) {
4159	// TODO: For now, just add the min and max offset, because it usually isn't
4160	// worthwhile looking at everything inbetween.
4161	SmallVector<Immediate, `2`> Worklist;
4162	Worklist.push_back(Elt: LU.MinOffset);
4163	if (LU.MaxOffset != LU.MinOffset)
4164	Worklist.push_back(Elt: LU.MaxOffset);
4165
4166	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i)
4167	GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, Idx: i);
4168	if (Base.Scale == `1`)
4169	GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, / Idx / -`1`,
4170	/ IsScaledReg / true);
4171	}
4172
4173	/// For ICmpZero, check to see if we can scale up the comparison. For example, x
4174	/// == y -> xc == yc.
4175	void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
4176	Formula Base) {
4177	if (LU.Kind != LSRUse::ICmpZero) return;
4178
4179	// Determine the integer type for the base formula.
4180	Type *IntTy = Base.getType();
4181	if (!IntTy) return;
4182	if (SE.getTypeSizeInBits(Ty: IntTy) > `64`) return;
4183
4184	// Don't do this if there is more than one offset.
4185	if (LU.MinOffset != LU.MaxOffset) return;
4186
4187	// Check if transformation is valid. It is illegal to multiply pointer.
4188	if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4189	return;
4190	for (const SCEV *BaseReg : Base.BaseRegs)
4191	if (BaseReg->getType()->isPointerTy())
4192	return;
4193	assert(!Base.BaseGV && "ICmpZero use is not legal!");
4194
4195	// Check each interesting stride.
4196	for (int64_t Factor : Factors) {
4197	// Check that Factor can be represented by IntTy
4198	if (!ConstantInt::isValueValidForType(Ty: IntTy, V: Factor))
4199	continue;
4200	// Check that the multiplication doesn't overflow.
4201	if (Base.BaseOffset.isMin() && Factor == -`1`)
4202	continue;
4203	// Not supporting scalable immediates.
4204	if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
4205	continue;
4206	Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(RHS: Factor);
4207	assert(Factor != `0` && "Zero factor not expected!");
4208	if (NewBaseOffset.getFixedValue() / Factor !=
4209	Base.BaseOffset.getFixedValue())
4210	continue;
4211	// If the offset will be truncated at this use, check that it is in bounds.
4212	if (!IntTy->isPointerTy() &&
4213	!ConstantInt::isValueValidForType(Ty: IntTy, V: NewBaseOffset.getFixedValue()))
4214	continue;
4215
4216	// Check that multiplying with the use offset doesn't overflow.
4217	Immediate Offset = LU.MinOffset;
4218	if (Offset.isMin() && Factor == -`1`)
4219	continue;
4220	Offset = Offset.mulUnsigned(RHS: Factor);
4221	if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4222	continue;
4223	// If the offset will be truncated at this use, check that it is in bounds.
4224	if (!IntTy->isPointerTy() &&
4225	!ConstantInt::isValueValidForType(Ty: IntTy, V: Offset.getFixedValue()))
4226	continue;
4227
4228	Formula F = Base;
4229	F.BaseOffset = NewBaseOffset;
4230
4231	// Check that this scale is legal.
4232	if (!isLegalUse(TTI, MinOffset: Offset, MaxOffset: Offset, Kind: LU.Kind, AccessTy: LU.AccessTy, F))
4233	continue;
4234
4235	// Compensate for the use having MinOffset built into it.
4236	F.BaseOffset = F.BaseOffset.addUnsigned(RHS: Offset).subUnsigned(RHS: LU.MinOffset);
4237
4238	const SCEV *FactorS = SE.getConstant(Ty: IntTy, V: Factor);
4239
4240	// Check that multiplying with each base register doesn't overflow.
4241	for (size_t i = `0`, e = F.BaseRegs.size(); i != e; ++i) {
4242	F.BaseRegs [i] = SE.getMulExpr(LHS: F.BaseRegs [i], RHS: FactorS);
4243	if (getExactSDiv(LHS: F.BaseRegs [i], RHS: FactorS, SE) != Base.BaseRegs [i])
4244	goto next;
4245	}
4246
4247	// Check that multiplying with the scaled register doesn't overflow.
4248	if (F.ScaledReg) {
4249	F.ScaledReg = SE.getMulExpr(LHS: F.ScaledReg, RHS: FactorS);
4250	if (getExactSDiv(LHS: F.ScaledReg, RHS: FactorS, SE) != Base.ScaledReg)
4251	continue;
4252	}
4253
4254	// Check that multiplying with the unfolded offset doesn't overflow.
4255	if (F.UnfoldedOffset.isNonZero()) {
4256	if (F.UnfoldedOffset.isMin() && Factor == -`1`)
4257	continue;
4258	F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(RHS: Factor);
4259	if (F.UnfoldedOffset.getFixedValue() / Factor !=
4260	Base.UnfoldedOffset.getFixedValue())
4261	continue;
4262	// If the offset will be truncated, check that it is in bounds.
4263	if (!IntTy->isPointerTy() && !ConstantInt::isValueValidForType(
4264	Ty: IntTy, V: F.UnfoldedOffset.getFixedValue()))
4265	continue;
4266	}
4267
4268	// If we make it here and it's legal, add it.
4269	(void)InsertFormula(LU, LUIdx, F);
4270	next:;
4271	}
4272	}
4273
4274	/// Generate stride factor reuse formulae by making use of scaled-offset address
4275	/// modes, for example.
4276	void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
4277	// Determine the integer type for the base formula.
4278	Type *IntTy = Base.getType();
4279	if (!IntTy) return;
4280
4281	// If this Formula already has a scaled register, we can't add another one.
4282	// Try to unscale the formula to generate a better scale.
4283	if (Base.Scale != `0` && !Base.unscale())
4284	return;
4285
4286	assert(Base.Scale == `0` && "unscale did not did its job!");
4287
4288	// Check each interesting stride.
4289	for (int64_t Factor : Factors) {
4290	Base.Scale = Factor;
4291	Base.HasBaseReg = Base.BaseRegs.size() > `1`;
4292	// Check whether this scale is going to be legal.
4293	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy,
4294	F: Base)) {
4295	// As a special-case, handle special out-of-loop Basic users specially.
4296	// TODO: Reconsider this special case.
4297	if (LU.Kind == LSRUse::Basic &&
4298	isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LSRUse::Special,
4299	AccessTy: LU.AccessTy, F: Base) &&
4300	LU.AllFixupsOutsideLoop)
4301	LU.Kind = LSRUse::Special;
4302	else
4303	continue;
4304	}
4305	// For an ICmpZero, negating a solitary base register won't lead to
4306	// new solutions.
4307	if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
4308	Base.BaseOffset.isZero() && !Base.BaseGV)
4309	continue;
4310	// For each addrec base reg, if its loop is current loop, apply the scale.
4311	for (size_t i = `0`, e = Base.BaseRegs.size(); i != e; ++i) {
4312	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Base.BaseRegs [i]);
4313	if (AR && (AR->getLoop() == L \|\| LU.AllFixupsOutsideLoop)) {
4314	const SCEV *FactorS = SE.getConstant(Ty: IntTy, V: Factor);
4315	if (FactorS->isZero())
4316	continue;
4317	// Divide out the factor, ignoring high bits, since we'll be
4318	// scaling the value back up in the end.
4319	if (const SCEV Quotient = getExactSDiv(LHS: AR, RHS: FactorS, SE, IgnoreSignificantBits: true*))
4320	if (!Quotient->isZero()) {
4321	// TODO: This could be optimized to avoid all the copying.
4322	Formula F = Base;
4323	F.ScaledReg = Quotient;
4324	F.deleteBaseReg(S&: F.BaseRegs [i]);
4325	// The canonical representation of 1reg is reg, which is already in*
4326	// Base. In that case, do not try to insert the formula, it will be
4327	// rejected anyway.
4328	if (F.Scale == `1` && (F.BaseRegs.empty() \|\|
4329	(AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
4330	continue;
4331	// If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
4332	// non canonical Formula with ScaledReg's loop not being L.
4333	if (F.Scale == `1` && LU.AllFixupsOutsideLoop)
4334	F.canonicalize(L: *L);
4335	(void)InsertFormula(LU, LUIdx, F);
4336	}
4337	}
4338	}
4339	}
4340	}
4341
4342	/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
4343	/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
4344	/// perform the extension/truncate and normalize again, as the normalized form
4345	/// can result in folds that are not valid in the post-inc use contexts. The
4346	/// expressions for all PostIncLoopSets must match, otherwise return nullptr.
4347	static const SCEV *
4348	getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops,
4349	const SCEV Expr, Type ToTy,
4350	ScalarEvolution &SE) {
4351	const SCEV Result = nullptr*;
4352	for (auto &L : Loops) {
4353	auto *DenormExpr = denormalizeForPostIncUse(S: Expr, Loops: L, SE);
4354	const SCEV *NewDenormExpr = SE.getAnyExtendExpr(Op: DenormExpr, Ty: ToTy);
4355	const SCEV *New = normalizeForPostIncUse(S: NewDenormExpr, Loops: L, SE);
4356	if (!New \|\| (Result && New != Result))
4357	return nullptr;
4358	Result = New;
4359	}
4360
4361	assert(Result && "failed to create expression");
4362	return Result;
4363	}
4364
4365	/// Generate reuse formulae from different IV types.
4366	void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
4367	// Don't bother truncating symbolic values.
4368	if (Base.BaseGV) return;
4369
4370	// Determine the integer type for the base formula.
4371	Type *DstTy = Base.getType();
4372	if (!DstTy) return;
4373	if (DstTy->isPointerTy())
4374	return;
4375
4376	// It is invalid to extend a pointer type so exit early if ScaledReg or
4377	// any of the BaseRegs are pointers.
4378	if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4379	return;
4380	if (any_of(Range&: Base.BaseRegs,
4381	P: [](const SCEV S) { return* S->getType()->isPointerTy(); }))
4382	return;
4383
4384	SmallVector<PostIncLoopSet> Loops;
4385	for (auto &LF : LU.Fixups)
4386	Loops.push_back(Elt: LF.PostIncLoops);
4387
4388	for (Type *SrcTy : Types) {
4389	if (SrcTy != DstTy && TTI.isTruncateFree(Ty1: SrcTy, Ty2: DstTy)) {
4390	Formula F = Base;
4391
4392	// Sometimes SCEV is able to prove zero during ext transform. It may
4393	// happen if SCEV did not do all possible transforms while creating the
4394	// initial node (maybe due to depth limitations), but it can do them while
4395	// taking ext.
4396	if (F.ScaledReg) {
4397	const SCEV *NewScaledReg =
4398	getAnyExtendConsideringPostIncUses(Loops, Expr: F.ScaledReg, ToTy: SrcTy, SE);
4399	if (!NewScaledReg \|\| NewScaledReg->isZero())
4400	continue;
4401	F.ScaledReg = NewScaledReg;
4402	}
4403	bool HasZeroBaseReg = false;
4404	for (const SCEV *&BaseReg : F.BaseRegs) {
4405	const SCEV *NewBaseReg =
4406	getAnyExtendConsideringPostIncUses(Loops, Expr: BaseReg, ToTy: SrcTy, SE);
4407	if (!NewBaseReg \|\| NewBaseReg->isZero()) {
4408	HasZeroBaseReg = true;
4409	break;
4410	}
4411	BaseReg = NewBaseReg;
4412	}
4413	if (HasZeroBaseReg)
4414	continue;
4415
4416	// TODO: This assumes we've done basic processing on all uses and
4417	// have an idea what the register usage is.
4418	if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4419	continue;
4420
4421	F.canonicalize(L: *L);
4422	(void)InsertFormula(LU, LUIdx, F);
4423	}
4424	}
4425	}
4426
4427	namespace {
4428
4429	/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
4430	/// modifications so that the search phase doesn't have to worry about the data
4431	/// structures moving underneath it.
4432	struct WorkItem {
4433	size_t LUIdx;
4434	Immediate Imm;
4435	const SCEV *OrigReg;
4436
4437	WorkItem(size_t LI, Immediate I, const SCEV *R)
4438	: LUIdx(LI), Imm (I), OrigReg(R) {}
4439
4440	void print(raw_ostream &OS) const;
4441	void dump() const;
4442	};
4443
4444	} // end anonymous namespace
4445
4446	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
4447	void WorkItem::print(raw_ostream &OS) const {
4448	OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
4449	<< " , add offset " << Imm;
4450	}
4451
4452	LLVM_DUMP_METHOD void WorkItem::dump() const {
4453	print(errs()); errs() << `'\n'`;
4454	}
4455	#endif
4456
4457	/// Look for registers which are a constant distance apart and try to form reuse
4458	/// opportunities between them.
4459	void LSRInstance::GenerateCrossUseConstantOffsets() {
4460	// Group the registers by their value without any added constant offset.
4461	using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4462
4463	DenseMap<const SCEV *, ImmMapTy> Map;
4464	DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4465	SmallVector<const SCEV *, `8`> Sequence;
4466	for (const SCEV *Use : RegUses) {
4467	const SCEV Reg = Use; // Make a copy for ExtractImmediate to modify.*
4468	Immediate Imm = ExtractImmediate(S&: Reg, SE);
4469	auto Pair = Map.try_emplace(Key: Reg);
4470	if (Pair.second)
4471	Sequence.push_back(Elt: Reg);
4472	Pair.first ->second.insert(x: std::make_pair(x&: Imm, y&: Use));
4473	UsedByIndicesMap [Reg] \|= RegUses.getUsedByIndices(Reg: Use);
4474	}
4475
4476	// Now examine each set of registers with the same base value. Build up
4477	// a list of work to do and do the work in a separate step so that we're
4478	// not adding formulae and register counts while we're searching.
4479	SmallVector<WorkItem, `32`> WorkItems;
4480	SmallSet<std::pair<size_t, Immediate>, `32`, KeyOrderSizeTAndImmediate>
4481	UniqueItems;
4482	for (const SCEV *Reg : Sequence) {
4483	const ImmMapTy &Imms = Map.find(Val: Reg)->second;
4484
4485	// It's not worthwhile looking for reuse if there's only one offset.
4486	if (Imms.size() == `1`)
4487	continue;
4488
4489	LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << `':'`;
4490	for (const auto &Entry
4491	: Imms) dbgs()
4492	<< `' '` << Entry.first;
4493	dbgs() << `'\n'`);
4494
4495	// Examine each offset.
4496	for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4497	J != JE; ++J) {
4498	const SCEV *OrigReg = J ->second;
4499
4500	Immediate JImm = J ->first;
4501	const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg: OrigReg);
4502
4503	if (!isa<SCEVConstant>(Val: OrigReg) &&
4504	UsedByIndicesMap [Reg].count() == `1`) {
4505	LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
4506	<< `'\n'`);
4507	continue;
4508	}
4509
4510	// Conservatively examine offsets between this orig reg a few selected
4511	// other orig regs.
4512	Immediate First = Imms.begin()->first;
4513	Immediate Last = std::prev(x: Imms.end())->first;
4514	if (!First.isCompatibleImmediate(Imm: Last)) {
4515	LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
4516	<< "\n");
4517	continue;
4518	}
4519	// Only scalable if both terms are scalable, or if one is scalable and
4520	// the other is 0.
4521	bool Scalable = First.isScalable() \|\| Last.isScalable();
4522	int64_t FI = First.getKnownMinValue();
4523	int64_t LI = Last.getKnownMinValue();
4524	// Compute (First + Last) / 2 without overflow using the fact that
4525	// First + Last = 2 (First + Last) + (First ^ Last).*
4526	int64_t Avg = (FI & LI) + ((FI ^ LI) >> `1`);
4527	// If the result is negative and FI is odd and LI even (or vice versa),
4528	// we rounded towards -inf. Add 1 in that case, to round towards 0.
4529	Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> `63`));
4530	ImmMapTy::const_iterator OtherImms[] = {
4531	Imms.begin(), std::prev(x: Imms.end()),
4532	Imms.lower_bound(x: Immediate::get(MinVal: Avg, Scalable))};
4533	for (const auto &M : OtherImms) {
4534	if (M == J \|\| M == JE) continue;
4535	if (!JImm.isCompatibleImmediate(Imm: M ->first))
4536	continue;
4537
4538	// Compute the difference between the two.
4539	Immediate Imm = JImm.subUnsigned(RHS: M ->first);
4540	for (unsigned LUIdx : UsedByIndices.set_bits())
4541	// Make a memo of this use, offset, and register tuple.
4542	if (UniqueItems.insert(V: std::make_pair(x&: LUIdx, y&: Imm)).second)
4543	WorkItems.push_back(Elt: WorkItem (LUIdx, Imm, OrigReg));
4544	}
4545	}
4546	}
4547
4548	Map.clear();
4549	Sequence.clear();
4550	UsedByIndicesMap.clear();
4551	UniqueItems.clear();
4552
4553	// Now iterate through the worklist and add new formulae.
4554	for (const WorkItem &WI : WorkItems) {
4555	size_t LUIdx = WI.LUIdx;
4556	LSRUse &LU = Uses [LUIdx];
4557	Immediate Imm = WI.Imm;
4558	const SCEV *OrigReg = WI.OrigReg;
4559
4560	Type *IntTy = SE.getEffectiveSCEVType(Ty: OrigReg->getType());
4561	const SCEV *NegImmS = Imm.getNegativeSCEV(SE, Ty: IntTy);
4562	unsigned BitWidth = SE.getTypeSizeInBits(Ty: IntTy);
4563
4564	// TODO: Use a more targeted data structure.
4565	for (size_t L = `0`, LE = LU.Formulae.size(); L != LE; ++L) {
4566	Formula F = LU.Formulae [L];
4567	// FIXME: The code for the scaled and unscaled registers looks
4568	// very similar but slightly different. Investigate if they
4569	// could be merged. That way, we would not have to unscale the
4570	// Formula.
4571	F.unscale();
4572	// Use the immediate in the scaled register.
4573	if (F.ScaledReg == OrigReg) {
4574	if (!F.BaseOffset.isCompatibleImmediate(Imm))
4575	continue;
4576	Immediate Offset = F.BaseOffset.addUnsigned(RHS: Imm.mulUnsigned(RHS: F.Scale));
4577	// Don't create 50 + reg(-50).
4578	const SCEV *S = Offset.getNegativeSCEV(SE, Ty: IntTy);
4579	if (F.referencesReg(S))
4580	continue;
4581	Formula NewF = F;
4582	NewF.BaseOffset = Offset;
4583	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset, Kind: LU.Kind, AccessTy: LU.AccessTy,
4584	F: NewF))
4585	continue;
4586	NewF.ScaledReg = SE.getAddExpr(LHS: NegImmS, RHS: NewF.ScaledReg);
4587
4588	// If the new scale is a constant in a register, and adding the constant
4589	// value to the immediate would produce a value closer to zero than the
4590	// immediate itself, then the formula isn't worthwhile.
4591	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: NewF.ScaledReg)) {
4592	// FIXME: Do we need to do something for scalable immediates here?
4593	// A scalable SCEV won't be constant, but we might still have
4594	// something in the offset? Bail out for now to be safe.
4595	if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4596	continue;
4597	if (C->getValue()->isNegative() !=
4598	(NewF.BaseOffset.isLessThanZero()) &&
4599	(C->getAPInt().abs() * APInt (BitWidth, F.Scale))
4600	.ule(RHS: std::abs(i: NewF.BaseOffset.getFixedValue())))
4601	continue;
4602	}
4603
4604	// OK, looks good.
4605	NewF.canonicalize(L: *this->L);
4606	(void)InsertFormula(LU, LUIdx, F: NewF);
4607	} else {
4608	// Use the immediate in a base register.
4609	for (size_t N = `0`, NE = F.BaseRegs.size(); N != NE; ++N) {
4610	const SCEV *BaseReg = F.BaseRegs [N];
4611	if (BaseReg != OrigReg)
4612	continue;
4613	Formula NewF = F;
4614	if (!NewF.BaseOffset.isCompatibleImmediate(Imm) \|\|
4615	!NewF.UnfoldedOffset.isCompatibleImmediate(Imm) \|\|
4616	!NewF.BaseOffset.isCompatibleImmediate(Imm: NewF.UnfoldedOffset))
4617	continue;
4618	NewF.BaseOffset = NewF.BaseOffset.addUnsigned(RHS: Imm);
4619	if (!isLegalUse(TTI, MinOffset: LU.MinOffset, MaxOffset: LU.MaxOffset,
4620	Kind: LU.Kind, AccessTy: LU.AccessTy, F: NewF)) {
4621	if (AMK == TTI::AMK_PostIndexed &&
4622	mayUsePostIncMode(TTI, LU, S: OrigReg, L: this->L, SE))
4623	continue;
4624	Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(RHS: Imm);
4625	if (!isLegalAddImmediate(TTI, Offset: NewUnfoldedOffset))
4626	continue;
4627	NewF = F;
4628	NewF.UnfoldedOffset = NewUnfoldedOffset;
4629	}
4630	NewF.BaseRegs [N] = SE.getAddExpr(LHS: NegImmS, RHS: BaseReg);
4631
4632	// If the new formula has a constant in a register, and adding the
4633	// constant value to the immediate would produce a value closer to
4634	// zero than the immediate itself, then the formula isn't worthwhile.
4635	for (const SCEV *NewReg : NewF.BaseRegs)
4636	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: NewReg)) {
4637	if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4638	goto skip_formula;
4639	if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
4640	.abs()
4641	.slt(RHS: std::abs(i: NewF.BaseOffset.getFixedValue())) &&
4642	(C->getAPInt() + NewF.BaseOffset.getFixedValue())
4643	.countr_zero() >=
4644	(unsigned)llvm::countr_zero<uint64_t>(
4645	Val: NewF.BaseOffset.getFixedValue()))
4646	goto skip_formula;
4647	}
4648
4649	// Ok, looks good.
4650	NewF.canonicalize(L: *this->L);
4651	(void)InsertFormula(LU, LUIdx, F: NewF);
4652	break;
4653	skip_formula:;
4654	}
4655	}
4656	}
4657	}
4658	}
4659
4660	/// Generate formulae for each use.
4661	void
4662	LSRInstance::GenerateAllReuseFormulae() {
4663	// This is split into multiple loops so that hasRegsUsedByUsesOtherThan
4664	// queries are more precise.
4665	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4666	LSRUse &LU = Uses [LUIdx];
4667	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4668	GenerateReassociations(LU, LUIdx, Base: LU.Formulae [i]);
4669	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4670	GenerateCombinations(LU, LUIdx, Base: LU.Formulae [i]);
4671	}
4672	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4673	LSRUse &LU = Uses [LUIdx];
4674	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4675	GenerateSymbolicOffsets(LU, LUIdx, Base: LU.Formulae [i]);
4676	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4677	GenerateConstantOffsets(LU, LUIdx, Base: LU.Formulae [i]);
4678	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4679	GenerateICmpZeroScales(LU, LUIdx, Base: LU.Formulae [i]);
4680	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4681	GenerateScales(LU, LUIdx, Base: LU.Formulae [i]);
4682	}
4683	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4684	LSRUse &LU = Uses [LUIdx];
4685	for (size_t i = `0`, f = LU.Formulae.size(); i != f; ++i)
4686	GenerateTruncates(LU, LUIdx, Base: LU.Formulae [i]);
4687	}
4688
4689	GenerateCrossUseConstantOffsets();
4690
4691	LLVM_DEBUG(dbgs() << "\n"
4692	"After generating reuse formulae:\n";
4693	print_uses(dbgs()));
4694	}
4695
4696	/// If there are multiple formulae with the same set of registers used
4697	/// by other uses, pick the best one and delete the others.
4698	void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4699	DenseSet<const SCEV *> VisitedRegs;
4700	SmallPtrSet<const SCEV *, `16`> Regs;
4701	SmallPtrSet<const SCEV *, `16`> LoserRegs;
4702	#ifndef NDEBUG
4703	bool ChangedFormulae = false;
4704	#endif
4705
4706	// Collect the best formula for each unique set of shared registers. This
4707	// is reset for each use.
4708	using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, `4`>, size_t>;
4709
4710	BestFormulaeTy BestFormulae;
4711
4712	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4713	LSRUse &LU = Uses [LUIdx];
4714	LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
4715	dbgs() << `'\n'`);
4716
4717	bool Any = false;
4718	for (size_t FIdx = `0`, NumForms = LU.Formulae.size();
4719	FIdx != NumForms; ++FIdx) {
4720	Formula &F = LU.Formulae [FIdx];
4721
4722	// Some formulas are instant losers. For example, they may depend on
4723	// nonexistent AddRecs from other loops. These need to be filtered
4724	// immediately, otherwise heuristics could choose them over others leading
4725	// to an unsatisfactory solution. Passing LoserRegs into RateFormula here
4726	// avoids the need to recompute this information across formulae using the
4727	// same bad AddRec. Passing LoserRegs is also essential unless we remove
4728	// the corresponding bad register from the Regs set.
4729	Cost CostF(L, SE, TTI, AMK);
4730	Regs.clear();
4731	CostF.RateFormula(F, Regs, VisitedRegs, LU, LoserRegs: &LoserRegs);
4732	if (CostF.isLoser()) {
4733	// During initial formula generation, undesirable formulae are generated
4734	// by uses within other loops that have some non-trivial address mode or
4735	// use the postinc form of the IV. LSR needs to provide these formulae
4736	// as the basis of rediscovering the desired formula that uses an AddRec
4737	// corresponding to the existing phi. Once all formulae have been
4738	// generated, these initial losers may be pruned.
4739	LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
4740	dbgs() << "\n");
4741	}
4742	else {
4743	SmallVector<const SCEV *, `4`> Key;
4744	for (const SCEV *Reg : F.BaseRegs) {
4745	if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4746	Key.push_back(Elt: Reg);
4747	}
4748	if (F.ScaledReg &&
4749	RegUses.isRegUsedByUsesOtherThan(Reg: F.ScaledReg, LUIdx))
4750	Key.push_back(Elt: F.ScaledReg);
4751	// Unstable sort by host order ok, because this is only used for
4752	// uniquifying.
4753	llvm::sort(C&: Key);
4754
4755	std::pair<BestFormulaeTy::const_iterator, bool> P =
4756	BestFormulae.insert(KV: std::make_pair(x&: Key, y&: FIdx));
4757	if (P.second)
4758	continue;
4759
4760	Formula &Best = LU.Formulae [P.first ->second];
4761
4762	Cost CostBest(L, SE, TTI, AMK);
4763	Regs.clear();
4764	CostBest.RateFormula(F: Best, Regs, VisitedRegs, LU);
4765	if (CostF.isLess(Other: CostBest))
4766	std::swap(a&: F, b&: Best);
4767	LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
4768	dbgs() << "\n"
4769	" in favor of formula ";
4770	Best.print(dbgs()); dbgs() << `'\n'`);
4771	}
4772	#ifndef NDEBUG
4773	ChangedFormulae = true;
4774	#endif
4775	LU.DeleteFormula(F);
4776	--FIdx;
4777	--NumForms;
4778	Any = true;
4779	}
4780
4781	// Now that we've filtered out some formulae, recompute the Regs set.
4782	if (Any)
4783	LU.RecomputeRegs(LUIdx, RegUses);
4784
4785	// Reset this to prepare for the next use.
4786	BestFormulae.clear();
4787	}
4788
4789	LLVM_DEBUG(if (ChangedFormulae) {
4790	dbgs() << "\n"
4791	"After filtering out undesirable candidates:\n";
4792	print_uses(dbgs());
4793	});
4794	}
4795
4796	/// Estimate the worst-case number of solutions the solver might have to
4797	/// consider. It almost never considers this many solutions because it prune the
4798	/// search space, but the pruning isn't always sufficient.
4799	size_t LSRInstance::EstimateSearchSpaceComplexity() const {
4800	size_t Power = `1`;
4801	for (const LSRUse &LU : Uses) {
4802	size_t FSize = LU.Formulae.size();
4803	if (FSize >= ComplexityLimit) {
4804	Power = ComplexityLimit;
4805	break;
4806	}
4807	Power *= FSize;
4808	if (Power >= ComplexityLimit)
4809	break;
4810	}
4811	return Power;
4812	}
4813
4814	/// When one formula uses a superset of the registers of another formula, it
4815	/// won't help reduce register pressure (though it may not necessarily hurt
4816	/// register pressure); remove it to simplify the system.
4817	void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4818	if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4819	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4820
4821	LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
4822	"which use a superset of registers used by other "
4823	"formulae.\n");
4824
4825	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4826	LSRUse &LU = Uses [LUIdx];
4827	bool Any = false;
4828	for (size_t i = `0`, e = LU.Formulae.size(); i != e; ++i) {
4829	Formula &F = LU.Formulae [i];
4830	if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
4831	continue;
4832	// Look for a formula with a constant or GV in a register. If the use
4833	// also has a formula with that same value in an immediate field,
4834	// delete the one that uses a register.
4835	for (SmallVectorImpl<const SCEV *>::const_iterator
4836	I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4837	if (const SCEVConstant C = dyn_cast<SCEVConstant>(Val: I)) {
4838	Formula NewF = F;
4839	//FIXME: Formulas should store bitwidth to do wrapping properly.
4840	// See PR41034.
4841	NewF.BaseOffset =
4842	Immediate::getFixed(MinVal: NewF.BaseOffset.getFixedValue() +
4843	(uint64_t)C->getValue()->getSExtValue());
4844	NewF.BaseRegs.erase(CI: NewF.BaseRegs.begin() +
4845	(I - F.BaseRegs.begin()));
4846	if (LU.HasFormulaWithSameRegs(F: NewF)) {
4847	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4848	dbgs() << `'\n'`);
4849	LU.DeleteFormula(F);
4850	--i;
4851	--e;
4852	Any = true;
4853	break;
4854	}
4855	} else if (const SCEVUnknown U = dyn_cast<SCEVUnknown>(Val: I)) {
4856	if (GlobalValue *GV = dyn_cast<GlobalValue>(Val: U->getValue()))
4857	if (!F.BaseGV) {
4858	Formula NewF = F;
4859	NewF.BaseGV = GV;
4860	NewF.BaseRegs.erase(CI: NewF.BaseRegs.begin() +
4861	(I - F.BaseRegs.begin()));
4862	if (LU.HasFormulaWithSameRegs(F: NewF)) {
4863	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs());
4864	dbgs() << `'\n'`);
4865	LU.DeleteFormula(F);
4866	--i;
4867	--e;
4868	Any = true;
4869	break;
4870	}
4871	}
4872	}
4873	}
4874	}
4875	if (Any)
4876	LU.RecomputeRegs(LUIdx, RegUses);
4877	}
4878
4879	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4880	}
4881	}
4882
4883	/// When there are many registers for expressions like A, A+1, A+2, etc.,
4884	/// allocate a single register for them.
4885	void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4886	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4887	return;
4888
4889	LLVM_DEBUG(
4890	dbgs() << "The search space is too complex.\n"
4891	"Narrowing the search space by assuming that uses separated "
4892	"by a constant offset will use the same registers.\n");
4893
4894	// This is especially useful for unrolled loops.
4895
4896	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4897	LSRUse &LU = Uses [LUIdx];
4898	for (const Formula &F : LU.Formulae) {
4899	if (F.BaseOffset.isZero() \|\| (F.Scale != `0` && F.Scale != `1`))
4900	continue;
4901
4902	LSRUse *LUThatHas = FindUseWithSimilarFormula(OrigF: F, OrigLU: LU);
4903	if (!LUThatHas)
4904	continue;
4905
4906	if (!reconcileNewOffset(LU&: LUThatHas, NewOffset: F.BaseOffset, /HasBaseReg=/* false,
4907	Kind: LU.Kind, AccessTy: LU.AccessTy))
4908	continue;
4909
4910	LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << `'\n'`);
4911
4912	LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4913
4914	// Transfer the fixups of LU to LUThatHas.
4915	for (LSRFixup &Fixup : LU.Fixups) {
4916	Fixup.Offset += F.BaseOffset;
4917	LUThatHas->pushFixup(f&: Fixup);
4918	LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << `'\n'`);
4919	}
4920
4921	// Delete formulae from the new use which are no longer legal.
4922	bool Any = false;
4923	for (size_t i = `0`, e = LUThatHas->Formulae.size(); i != e; ++i) {
4924	Formula &F = LUThatHas->Formulae [i];
4925	if (!isLegalUse(TTI, MinOffset: LUThatHas->MinOffset, MaxOffset: LUThatHas->MaxOffset,
4926	Kind: LUThatHas->Kind, AccessTy: LUThatHas->AccessTy, F)) {
4927	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << `'\n'`);
4928	LUThatHas->DeleteFormula(F);
4929	--i;
4930	--e;
4931	Any = true;
4932	}
4933	}
4934
4935	if (Any)
4936	LUThatHas->RecomputeRegs(LUIdx: LUThatHas - &Uses.front(), RegUses);
4937
4938	// Delete the old use.
4939	DeleteUse(LU, LUIdx);
4940	--LUIdx;
4941	--NumUses;
4942	break;
4943	}
4944	}
4945
4946	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4947	}
4948
4949	/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
4950	/// we've done more filtering, as it may be able to find more formulae to
4951	/// eliminate.
4952	void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
4953	if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4954	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4955
4956	LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
4957	"undesirable dedicated registers.\n");
4958
4959	FilterOutUndesirableDedicatedRegisters();
4960
4961	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4962	}
4963	}
4964
4965	/// If a LSRUse has multiple formulae with the same ScaledReg and Scale.
4966	/// Pick the best one and delete the others.
4967	/// This narrowing heuristic is to keep as many formulae with different
4968	/// Scale and ScaledReg pair as possible while narrowing the search space.
4969	/// The benefit is that it is more likely to find out a better solution
4970	/// from a formulae set with more Scale and ScaledReg variations than
4971	/// a formulae set with the same Scale and ScaledReg. The picking winner
4972	/// reg heuristic will often keep the formulae with the same Scale and
4973	/// ScaledReg and filter others, and we want to avoid that if possible.
4974	void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
4975	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4976	return;
4977
4978	LLVM_DEBUG(
4979	dbgs() << "The search space is too complex.\n"
4980	"Narrowing the search space by choosing the best Formula "
4981	"from the Formulae with the same Scale and ScaledReg.\n");
4982
4983	// Map the "Scale ScaledReg" pair to the best formula of current LSRUse.*
4984	using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
4985
4986	BestFormulaeTy BestFormulae;
4987	#ifndef NDEBUG
4988	bool ChangedFormulae = false;
4989	#endif
4990	DenseSet<const SCEV *> VisitedRegs;
4991	SmallPtrSet<const SCEV *, `16`> Regs;
4992
4993	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4994	LSRUse &LU = Uses [LUIdx];
4995	LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
4996	dbgs() << `'\n'`);
4997
4998	// Return true if Formula FA is better than Formula FB.
4999	auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5000	// First we will try to choose the Formula with fewer new registers.
5001	// For a register used by current Formula, the more the register is
5002	// shared among LSRUses, the less we increase the register number
5003	// counter of the formula.
5004	size_t FARegNum = `0`;
5005	for (const SCEV *Reg : FA.BaseRegs) {
5006	const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5007	FARegNum += (NumUses - UsedByIndices.count() + `1`);
5008	}
5009	size_t FBRegNum = `0`;
5010	for (const SCEV *Reg : FB.BaseRegs) {
5011	const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5012	FBRegNum += (NumUses - UsedByIndices.count() + `1`);
5013	}
5014	if (FARegNum != FBRegNum)
5015	return FARegNum < FBRegNum;
5016
5017	// If the new register numbers are the same, choose the Formula with
5018	// less Cost.
5019	Cost CostFA(L, SE, TTI, AMK);
5020	Cost CostFB(L, SE, TTI, AMK);
5021	Regs.clear();
5022	CostFA.RateFormula(F: FA, Regs, VisitedRegs, LU);
5023	Regs.clear();
5024	CostFB.RateFormula(F: FB, Regs, VisitedRegs, LU);
5025	return CostFA.isLess(Other: CostFB);
5026	};
5027
5028	bool Any = false;
5029	for (size_t FIdx = `0`, NumForms = LU.Formulae.size(); FIdx != NumForms;
5030	++FIdx) {
5031	Formula &F = LU.Formulae [FIdx];
5032	if (!F.ScaledReg)
5033	continue;
5034	auto P = BestFormulae.insert(KV: {{F.ScaledReg, F.Scale}, FIdx});
5035	if (P.second)
5036	continue;
5037
5038	Formula &Best = LU.Formulae [P.first ->second];
5039	if (IsBetterThan (F, Best))
5040	std::swap(a&: F, b&: Best);
5041	LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
5042	dbgs() << "\n"
5043	" in favor of formula ";
5044	Best.print(dbgs()); dbgs() << `'\n'`);
5045	#ifndef NDEBUG
5046	ChangedFormulae = true;
5047	#endif
5048	LU.DeleteFormula(F);
5049	--FIdx;
5050	--NumForms;
5051	Any = true;
5052	}
5053	if (Any)
5054	LU.RecomputeRegs(LUIdx, RegUses);
5055
5056	// Reset this to prepare for the next use.
5057	BestFormulae.clear();
5058	}
5059
5060	LLVM_DEBUG(if (ChangedFormulae) {
5061	dbgs() << "\n"
5062	"After filtering out undesirable candidates:\n";
5063	print_uses(dbgs());
5064	});
5065	}
5066
5067	/// If we are over the complexity limit, filter out any post-inc prefering
5068	/// variables to only post-inc values.
5069	void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5070	if (AMK != TTI::AMK_PostIndexed)
5071	return;
5072	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5073	return;
5074
5075	LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
5076	"Narrowing the search space by choosing the lowest "
5077	"register Formula for PostInc Uses.\n");
5078
5079	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5080	LSRUse &LU = Uses [LUIdx];
5081
5082	if (LU.Kind != LSRUse::Address)
5083	continue;
5084	if (!TTI.isIndexedLoadLegal(Mode: TTI.MIM_PostInc, Ty: LU.AccessTy.getType()) &&
5085	!TTI.isIndexedStoreLegal(Mode: TTI.MIM_PostInc, Ty: LU.AccessTy.getType()))
5086	continue;
5087
5088	size_t MinRegs = std::numeric_limits<size_t>::max();
5089	for (const Formula &F : LU.Formulae)
5090	MinRegs = std::min(a: F.getNumRegs(), b: MinRegs);
5091
5092	bool Any = false;
5093	for (size_t FIdx = `0`, NumForms = LU.Formulae.size(); FIdx != NumForms;
5094	++FIdx) {
5095	Formula &F = LU.Formulae [FIdx];
5096	if (F.getNumRegs() > MinRegs) {
5097	LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
5098	dbgs() << "\n");
5099	LU.DeleteFormula(F);
5100	--FIdx;
5101	--NumForms;
5102	Any = true;
5103	}
5104	}
5105	if (Any)
5106	LU.RecomputeRegs(LUIdx, RegUses);
5107
5108	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5109	break;
5110	}
5111
5112	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5113	}
5114
5115	/// The function delete formulas with high registers number expectation.
5116	/// Assuming we don't know the value of each formula (already delete
5117	/// all inefficient), generate probability of not selecting for each
5118	/// register.
5119	/// For example,
5120	/// Use1:
5121	/// reg(a) + reg({0,+,1})
5122	/// reg(a) + reg({-1,+,1}) + 1
5123	/// reg({a,+,1})
5124	/// Use2:
5125	/// reg(b) + reg({0,+,1})
5126	/// reg(b) + reg({-1,+,1}) + 1
5127	/// reg({b,+,1})
5128	/// Use3:
5129	/// reg(c) + reg(b) + reg({0,+,1})
5130	/// reg(c) + reg({b,+,1})
5131	///
5132	/// Probability of not selecting
5133	/// Use1 Use2 Use3
5134	/// reg(a) (1/3) 1 * 1*
5135	/// reg(b) 1 (1/3) * (1/2)*
5136	/// reg({0,+,1}) (2/3) (2/3) * (1/2)*
5137	/// reg({-1,+,1}) (2/3) (2/3) * 1*
5138	/// reg({a,+,1}) (2/3) 1 * 1*
5139	/// reg({b,+,1}) 1 (2/3) * (2/3)*
5140	/// reg(c) 1 1 * 0*
5141	///
5142	/// Now count registers number mathematical expectation for each formula:
5143	/// Note that for each use we exclude probability if not selecting for the use.
5144	/// For example for Use1 probability for reg(a) would be just 1 1 (excluding*
5145	/// probabilty 1/3 of not selecting for Use1).
5146	/// Use1:
5147	/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted
5148	/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted
5149	/// reg({a,+,1}) 1
5150	/// Use2:
5151	/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted
5152	/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted
5153	/// reg({b,+,1}) 2/3
5154	/// Use3:
5155	/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
5156	/// reg(c) + reg({b,+,1}) 1 + 2/3
5157	void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5158	if (EstimateSearchSpaceComplexity() < ComplexityLimit)
5159	return;
5160	// Ok, we have too many of formulae on our hands to conveniently handle.
5161	// Use a rough heuristic to thin out the list.
5162
5163	// Set of Regs wich will be 100% used in final solution.
5164	// Used in each formula of a solution (in example above this is reg(c)).
5165	// We can skip them in calculations.
5166	SmallPtrSet<const SCEV *, `4`> UniqRegs;
5167	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5168
5169	// Map each register to probability of not selecting
5170	DenseMap <const SCEV , float*> RegNumMap;
5171	for (const SCEV *Reg : RegUses) {
5172	if (UniqRegs.count(Ptr: Reg))
5173	continue;
5174	float PNotSel = `1`;
5175	for (const LSRUse &LU : Uses) {
5176	if (!LU.Regs.count(Ptr: Reg))
5177	continue;
5178	float P = LU.getNotSelectedProbability(Reg);
5179	if (P != `0.0`)
5180	PNotSel *= P;
5181	else
5182	UniqRegs.insert(Ptr: Reg);
5183	}
5184	RegNumMap.insert(KV: std::make_pair(x&: Reg, y&: PNotSel));
5185	}
5186
5187	LLVM_DEBUG(
5188	dbgs() << "Narrowing the search space by deleting costly formulas\n");
5189
5190	// Delete formulas where registers number expectation is high.
5191	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5192	LSRUse &LU = Uses [LUIdx];
5193	// If nothing to delete - continue.
5194	if (LU.Formulae.size() < `2`)
5195	continue;
5196	// This is temporary solution to test performance. Float should be
5197	// replaced with round independent type (based on integers) to avoid
5198	// different results for different target builds.
5199	float FMinRegNum = LU.Formulae [`0`].getNumRegs();
5200	float FMinARegNum = LU.Formulae [`0`].getNumRegs();
5201	size_t MinIdx = `0`;
5202	for (size_t i = `0`, e = LU.Formulae.size(); i != e; ++i) {
5203	Formula &F = LU.Formulae [i];
5204	float FRegNum = `0`;
5205	float FARegNum = `0`;
5206	for (const SCEV *BaseReg : F.BaseRegs) {
5207	if (UniqRegs.count(Ptr: BaseReg))
5208	continue;
5209	FRegNum += RegNumMap [BaseReg] / LU.getNotSelectedProbability(Reg: BaseReg);
5210	if (isa<SCEVAddRecExpr>(Val: BaseReg))
5211	FARegNum +=
5212	RegNumMap [BaseReg] / LU.getNotSelectedProbability(Reg: BaseReg);
5213	}
5214	if (const SCEV *ScaledReg = F.ScaledReg) {
5215	if (!UniqRegs.count(Ptr: ScaledReg)) {
5216	FRegNum +=
5217	RegNumMap [ScaledReg] / LU.getNotSelectedProbability(Reg: ScaledReg);
5218	if (isa<SCEVAddRecExpr>(Val: ScaledReg))
5219	FARegNum +=
5220	RegNumMap [ScaledReg] / LU.getNotSelectedProbability(Reg: ScaledReg);
5221	}
5222	}
5223	if (FMinRegNum > FRegNum \|\|
5224	(FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5225	FMinRegNum = FRegNum;
5226	FMinARegNum = FARegNum;
5227	MinIdx = i;
5228	}
5229	}
5230	LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
5231	dbgs() << " with min reg num " << FMinRegNum << `'\n'`);
5232	if (MinIdx != `0`)
5233	std::swap(a&: LU.Formulae [MinIdx], b&: LU.Formulae [`0`]);
5234	while (LU.Formulae.size() != `1`) {
5235	LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
5236	dbgs() << `'\n'`);
5237	LU.Formulae.pop_back();
5238	}
5239	LU.RecomputeRegs(LUIdx, RegUses);
5240	assert(LU.Formulae.size() == `1` && "Should be exactly 1 min regs formula");
5241	Formula &F = LU.Formulae [`0`];
5242	LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << `'\n'`);
5243	// When we choose the formula, the regs become unique.
5244	UniqRegs.insert_range(R&: F.BaseRegs);
5245	if (F.ScaledReg)
5246	UniqRegs.insert(Ptr: F.ScaledReg);
5247	}
5248	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5249	}
5250
5251	// Check if Best and Reg are SCEVs separated by a constant amount C, and if so
5252	// would the addressing offset +C would be legal where the negative offset -C is
5253	// not.
5254	static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
5255	ScalarEvolution &SE, const SCEV *Best,
5256	const SCEV *Reg,
5257	MemAccessTy AccessType) {
5258	if (Best->getType() != Reg->getType() \|\|
5259	(isa<SCEVAddRecExpr>(Val: Best) && isa<SCEVAddRecExpr>(Val: Reg) &&
5260	cast<SCEVAddRecExpr>(Val: Best)->getLoop() !=
5261	cast<SCEVAddRecExpr>(Val: Reg)->getLoop()))
5262	return false;
5263	std::optional<APInt> Diff = SE.computeConstantDifference(LHS: Best, RHS: Reg);
5264	if (!Diff)
5265	return false;
5266
5267	return TTI.isLegalAddressingMode(
5268	Ty: AccessType.MemTy, /BaseGV=/nullptr,
5269	/BaseOffset=/Diff ->getSExtValue(),
5270	/HasBaseReg=/true, /Scale=/`0`, AddrSpace: AccessType.AddrSpace) &&
5271	!TTI.isLegalAddressingMode(
5272	Ty: AccessType.MemTy, /BaseGV=/nullptr,
5273	/BaseOffset=/-Diff ->getSExtValue(),
5274	/HasBaseReg=/true, /Scale=/`0`, AddrSpace: AccessType.AddrSpace);
5275	}
5276
5277	/// Pick a register which seems likely to be profitable, and then in any use
5278	/// which has any reference to that register, delete all formulae which do not
5279	/// reference that register.
5280	void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5281	// With all other options exhausted, loop until the system is simple
5282	// enough to handle.
5283	SmallPtrSet<const SCEV *, `4`> Taken;
5284	while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
5285	// Ok, we have too many of formulae on our hands to conveniently handle.
5286	// Use a rough heuristic to thin out the list.
5287	LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5288
5289	// Pick the register which is used by the most LSRUses, which is likely
5290	// to be a good reuse register candidate.
5291	const SCEV Best = nullptr*;
5292	unsigned BestNum = `0`;
5293	for (const SCEV *Reg : RegUses) {
5294	if (Taken.count(Ptr: Reg))
5295	continue;
5296	if (!Best) {
5297	Best = Reg;
5298	BestNum = RegUses.getUsedByIndices(Reg).count();
5299	} else {
5300	unsigned Count = RegUses.getUsedByIndices(Reg).count();
5301	if (Count > BestNum) {
5302	Best = Reg;
5303	BestNum = Count;
5304	}
5305
5306	// If the scores are the same, but the Reg is simpler for the target
5307	// (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
5308	// handle +C but not -C), opt for the simpler formula.
5309	if (Count == BestNum) {
5310	int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5311	if (LUIdx >= `0` && Uses [LUIdx].Kind == LSRUse::Address &&
5312	IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
5313	AccessType: Uses [LUIdx].AccessTy)) {
5314	Best = Reg;
5315	BestNum = Count;
5316	}
5317	}
5318	}
5319	}
5320	assert(Best && "Failed to find best LSRUse candidate");
5321
5322	LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
5323	<< " will yield profitable reuse.\n");
5324	Taken.insert(Ptr: Best);
5325
5326	// In any use with formulae which references this register, delete formulae
5327	// which don't reference it.
5328	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5329	LSRUse &LU = Uses [LUIdx];
5330	if (!LU.Regs.count(Ptr: Best)) continue;
5331
5332	bool Any = false;
5333	for (size_t i = `0`, e = LU.Formulae.size(); i != e; ++i) {
5334	Formula &F = LU.Formulae [i];
5335	if (!F.referencesReg(S: Best)) {
5336	LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << `'\n'`);
5337	LU.DeleteFormula(F);
5338	--e;
5339	--i;
5340	Any = true;
5341	assert(e != `0` && "Use has no formulae left! Is Regs inconsistent?");
5342	continue;
5343	}
5344	}
5345
5346	if (Any)
5347	LU.RecomputeRegs(LUIdx, RegUses);
5348	}
5349
5350	LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
5351	}
5352	}
5353
5354	/// If there are an extraordinary number of formulae to choose from, use some
5355	/// rough heuristics to prune down the number of formulae. This keeps the main
5356	/// solver from taking an extraordinary amount of time in some worst-case
5357	/// scenarios.
5358	void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5359	NarrowSearchSpaceByDetectingSupersets();
5360	NarrowSearchSpaceByCollapsingUnrolledCode();
5361	NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5362	if (FilterSameScaledReg)
5363	NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5364	NarrowSearchSpaceByFilterPostInc();
5365	if (LSRExpNarrow)
5366	NarrowSearchSpaceByDeletingCostlyFormulas();
5367	else
5368	NarrowSearchSpaceByPickingWinnerRegs();
5369	}
5370
5371	/// This is the recursive solver.
5372	void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5373	Cost &SolutionCost,
5374	SmallVectorImpl<const Formula *> &Workspace,
5375	const Cost &CurCost,
5376	const SmallPtrSet<const SCEV *, `16`> &CurRegs,
5377	DenseSet<const SCEV > &VisitedRegs) const* {
5378	// Some ideas:
5379	// - prune more:
5380	// - use more aggressive filtering
5381	// - sort the formula so that the most profitable solutions are found first
5382	// - sort the uses too
5383	// - search faster:
5384	// - don't compute a cost, and then compare. compare while computing a cost
5385	// and bail early.
5386	// - track register sets with SmallBitVector
5387
5388	const LSRUse &LU = Uses [Workspace.size()];
5389
5390	// If this use references any register that's already a part of the
5391	// in-progress solution, consider it a requirement that a formula must
5392	// reference that register in order to be considered. This prunes out
5393	// unprofitable searching.
5394	SmallSetVector<const SCEV *, `4`> ReqRegs;
5395	for (const SCEV *S : CurRegs)
5396	if (LU.Regs.count(Ptr: S))
5397	ReqRegs.insert(X: S);
5398
5399	SmallPtrSet<const SCEV *, `16`> NewRegs;
5400	Cost NewCost(L, SE, TTI, AMK);
5401	for (const Formula &F : LU.Formulae) {
5402	// Ignore formulae which may not be ideal in terms of register reuse of
5403	// ReqRegs. The formula should use all required registers before
5404	// introducing new ones.
5405	// This can sometimes (notably when trying to favour postinc) lead to
5406	// sub-optimial decisions. There it is best left to the cost modelling to
5407	// get correct.
5408	if (AMK != TTI::AMK_PostIndexed \|\| LU.Kind != LSRUse::Address) {
5409	int NumReqRegsToFind = std::min(a: F.getNumRegs(), b: ReqRegs.size());
5410	for (const SCEV *Reg : ReqRegs) {
5411	if ((F.ScaledReg && F.ScaledReg == Reg) \|\|
5412	is_contained(Range: F.BaseRegs, Element: Reg)) {
5413	--NumReqRegsToFind;
5414	if (NumReqRegsToFind == `0`)
5415	break;
5416	}
5417	}
5418	if (NumReqRegsToFind != `0`) {
5419	// If none of the formulae satisfied the required registers, then we could
5420	// clear ReqRegs and try again. Currently, we simply give up in this case.
5421	continue;
5422	}
5423	}
5424
5425	// Evaluate the cost of the current formula. If it's already worse than
5426	// the current best, prune the search at that point.
5427	NewCost = CurCost;
5428	NewRegs = CurRegs;
5429	NewCost.RateFormula(F, Regs&: NewRegs, VisitedRegs, LU);
5430	if (NewCost.isLess(Other: SolutionCost)) {
5431	Workspace.push_back(Elt: &F);
5432	if (Workspace.size() != Uses.size()) {
5433	SolveRecurse(Solution, SolutionCost, Workspace, CurCost: NewCost,
5434	CurRegs: NewRegs, VisitedRegs);
5435	if (F.getNumRegs() == `1` && Workspace.size() == `1`)
5436	VisitedRegs.insert(V: F.ScaledReg ? F.ScaledReg : F.BaseRegs [`0`]);
5437	} else {
5438	LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
5439	dbgs() << ".\nRegs:\n";
5440	for (const SCEV *S : NewRegs) dbgs()
5441	<< "- " << *S << "\n";
5442	dbgs() << `'\n'`);
5443
5444	SolutionCost = NewCost;
5445	Solution = Workspace;
5446	}
5447	Workspace.pop_back();
5448	}
5449	}
5450	}
5451
5452	/// Choose one formula from each use. Return the results in the given Solution
5453	/// vector.
5454	void LSRInstance::Solve(SmallVectorImpl<const Formula > &Solution) const* {
5455	SmallVector<const Formula *, `8`> Workspace;
5456	Cost SolutionCost(L, SE, TTI, AMK);
5457	SolutionCost.Lose();
5458	Cost CurCost(L, SE, TTI, AMK);
5459	SmallPtrSet<const SCEV *, `16`> CurRegs;
5460	DenseSet<const SCEV *> VisitedRegs;
5461	Workspace.reserve(N: Uses.size());
5462
5463	// SolveRecurse does all the work.
5464	SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5465	CurRegs, VisitedRegs);
5466	if (Solution.empty()) {
5467	LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
5468	return;
5469	}
5470
5471	// Ok, we've now made all our decisions.
5472	LLVM_DEBUG(dbgs() << "\n"
5473	"The chosen solution requires ";
5474	SolutionCost.print(dbgs()); dbgs() << ":\n";
5475	for (size_t i = `0`, e = Uses.size(); i != e; ++i) {
5476	dbgs() << " ";
5477	Uses[i].print(dbgs());
5478	dbgs() << "\n"
5479	" ";
5480	Solution[i]->print(dbgs());
5481	dbgs() << `'\n'`;
5482	});
5483
5484	assert(Solution.size() == Uses.size() && "Malformed solution!");
5485
5486	const bool EnableDropUnprofitableSolution = [&] {
5487	switch (AllowDropSolutionIfLessProfitable) {
5488	case cl::BOU_TRUE:
5489	return true;
5490	case cl::BOU_FALSE:
5491	return false;
5492	case cl::BOU_UNSET:
5493	return TTI.shouldDropLSRSolutionIfLessProfitable();
5494	}
5495	llvm_unreachable("Unhandled cl::boolOrDefault enum");
5496	}();
5497
5498	if (BaselineCost.isLess(Other: SolutionCost)) {
5499	if (!EnableDropUnprofitableSolution)
5500	LLVM_DEBUG(
5501	dbgs() << "Baseline is more profitable than chosen solution, "
5502	"add option 'lsr-drop-solution' to drop LSR solution.\n");
5503	else {
5504	LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
5505	"solution, dropping LSR solution.\n";);
5506	Solution.clear();
5507	}
5508	}
5509	}
5510
5511	/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as
5512	/// we can go while still being dominated by the input positions. This helps
5513	/// canonicalize the insert position, which encourages sharing.
5514	BasicBlock::iterator
5515	LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
5516	const SmallVectorImpl<Instruction *> &Inputs)
5517	const {
5518	Instruction Tentative = &IP;
5519	while (true) {
5520	bool AllDominate = true;
5521	Instruction BetterPos = nullptr*;
5522	// Don't bother attempting to insert before a catchswitch, their basic block
5523	// cannot have other non-PHI instructions.
5524	if (isa<CatchSwitchInst>(Val: Tentative))
5525	return IP;
5526
5527	for (Instruction *Inst : Inputs) {
5528	if (Inst == Tentative \|\| !DT.dominates(Def: Inst, User: Tentative)) {
5529	AllDominate = false;
5530	break;
5531	}
5532	// Attempt to find an insert position in the middle of the block,
5533	// instead of at the end, so that it can be used for other expansions.
5534	if (Tentative->getParent() == Inst->getParent() &&
5535	(!BetterPos \|\| !DT.dominates(Def: Inst, User: BetterPos)))
5536	BetterPos = &*std::next(x: BasicBlock::iterator (Inst));
5537	}
5538	if (!AllDominate)
5539	break;
5540	if (BetterPos)
5541	IP = BetterPos->getIterator();
5542	else
5543	IP = Tentative->getIterator();
5544
5545	const Loop *IPLoop = LI.getLoopFor(BB: IP ->getParent());
5546	unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : `0`;
5547
5548	BasicBlock *IDom;
5549	for (DomTreeNode *Rung = DT.getNode(BB: IP ->getParent()); ; ) {
5550	if (!Rung) return IP;
5551	Rung = Rung->getIDom();
5552	if (!Rung) return IP;
5553	IDom = Rung->getBlock();
5554
5555	// Don't climb into a loop though.
5556	const Loop *IDomLoop = LI.getLoopFor(BB: IDom);
5557	unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : `0`;
5558	if (IDomDepth <= IPLoopDepth &&
5559	(IDomDepth != IPLoopDepth \|\| IDomLoop == IPLoop))
5560	break;
5561	}
5562
5563	Tentative = IDom->getTerminator();
5564	}
5565
5566	return IP;
5567	}
5568
5569	/// Determine an input position which will be dominated by the operands and
5570	/// which will dominate the result.
5571	BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
5572	BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
5573	// Collect some instructions which must be dominated by the
5574	// expanding replacement. These must be dominated by any operands that
5575	// will be required in the expansion.
5576	SmallVector<Instruction *, `4`> Inputs;
5577	if (Instruction *I = dyn_cast<Instruction>(Val: LF.OperandValToReplace))
5578	Inputs.push_back(Elt: I);
5579	if (LU.Kind == LSRUse::ICmpZero)
5580	if (Instruction *I =
5581	dyn_cast<Instruction>(Val: cast<ICmpInst>(Val: LF.UserInst)->getOperand(i_nocapture: `1`)))
5582	Inputs.push_back(Elt: I);
5583	if (LF.PostIncLoops.count(Ptr: L)) {
5584	if (LF.isUseFullyOutsideLoop(L))
5585	Inputs.push_back(Elt: L->getLoopLatch()->getTerminator());
5586	else
5587	Inputs.push_back(Elt: IVIncInsertPos);
5588	}
5589	// The expansion must also be dominated by the increment positions of any
5590	// loops it for which it is using post-inc mode.
5591	for (const Loop *PIL : LF.PostIncLoops) {
5592	if (PIL == L) continue;
5593
5594	// Be dominated by the loop exit.
5595	SmallVector<BasicBlock *, `4`> ExitingBlocks;
5596	PIL->getExitingBlocks(ExitingBlocks);
5597	if (!ExitingBlocks.empty()) {
5598	BasicBlock *BB = ExitingBlocks [`0`];
5599	for (unsigned i = `1`, e = ExitingBlocks.size(); i != e; ++i)
5600	BB = DT.findNearestCommonDominator(A: BB, B: ExitingBlocks [i]);
5601	Inputs.push_back(Elt: BB->getTerminator());
5602	}
5603	}
5604
5605	assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad() &&
5606	"Insertion point must be a normal instruction");
5607
5608	// Then, climb up the immediate dominator tree as far as we can go while
5609	// still being dominated by the input positions.
5610	BasicBlock::iterator IP = HoistInsertPosition(IP: LowestIP, Inputs);
5611
5612	// Don't insert instructions before PHI nodes.
5613	while (isa<PHINode>(Val: IP)) ++IP;
5614
5615	// Ignore landingpad instructions.
5616	while (IP ->isEHPad()) ++IP;
5617
5618	// Set IP below instructions recently inserted by SCEVExpander. This keeps the
5619	// IP consistent across expansions and allows the previously inserted
5620	// instructions to be reused by subsequent expansion.
5621	while (Rewriter.isInsertedInstruction(I: &*IP) && IP != LowestIP)
5622	++IP;
5623
5624	return IP;
5625	}
5626
5627	/// Emit instructions for the leading candidate expression for this LSRUse (this
5628	/// is called "expanding").
5629	Value LSRInstance::Expand(const* LSRUse &LU, const LSRFixup &LF,
5630	const Formula &F, BasicBlock::iterator IP,
5631	SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5632	if (LU.RigidFormula)
5633	return LF.OperandValToReplace;
5634
5635	// Determine an input position which will be dominated by the operands and
5636	// which will dominate the result.
5637	IP = AdjustInsertPositionForExpand(LowestIP: IP, LF, LU);
5638	Rewriter.setInsertPoint(&*IP);
5639
5640	// Inform the Rewriter if we have a post-increment use, so that it can
5641	// perform an advantageous expansion.
5642	Rewriter.setPostInc(LF.PostIncLoops);
5643
5644	// This is the type that the user actually needs.
5645	Type *OpTy = LF.OperandValToReplace->getType();
5646	// This will be the type that we'll initially expand to.
5647	Type *Ty = F.getType();
5648	if (!Ty)
5649	// No type known; just expand directly to the ultimate type.
5650	Ty = OpTy;
5651	else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(Ty: OpTy))
5652	// Expand directly to the ultimate type if it's the right size.
5653	Ty = OpTy;
5654	// This is the type to do integer arithmetic in.
5655	Type *IntTy = SE.getEffectiveSCEVType(Ty);
5656
5657	// Build up a list of operands to add together to form the full base.
5658	SmallVector<const SCEV *, `8`> Ops;
5659
5660	// Expand the BaseRegs portion.
5661	for (const SCEV *Reg : F.BaseRegs) {
5662	assert(!Reg->isZero() && "Zero allocated in a base register!");
5663
5664	// If we're expanding for a post-inc user, make the post-inc adjustment.
5665	Reg = denormalizeForPostIncUse(S: Reg, Loops: LF.PostIncLoops, SE);
5666	Ops.push_back(Elt: SE.getUnknown(V: Rewriter.expandCodeFor(SH: Reg, Ty: nullptr)));
5667	}
5668
5669	// Expand the ScaledReg portion.
5670	Value ICmpScaledV = nullptr*;
5671	if (F.Scale != `0`) {
5672	const SCEV *ScaledS = F.ScaledReg;
5673
5674	// If we're expanding for a post-inc user, make the post-inc adjustment.
5675	PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
5676	ScaledS = denormalizeForPostIncUse(S: ScaledS, Loops, SE);
5677
5678	if (LU.Kind == LSRUse::ICmpZero) {
5679	// Expand ScaleReg as if it was part of the base regs.
5680	if (F.Scale == `1`)
5681	Ops.push_back(
5682	Elt: SE.getUnknown(V: Rewriter.expandCodeFor(SH: ScaledS, Ty: nullptr)));
5683	else {
5684	// An interesting way of "folding" with an icmp is to use a negated
5685	// scale, which we'll implement by inserting it into the other operand
5686	// of the icmp.
5687	assert(F.Scale == -`1` &&
5688	"The only scale supported by ICmpZero uses is -1!");
5689	ICmpScaledV = Rewriter.expandCodeFor(SH: ScaledS, Ty: nullptr);
5690	}
5691	} else {
5692	// Otherwise just expand the scaled register and an explicit scale,
5693	// which is expected to be matched as part of the address.
5694
5695	// Flush the operand list to suppress SCEVExpander hoisting address modes.
5696	// Unless the addressing mode will not be folded.
5697	if (!Ops.empty() && LU.Kind == LSRUse::Address &&
5698	isAMCompletelyFolded(TTI, LU, F)) {
5699	Value FullV = Rewriter.expandCodeFor(SH: SE.getAddExpr(Ops), Ty: nullptr*);
5700	Ops.clear();
5701	Ops.push_back(Elt: SE.getUnknown(V: FullV));
5702	}
5703	ScaledS = SE.getUnknown(V: Rewriter.expandCodeFor(SH: ScaledS, Ty: nullptr));
5704	if (F.Scale != `1`)
5705	ScaledS =
5706	SE.getMulExpr(LHS: ScaledS, RHS: SE.getConstant(Ty: ScaledS->getType(), V: F.Scale));
5707	Ops.push_back(Elt: ScaledS);
5708	}
5709	}
5710
5711	// Expand the GV portion.
5712	if (F.BaseGV) {
5713	// Flush the operand list to suppress SCEVExpander hoisting.
5714	if (!Ops.empty()) {
5715	Value *FullV = Rewriter.expandCodeFor(SH: SE.getAddExpr(Ops), Ty: IntTy);
5716	Ops.clear();
5717	Ops.push_back(Elt: SE.getUnknown(V: FullV));
5718	}
5719	Ops.push_back(Elt: SE.getUnknown(V: F.BaseGV));
5720	}
5721
5722	// Flush the operand list to suppress SCEVExpander hoisting of both folded and
5723	// unfolded offsets. LSR assumes they both live next to their uses.
5724	if (!Ops.empty()) {
5725	Value *FullV = Rewriter.expandCodeFor(SH: SE.getAddExpr(Ops), Ty);
5726	Ops.clear();
5727	Ops.push_back(Elt: SE.getUnknown(V: FullV));
5728	}
5729
5730	// FIXME: Are we sure we won't get a mismatch here? Is there a way to bail
5731	// out at this point, or should we generate a SCEV adding together mixed
5732	// offsets?
5733	assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5734	"Expanding mismatched offsets\n");
5735	// Expand the immediate portion.
5736	Immediate Offset = F.BaseOffset.addUnsigned(RHS: LF.Offset);
5737	if (Offset.isNonZero()) {
5738	if (LU.Kind == LSRUse::ICmpZero) {
5739	// The other interesting way of "folding" with an ICmpZero is to use a
5740	// negated immediate.
5741	if (!ICmpScaledV)
5742	ICmpScaledV =
5743	ConstantInt::get(Ty: IntTy, V: -(uint64_t)Offset.getFixedValue());
5744	else {
5745	Ops.push_back(Elt: SE.getUnknown(V: ICmpScaledV));
5746	ICmpScaledV = ConstantInt::get(Ty: IntTy, V: Offset.getFixedValue());
5747	}
5748	} else {
5749	// Just add the immediate values. These again are expected to be matched
5750	// as part of the address.
5751	Ops.push_back(Elt: Offset.getUnknownSCEV(SE, Ty: IntTy));
5752	}
5753	}
5754
5755	// Expand the unfolded offset portion.
5756	Immediate UnfoldedOffset = F.UnfoldedOffset;
5757	if (UnfoldedOffset.isNonZero()) {
5758	// Just add the immediate values.
5759	Ops.push_back(Elt: UnfoldedOffset.getUnknownSCEV(SE, Ty: IntTy));
5760	}
5761
5762	// Emit instructions summing all the operands.
5763	const SCEV *FullS = Ops.empty() ?
5764	SE.getConstant(Ty: IntTy, V: `0`) :
5765	SE.getAddExpr(Ops);
5766	Value *FullV = Rewriter.expandCodeFor(SH: FullS, Ty);
5767
5768	// We're done expanding now, so reset the rewriter.
5769	Rewriter.clearPostInc();
5770
5771	// An ICmpZero Formula represents an ICmp which we're handling as a
5772	// comparison against zero. Now that we've expanded an expression for that
5773	// form, update the ICmp's other operand.
5774	if (LU.Kind == LSRUse::ICmpZero) {
5775	ICmpInst *CI = cast<ICmpInst>(Val: LF.UserInst);
5776	if (auto *OperandIsInstr = dyn_cast<Instruction>(Val: CI->getOperand(i_nocapture: `1`)))
5777	DeadInsts.emplace_back(Args&: OperandIsInstr);
5778	assert(!F.BaseGV && "ICmp does not support folding a global value and "
5779	"a scale at the same time!");
5780	if (F.Scale == -`1`) {
5781	if (ICmpScaledV->getType() != OpTy) {
5782	Instruction *Cast = CastInst::Create(
5783	CastInst::getCastOpcode(Val: ICmpScaledV, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false),
5784	S: ICmpScaledV, Ty: OpTy, Name: "tmp", InsertBefore: CI->getIterator());
5785	ICmpScaledV = Cast;
5786	}
5787	CI->setOperand(i_nocapture: `1`, Val_nocapture: ICmpScaledV);
5788	} else {
5789	// A scale of 1 means that the scale has been expanded as part of the
5790	// base regs.
5791	assert((F.Scale == `0` \|\| F.Scale == `1`) &&
5792	"ICmp does not support folding a global value and "
5793	"a scale at the same time!");
5794	Constant *C = ConstantInt::getSigned(Ty: SE.getEffectiveSCEVType(Ty: OpTy),
5795	V: -(uint64_t)Offset.getFixedValue());
5796	if (C->getType() != OpTy) {
5797	C = ConstantFoldCastOperand(
5798	Opcode: CastInst::getCastOpcode(Val: C, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false), C, DestTy: OpTy,
5799	DL: CI->getDataLayout());
5800	assert(C && "Cast of ConstantInt should have folded");
5801	}
5802
5803	CI->setOperand(i_nocapture: `1`, Val_nocapture: C);
5804	}
5805	}
5806
5807	return FullV;
5808	}
5809
5810	/// Helper for Rewrite. PHI nodes are special because the use of their operands
5811	/// effectively happens in their predecessor blocks, so the expression may need
5812	/// to be expanded in multiple places.
5813	void LSRInstance::RewriteForPHI(PHINode PN, const* LSRUse &LU,
5814	const LSRFixup &LF, const Formula &F,
5815	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5816	DenseMap<BasicBlock , Value > Inserted;
5817
5818	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i != e; ++i)
5819	if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
5820	bool needUpdateFixups = false;
5821	BasicBlock *BB = PN->getIncomingBlock(i);
5822
5823	// If this is a critical edge, split the edge so that we do not insert
5824	// the code on all predecessor/successor paths. We do this unless this
5825	// is the canonical backedge for this loop, which complicates post-inc
5826	// users.
5827	if (e != `1` && BB->getTerminator()->getNumSuccessors() > `1` &&
5828	!isa<IndirectBrInst>(Val: BB->getTerminator()) &&
5829	!isa<CatchSwitchInst>(Val: BB->getTerminator())) {
5830	BasicBlock *Parent = PN->getParent();
5831	Loop *PNLoop = LI.getLoopFor(BB: Parent);
5832	if (!PNLoop \|\| Parent != PNLoop->getHeader()) {
5833	// Split the critical edge.
5834	BasicBlock NewBB = nullptr*;
5835	if (!Parent->isLandingPad()) {
5836	NewBB =
5837	SplitCriticalEdge(Src: BB, Dst: Parent,
5838	Options: CriticalEdgeSplittingOptions (&DT, &LI, MSSAU)
5839	.setMergeIdenticalEdges()
5840	.setKeepOneInputPHIs());
5841	} else {
5842	SmallVector<BasicBlock*, `2`> NewBBs;
5843	DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5844	SplitLandingPadPredecessors(OrigBB: Parent, Preds: BB, Suffix: "", Suffix2: "", NewBBs, DTU: &DTU, LI: &LI);
5845	NewBB = NewBBs [`0`];
5846	}
5847	// If NewBB==NULL, then SplitCriticalEdge refused to split because all
5848	// phi predecessors are identical. The simple thing to do is skip
5849	// splitting in this case rather than complicate the API.
5850	if (NewBB) {
5851	// If PN is outside of the loop and BB is in the loop, we want to
5852	// move the block to be immediately before the PHI block, not
5853	// immediately after BB.
5854	if (L->contains(BB) && !L->contains(Inst: PN))
5855	NewBB->moveBefore(MovePos: PN->getParent());
5856
5857	// Splitting the edge can reduce the number of PHI entries we have.
5858	e = PN->getNumIncomingValues();
5859	BB = NewBB;
5860	i = PN->getBasicBlockIndex(BB);
5861
5862	needUpdateFixups = true;
5863	}
5864	}
5865	}
5866
5867	std::pair<DenseMap<BasicBlock , Value >::iterator, bool> Pair =
5868	Inserted.try_emplace(Key: BB);
5869	if (!Pair.second)
5870	PN->setIncomingValue(i, V: Pair.first ->second);
5871	else {
5872	Value *FullV =
5873	Expand(LU, LF, F, IP: BB->getTerminator()->getIterator(), DeadInsts);
5874
5875	// If this is reuse-by-noop-cast, insert the noop cast.
5876	Type *OpTy = LF.OperandValToReplace->getType();
5877	if (FullV->getType() != OpTy)
5878	FullV = CastInst::Create(
5879	CastInst::getCastOpcode(Val: FullV, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false), S: FullV,
5880	Ty: LF.OperandValToReplace->getType(), Name: "tmp",
5881	InsertBefore: BB->getTerminator()->getIterator());
5882
5883	// If the incoming block for this value is not in the loop, it means the
5884	// current PHI is not in a loop exit, so we must create a LCSSA PHI for
5885	// the inserted value.
5886	if (auto *I = dyn_cast<Instruction>(Val: FullV))
5887	if (L->contains(Inst: I) && !L->contains(BB))
5888	InsertedNonLCSSAInsts.insert(X: I);
5889
5890	PN->setIncomingValue(i, V: FullV);
5891	Pair.first ->second = FullV;
5892	}
5893
5894	// If LSR splits critical edge and phi node has other pending
5895	// fixup operands, we need to update those pending fixups. Otherwise
5896	// formulae will not be implemented completely and some instructions
5897	// will not be eliminated.
5898	if (needUpdateFixups) {
5899	for (LSRUse &LU : Uses)
5900	for (LSRFixup &Fixup : LU.Fixups)
5901	// If fixup is supposed to rewrite some operand in the phi
5902	// that was just updated, it may be already moved to
5903	// another phi node. Such fixup requires update.
5904	if (Fixup.UserInst == PN) {
5905	// Check if the operand we try to replace still exists in the
5906	// original phi.
5907	bool foundInOriginalPHI = false;
5908	for (const auto &val : PN->incoming_values())
5909	if (val == Fixup.OperandValToReplace) {
5910	foundInOriginalPHI = true;
5911	break;
5912	}
5913
5914	// If fixup operand found in original PHI - nothing to do.
5915	if (foundInOriginalPHI)
5916	continue;
5917
5918	// Otherwise it might be moved to another PHI and requires update.
5919	// If fixup operand not found in any of the incoming blocks that
5920	// means we have already rewritten it - nothing to do.
5921	for (const auto &Block : PN->blocks())
5922	for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(Val: I);
5923	++I) {
5924	PHINode *NewPN = cast<PHINode>(Val&: I);
5925	for (const auto &val : NewPN->incoming_values())
5926	if (val == Fixup.OperandValToReplace)
5927	Fixup.UserInst = NewPN;
5928	}
5929	}
5930	}
5931	}
5932	}
5933
5934	/// Emit instructions for the leading candidate expression for this LSRUse (this
5935	/// is called "expanding"), and update the UserInst to reference the newly
5936	/// expanded value.
5937	void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
5938	const Formula &F,
5939	SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
5940	// First, find an insertion point that dominates UserInst. For PHI nodes,
5941	// find the nearest block which dominates all the relevant uses.
5942	if (PHINode *PN = dyn_cast<PHINode>(Val: LF.UserInst)) {
5943	RewriteForPHI(PN, LU, LF, F, DeadInsts);
5944	} else {
5945	Value *FullV = Expand(LU, LF, F, IP: LF.UserInst->getIterator(), DeadInsts);
5946
5947	// If this is reuse-by-noop-cast, insert the noop cast.
5948	Type *OpTy = LF.OperandValToReplace->getType();
5949	if (FullV->getType() != OpTy) {
5950	Instruction *Cast =
5951	CastInst::Create(CastInst::getCastOpcode(Val: FullV, SrcIsSigned: false, Ty: OpTy, DstIsSigned: false),
5952	S: FullV, Ty: OpTy, Name: "tmp", InsertBefore: LF.UserInst->getIterator());
5953	FullV = Cast;
5954	}
5955
5956	// Update the user. ICmpZero is handled specially here (for now) because
5957	// Expand may have updated one of the operands of the icmp already, and
5958	// its new value may happen to be equal to LF.OperandValToReplace, in
5959	// which case doing replaceUsesOfWith leads to replacing both operands
5960	// with the same value. TODO: Reorganize this.
5961	if (LU.Kind == LSRUse::ICmpZero)
5962	LF.UserInst->setOperand(i: `0`, Val: FullV);
5963	else
5964	LF.UserInst->replaceUsesOfWith(From: LF.OperandValToReplace, To: FullV);
5965	}
5966
5967	if (auto *OperandIsInstr = dyn_cast<Instruction>(Val: LF.OperandValToReplace))
5968	DeadInsts.emplace_back(Args&: OperandIsInstr);
5969	}
5970
5971	// Trying to hoist the IVInc to loop header if all IVInc users are in
5972	// the loop header. It will help backend to generate post index load/store
5973	// when the latch block is different from loop header block.
5974	static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
5975	const LSRUse &LU, Instruction *IVIncInsertPos,
5976	Loop *L) {
5977	if (LU.Kind != LSRUse::Address)
5978	return false;
5979
5980	// For now this code do the conservative optimization, only work for
5981	// the header block. Later we can hoist the IVInc to the block post
5982	// dominate all users.
5983	BasicBlock *LHeader = L->getHeader();
5984	if (IVIncInsertPos->getParent() == LHeader)
5985	return false;
5986
5987	if (!Fixup.OperandValToReplace \|\|
5988	any_of(Range: Fixup.OperandValToReplace->users(), P: [&LHeader](User *U) {
5989	Instruction *UI = cast<Instruction>(Val: U);
5990	return UI->getParent() != LHeader;
5991	}))
5992	return false;
5993
5994	Instruction *I = Fixup.UserInst;
5995	Type *Ty = I->getType();
5996	return (isa<LoadInst>(Val: I) && TTI.isIndexedLoadLegal(Mode: TTI.MIM_PostInc, Ty)) \|\|
5997	(isa<StoreInst>(Val: I) && TTI.isIndexedStoreLegal(Mode: TTI.MIM_PostInc, Ty));
5998	}
5999
6000	/// Rewrite all the fixup locations with new values, following the chosen
6001	/// solution.
6002	void LSRInstance::ImplementSolution(
6003	const SmallVectorImpl<const Formula *> &Solution) {
6004	// Keep track of instructions we may have made dead, so that
6005	// we can remove them after we are done working.
6006	SmallVector<WeakTrackingVH, `16`> DeadInsts;
6007
6008	// Mark phi nodes that terminate chains so the expander tries to reuse them.
6009	for (const IVChain &Chain : IVChainVec) {
6010	if (PHINode *PN = dyn_cast<PHINode>(Val: Chain.tailUserInst()))
6011	Rewriter.setChainedPhi(PN);
6012	}
6013
6014	// Expand the new value definitions and update the users.
6015	for (size_t LUIdx = `0`, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
6016	for (const LSRFixup &Fixup : Uses [LUIdx].Fixups) {
6017	Instruction *InsertPos =
6018	canHoistIVInc(TTI, Fixup, LU: Uses [LUIdx], IVIncInsertPos, L)
6019	? L->getHeader()->getTerminator()
6020	: IVIncInsertPos;
6021	Rewriter.setIVIncInsertPos(L, Pos: InsertPos);
6022	Rewrite(LU: Uses [LUIdx], LF: Fixup, F: *Solution [LUIdx], DeadInsts);
6023	Changed = true;
6024	}
6025
6026	auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6027	formLCSSAForInstructions(Worklist&: InsertedInsts, DT, LI, SE: &SE);
6028
6029	for (const IVChain &Chain : IVChainVec) {
6030	GenerateIVChain(Chain, DeadInsts);
6031	Changed = true;
6032	}
6033
6034	for (const WeakVH &IV : Rewriter.getInsertedIVs())
6035	if (IV && dyn_cast<Instruction>(Val: &*IV)->getParent())
6036	ScalarEvolutionIVs.push_back(Elt: IV);
6037
6038	// Clean up after ourselves. This must be done before deleting any
6039	// instructions.
6040	Rewriter.clear();
6041
6042	Changed \|= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
6043	TLI: &TLI, MSSAU);
6044
6045	// In our cost analysis above, we assume that each addrec consumes exactly
6046	// one register, and arrange to have increments inserted just before the
6047	// latch to maximimize the chance this is true. However, if we reused
6048	// existing IVs, we now need to move the increments to match our
6049	// expectations. Otherwise, our cost modeling results in us having a
6050	// chosen a non-optimal result for the actual schedule. (And yes, this
6051	// scheduling decision does impact later codegen.)
6052	for (PHINode &PN : L->getHeader()->phis()) {
6053	BinaryOperator BO = nullptr*;
6054	Value Start = nullptr, Step = nullptr;
6055	if (!matchSimpleRecurrence(P: &PN, BO, Start, Step))
6056	continue;
6057
6058	switch (BO->getOpcode()) {
6059	case Instruction::Sub:
6060	if (BO->getOperand(i_nocapture: `0`) != &PN)
6061	// sub is non-commutative - match handling elsewhere in LSR
6062	continue;
6063	break;
6064	case Instruction::Add:
6065	break;
6066	default:
6067	continue;
6068	};
6069
6070	if (!isa<Constant>(Val: Step))
6071	// If not a constant step, might increase register pressure
6072	// (We assume constants have been canonicalized to RHS)
6073	continue;
6074
6075	if (BO->getParent() == IVIncInsertPos->getParent())
6076	// Only bother moving across blocks. Isel can handle block local case.
6077	continue;
6078
6079	// Can we legally schedule inc at the desired point?
6080	if (!llvm::all_of(Range: BO->uses(),
6081	P: [&](Use &U) {return DT.dominates(Def: IVIncInsertPos, U);}))
6082	continue;
6083	BO->moveBefore(InsertPos: IVIncInsertPos->getIterator());
6084	Changed = true;
6085	}
6086
6087
6088	}
6089
6090	LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6091	DominatorTree &DT, LoopInfo &LI,
6092	const TargetTransformInfo &TTI, AssumptionCache &AC,
6093	TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6094	: IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
6095	MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > `0`
6096	? PreferredAddresingMode
6097	: TTI.getPreferredAddressingMode(L, SE: &SE)),
6098	Rewriter (SE, L->getHeader()->getDataLayout(), "lsr", false),
6099	BaselineCost (L, SE, TTI, AMK) {
6100	// If LoopSimplify form is not available, stay out of trouble.
6101	if (!L->isLoopSimplifyForm())
6102	return;
6103
6104	// If there's no interesting work to be done, bail early.
6105	if (IU.empty()) return;
6106
6107	// If there's too much analysis to be done, bail early. We won't be able to
6108	// model the problem anyway.
6109	unsigned NumUsers = `0`;
6110	for (const IVStrideUse &U : IU) {
6111	if (++NumUsers > MaxIVUsers) {
6112	(void)U;
6113	LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
6114	<< "\n");
6115	return;
6116	}
6117	// Bail out if we have a PHI on an EHPad that gets a value from a
6118	// CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
6119	// no good place to stick any instructions.
6120	if (auto *PN = dyn_cast<PHINode>(Val: U.getUser())) {
6121	auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();
6122	if (isa<FuncletPadInst>(Val: FirstNonPHI) \|\|
6123	isa<CatchSwitchInst>(Val: FirstNonPHI))
6124	for (BasicBlock *PredBB : PN->blocks())
6125	if (isa<CatchSwitchInst>(Val: PredBB->getFirstNonPHIIt()))
6126	return;
6127	}
6128	}
6129
6130	LLVM_DEBUG(dbgs() << "\nLSR on loop ";
6131	L->getHeader()->printAsOperand(dbgs(), /PrintType=/false);
6132	dbgs() << ":\n");
6133
6134	// Configure SCEVExpander already now, so the correct mode is used for
6135	// isSafeToExpand() checks.
6136	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6137	Rewriter.setDebugType(DEBUG_TYPE);
6138	#endif
6139	Rewriter.disableCanonicalMode();
6140	Rewriter.enableLSRMode();
6141
6142	// First, perform some low-level loop optimizations.
6143	OptimizeShadowIV();
6144	OptimizeLoopTermCond();
6145
6146	// If loop preparation eliminates all interesting IV users, bail.
6147	if (IU.empty()) return;
6148
6149	// Skip nested loops until we can model them better with formulae.
6150	if (!L->isInnermost()) {
6151	LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
6152	return;
6153	}
6154
6155	// Start collecting data and preparing for the solver.
6156	// If number of registers is not the major cost, we cannot benefit from the
6157	// current profitable chain optimization which is based on number of
6158	// registers.
6159	// FIXME: add profitable chain optimization for other kinds major cost, for
6160	// example number of instructions.
6161	if (TTI.isNumRegsMajorCostOfLSR() \|\| StressIVChain)
6162	CollectChains();
6163	CollectInterestingTypesAndFactors();
6164	CollectFixupsAndInitialFormulae();
6165	CollectLoopInvariantFixupsAndFormulae();
6166
6167	if (Uses.empty())
6168	return;
6169
6170	LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
6171	print_uses(dbgs()));
6172	LLVM_DEBUG(dbgs() << "The baseline solution requires ";
6173	BaselineCost.print(dbgs()); dbgs() << "\n");
6174
6175	// Now use the reuse data to generate a bunch of interesting ways
6176	// to formulate the values needed for the uses.
6177	GenerateAllReuseFormulae();
6178
6179	FilterOutUndesirableDedicatedRegisters();
6180	NarrowSearchSpaceUsingHeuristics();
6181
6182	SmallVector<const Formula *, `8`> Solution;
6183	Solve(Solution);
6184
6185	// Release memory that is no longer needed.
6186	Factors.clear();
6187	Types.clear();
6188	RegUses.clear();
6189
6190	if (Solution.empty())
6191	return;
6192
6193	#ifndef NDEBUG
6194	// Formulae should be legal.
6195	for (const LSRUse &LU : Uses) {
6196	for (const Formula &F : LU.Formulae)
6197	assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
6198	F) && "Illegal formula generated!");
6199	};
6200	#endif
6201
6202	// Now that we've decided what we want, make it so.
6203	ImplementSolution(Solution);
6204	}
6205
6206	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
6207	void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
6208	if (Factors.empty() && Types.empty()) return;
6209
6210	OS << "LSR has identified the following interesting factors and types: ";
6211	bool First = true;
6212
6213	for (int64_t Factor : Factors) {
6214	if (!First) OS << ", ";
6215	First = false;
6216	OS << `'*'` << Factor;
6217	}
6218
6219	for (Type *Ty : Types) {
6220	if (!First) OS << ", ";
6221	First = false;
6222	OS << `'('` << *Ty << `')'`;
6223	}
6224	OS << `'\n'`;
6225	}
6226
6227	void LSRInstance::print_fixups(raw_ostream &OS) const {
6228	OS << "LSR is examining the following fixup sites:\n";
6229	for (const LSRUse &LU : Uses)
6230	for (const LSRFixup &LF : LU.Fixups) {
6231	dbgs() << " ";
6232	LF.print(OS);
6233	OS << `'\n'`;
6234	}
6235	}
6236
6237	void LSRInstance::print_uses(raw_ostream &OS) const {
6238	OS << "LSR is examining the following uses:\n";
6239	for (const LSRUse &LU : Uses) {
6240	dbgs() << " ";
6241	LU.print(OS);
6242	OS << `'\n'`;
6243	for (const Formula &F : LU.Formulae) {
6244	OS << " ";
6245	F.print(OS);
6246	OS << `'\n'`;
6247	}
6248	}
6249	}
6250
6251	void LSRInstance::print(raw_ostream &OS) const {
6252	print_factors_and_types(OS);
6253	print_fixups(OS);
6254	print_uses(OS);
6255	}
6256
6257	LLVM_DUMP_METHOD void LSRInstance::dump() const {
6258	print(errs()); errs() << `'\n'`;
6259	}
6260	#endif
6261
6262	namespace {
6263
6264	class LoopStrengthReduce : public LoopPass {
6265	public:
6266	static char ID; // Pass ID, replacement for typeid
6267
6268	LoopStrengthReduce();
6269
6270	private:
6271	bool runOnLoop(Loop *L, LPPassManager &LPM) override;
6272	void getAnalysisUsage(AnalysisUsage &AU) const override;
6273	};
6274
6275	} // end anonymous namespace
6276
6277	LoopStrengthReduce::LoopStrengthReduce() : LoopPass (ID) {
6278	initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
6279	}
6280
6281	void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
6282	// We split critical edges, so we change the CFG. However, we do update
6283	// many analyses if they are around.
6284	AU.addPreservedID(ID&: LoopSimplifyID);
6285
6286	AU.addRequired<LoopInfoWrapperPass>();
6287	AU.addPreserved<LoopInfoWrapperPass>();
6288	AU.addRequiredID(ID&: LoopSimplifyID);
6289	AU.addRequired<DominatorTreeWrapperPass>();
6290	AU.addPreserved<DominatorTreeWrapperPass>();
6291	AU.addRequired<ScalarEvolutionWrapperPass>();
6292	AU.addPreserved<ScalarEvolutionWrapperPass>();
6293	AU.addRequired<AssumptionCacheTracker>();
6294	AU.addRequired<TargetLibraryInfoWrapperPass>();
6295	// Requiring LoopSimplify a second time here prevents IVUsers from running
6296	// twice, since LoopSimplify was invalidated by running ScalarEvolution.
6297	AU.addRequiredID(ID&: LoopSimplifyID);
6298	AU.addRequired<IVUsersWrapperPass>();
6299	AU.addPreserved<IVUsersWrapperPass>();
6300	AU.addRequired<TargetTransformInfoWrapperPass>();
6301	AU.addPreserved<MemorySSAWrapperPass>();
6302	}
6303
6304	namespace {
6305
6306	/// Enables more convenient iteration over a DWARF expression vector.
6307	static iterator_range<llvm::DIExpression::expr_op_iterator>
6308	ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6309	llvm::DIExpression::expr_op_iterator Begin =
6310	llvm::DIExpression::expr_op_iterator (Expr.begin());
6311	llvm::DIExpression::expr_op_iterator End =
6312	llvm::DIExpression::expr_op_iterator (Expr.end());
6313	return {Begin, End};
6314	}
6315
6316	struct SCEVDbgValueBuilder {
6317	SCEVDbgValueBuilder() = default;
6318	SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }
6319
6320	void clone(const SCEVDbgValueBuilder &Base) {
6321	LocationOps = Base.LocationOps;
6322	Expr = Base.Expr;
6323	}
6324
6325	void clear() {
6326	LocationOps.clear();
6327	Expr.clear();
6328	}
6329
6330	/// The DIExpression as we translate the SCEV.
6331	SmallVector<uint64_t, `6`> Expr;
6332	/// The location ops of the DIExpression.
6333	SmallVector<Value *, `2`> LocationOps;
6334
6335	void pushOperator(uint64_t Op) { Expr.push_back(Elt: Op); }
6336	void pushUInt(uint64_t Operand) { Expr.push_back(Elt: Operand); }
6337
6338	/// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
6339	/// in the set of values referenced by the expression.
6340	void pushLocation(llvm::Value *V) {
6341	Expr.push_back(Elt: llvm::dwarf::DW_OP_LLVM_arg);
6342	auto *It = llvm::find(Range&: LocationOps, Val: V);
6343	unsigned ArgIndex = `0`;
6344	if (It != LocationOps.end()) {
6345	ArgIndex = std::distance(first: LocationOps.begin(), last: It);
6346	} else {
6347	ArgIndex = LocationOps.size();
6348	LocationOps.push_back(Elt: V);
6349	}
6350	Expr.push_back(Elt: ArgIndex);
6351	}
6352
6353	void pushValue(const SCEVUnknown *U) {
6354	llvm::Value *V = cast<SCEVUnknown>(Val: U)->getValue();
6355	pushLocation(V);
6356	}
6357
6358	bool pushConst(const SCEVConstant *C) {
6359	if (C->getAPInt().getSignificantBits() > `64`)
6360	return false;
6361	Expr.push_back(Elt: llvm::dwarf::DW_OP_consts);
6362	Expr.push_back(Elt: C->getAPInt().getSExtValue());
6363	return true;
6364	}
6365
6366	// Iterating the expression as DWARF ops is convenient when updating
6367	// DWARF_OP_LLVM_args.
6368	iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
6369	return ToDwarfOpIter(Expr);
6370	}
6371
6372	/// Several SCEV types are sequences of the same arithmetic operator applied
6373	/// to constants and values that may be extended or truncated.
6374	bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
6375	uint64_t DwarfOp) {
6376	assert((isa<llvm::SCEVAddExpr>(CommExpr) \|\| isa<SCEVMulExpr>(CommExpr)) &&
6377	"Expected arithmetic SCEV type");
6378	bool Success = true;
6379	unsigned EmitOperator = `0`;
6380	for (const auto &Op : CommExpr->operands()) {
6381	Success &= pushSCEV(S: Op);
6382
6383	if (EmitOperator >= `1`)
6384	pushOperator(Op: DwarfOp);
6385	++EmitOperator;
6386	}
6387	return Success;
6388	}
6389
6390	// TODO: Identify and omit noop casts.
6391	bool pushCast(const llvm::SCEVCastExpr C, bool* IsSigned) {
6392	const llvm::SCEV *Inner = C->getOperand(i: `0`);
6393	const llvm::Type *Type = C->getType();
6394	uint64_t ToWidth = Type->getIntegerBitWidth();
6395	bool Success = pushSCEV(S: Inner);
6396	uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
6397	IsSigned ? llvm::dwarf::DW_ATE_signed
6398	: llvm::dwarf::DW_ATE_unsigned};
6399	for (const auto &Op : CastOps)
6400	pushOperator(Op);
6401	return Success;
6402	}
6403
6404	// TODO: MinMax - although these haven't been encountered in the test suite.
6405	bool pushSCEV(const llvm::SCEV *S) {
6406	bool Success = true;
6407	if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(Val: S)) {
6408	Success &= pushConst(C: StartInt);
6409
6410	} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: S)) {
6411	if (!U->getValue())
6412	return false;
6413	pushLocation(V: U->getValue());
6414
6415	} else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(Val: S)) {
6416	Success &= pushArithmeticExpr(CommExpr: MulRec, DwarfOp: llvm::dwarf::DW_OP_mul);
6417
6418	} else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(Val: S)) {
6419	Success &= pushSCEV(S: UDiv->getLHS());
6420	Success &= pushSCEV(S: UDiv->getRHS());
6421	pushOperator(Op: llvm::dwarf::DW_OP_div);
6422
6423	} else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(Val: S)) {
6424	// Assert if a new and unknown SCEVCastEXpr type is encountered.
6425	assert((isa<SCEVZeroExtendExpr>(Cast) \|\| isa<SCEVTruncateExpr>(Cast) \|\|
6426	isa<SCEVPtrToIntExpr>(Cast) \|\| isa<SCEVSignExtendExpr>(Cast)) &&
6427	"Unexpected cast type in SCEV.");
6428	Success &= pushCast(C: Cast, IsSigned: (isa<SCEVSignExtendExpr>(Val: Cast)));
6429
6430	} else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(Val: S)) {
6431	Success &= pushArithmeticExpr(CommExpr: AddExpr, DwarfOp: llvm::dwarf::DW_OP_plus);
6432
6433	} else if (isa<SCEVAddRecExpr>(Val: S)) {
6434	// Nested SCEVAddRecExpr are generated by nested loops and are currently
6435	// unsupported.
6436	return false;
6437
6438	} else {
6439	return false;
6440	}
6441	return Success;
6442	}
6443
6444	/// Return true if the combination of arithmetic operator and underlying
6445	/// SCEV constant value is an identity function.
6446	bool isIdentityFunction(uint64_t Op, const SCEV *S) {
6447	if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: S)) {
6448	if (C->getAPInt().getSignificantBits() > `64`)
6449	return false;
6450	int64_t I = C->getAPInt().getSExtValue();
6451	switch (Op) {
6452	case llvm::dwarf::DW_OP_plus:
6453	case llvm::dwarf::DW_OP_minus:
6454	return I == `0`;
6455	case llvm::dwarf::DW_OP_mul:
6456	case llvm::dwarf::DW_OP_div:
6457	return I == `1`;
6458	}
6459	}
6460	return false;
6461	}
6462
6463	/// Convert a SCEV of a value to a DIExpression that is pushed onto the
6464	/// builder's expression stack. The stack should already contain an
6465	/// expression for the iteration count, so that it can be multiplied by
6466	/// the stride and added to the start.
6467	/// Components of the expression are omitted if they are an identity function.
6468	/// Chain (non-affine) SCEVs are not supported.
6469	bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6470	assert(SAR.isAffine() && "Expected affine SCEV");
6471	const SCEV *Start = SAR.getStart();
6472	const SCEV *Stride = SAR.getStepRecurrence(SE);
6473
6474	// Skip pushing arithmetic noops.
6475	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_mul, S: Stride)) {
6476	if (!pushSCEV(S: Stride))
6477	return false;
6478	pushOperator(Op: llvm::dwarf::DW_OP_mul);
6479	}
6480	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_plus, S: Start)) {
6481	if (!pushSCEV(S: Start))
6482	return false;
6483	pushOperator(Op: llvm::dwarf::DW_OP_plus);
6484	}
6485	return true;
6486	}
6487
6488	/// Create an expression that is an offset from a value (usually the IV).
6489	void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
6490	pushLocation(V: OffsetValue);
6491	DIExpression::appendOffset(Ops&: Expr, Offset);
6492	LLVM_DEBUG(
6493	dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
6494	<< std::to_string(Offset) << "\n");
6495	}
6496
6497	/// Combine a translation of the SCEV and the IV to create an expression that
6498	/// recovers a location's value.
6499	/// returns true if an expression was created.
6500	bool createIterCountExpr(const SCEV *S,
6501	const SCEVDbgValueBuilder &IterationCount,
6502	ScalarEvolution &SE) {
6503	// SCEVs for SSA values are most frquently of the form
6504	// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
6505	// This is because %a is a PHI node that is not the IV. However, these
6506	// SCEVs have not been observed to result in debuginfo-lossy optimisations,
6507	// so its not expected this point will be reached.
6508	if (!isa<SCEVAddRecExpr>(Val: S))
6509	return false;
6510
6511	LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
6512	<< `'\n'`);
6513
6514	const auto *Rec = cast<SCEVAddRecExpr>(Val: S);
6515	if (!Rec->isAffine())
6516	return false;
6517
6518	if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6519	return false;
6520
6521	// Initialise a new builder with the iteration count expression. In
6522	// combination with the value's SCEV this enables recovery.
6523	clone(Base: IterationCount);
6524	if (!SCEVToValueExpr(SAR: *Rec, SE))
6525	return false;
6526
6527	return true;
6528	}
6529
6530	/// Convert a SCEV of a value to a DIExpression that is pushed onto the
6531	/// builder's expression stack. The stack should already contain an
6532	/// expression for the iteration count, so that it can be multiplied by
6533	/// the stride and added to the start.
6534	/// Components of the expression are omitted if they are an identity function.
6535	bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
6536	ScalarEvolution &SE) {
6537	assert(SAR.isAffine() && "Expected affine SCEV");
6538	const SCEV *Start = SAR.getStart();
6539	const SCEV *Stride = SAR.getStepRecurrence(SE);
6540
6541	// Skip pushing arithmetic noops.
6542	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_minus, S: Start)) {
6543	if (!pushSCEV(S: Start))
6544	return false;
6545	pushOperator(Op: llvm::dwarf::DW_OP_minus);
6546	}
6547	if (!isIdentityFunction(Op: llvm::dwarf::DW_OP_div, S: Stride)) {
6548	if (!pushSCEV(S: Stride))
6549	return false;
6550	pushOperator(Op: llvm::dwarf::DW_OP_div);
6551	}
6552	return true;
6553	}
6554
6555	// Append the current expression and locations to a location list and an
6556	// expression list. Modify the DW_OP_LLVM_arg indexes to account for
6557	// the locations already present in the destination list.
6558	void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6559	SmallVectorImpl<Value *> &DestLocations) {
6560	assert(!DestLocations.empty() &&
6561	"Expected the locations vector to contain the IV");
6562	// The DWARF_OP_LLVM_arg arguments of the expression being appended must be
6563	// modified to account for the locations already in the destination vector.
6564	// All builders contain the IV as the first location op.
6565	assert(!LocationOps.empty() &&
6566	"Expected the location ops to contain the IV.");
6567	// DestIndexMap[n] contains the index in DestLocations for the nth
6568	// location in this SCEVDbgValueBuilder.
6569	SmallVector<uint64_t, `2`> DestIndexMap;
6570	for (const auto &Op : LocationOps) {
6571	auto It = find(Range&: DestLocations, Val: Op);
6572	if (It != DestLocations.end()) {
6573	// Location already exists in DestLocations, reuse existing ArgIndex.
6574	DestIndexMap.push_back(Elt: std::distance(first: DestLocations.begin(), last: It));
6575	continue;
6576	}
6577	// Location is not in DestLocations, add it.
6578	DestIndexMap.push_back(Elt: DestLocations.size());
6579	DestLocations.push_back(Elt: Op);
6580	}
6581
6582	for (const auto &Op : expr_ops()) {
6583	if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
6584	Op.appendToVector(V&: DestExpr);
6585	continue;
6586	}
6587
6588	DestExpr.push_back(Elt: dwarf::DW_OP_LLVM_arg);
6589	// `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
6590	// DestIndexMap[n] contains its new index in DestLocations.
6591	uint64_t NewIndex = DestIndexMap [Op.getArg(I: `0`)];
6592	DestExpr.push_back(Elt: NewIndex);
6593	}
6594	}
6595	};
6596
6597	/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
6598	/// and DIExpression.
6599	struct DVIRecoveryRec {
6600	DVIRecoveryRec(DbgValueInst *DbgValue)
6601	: DbgRef (DbgValue), Expr(DbgValue->getExpression()),
6602	HadLocationArgList(false) {}
6603	DVIRecoveryRec(DbgVariableRecord *DVR)
6604	: DbgRef (DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}
6605
6606	PointerUnion<DbgValueInst , DbgVariableRecord > DbgRef;
6607	DIExpression *Expr;
6608	bool HadLocationArgList;
6609	SmallVector<WeakVH, `2`> LocationOps;
6610	SmallVector<const llvm::SCEV *, `2`> SCEVs;
6611	SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, `2`> RecoveryExprs;
6612
6613	void clear() {
6614	for (auto &RE : RecoveryExprs)
6615	RE.reset();
6616	RecoveryExprs.clear();
6617	}
6618
6619	~DVIRecoveryRec() { clear(); }
6620	};
6621	} // namespace
6622
6623	/// Returns the total number of DW_OP_llvm_arg operands in the expression.
6624	/// This helps in determining if a DIArglist is necessary or can be omitted from
6625	/// the dbg.value.
6626	static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
6627	auto expr_ops = ToDwarfOpIter(Expr);
6628	unsigned Count = `0`;
6629	for (auto Op : expr_ops)
6630	if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
6631	Count++;
6632	return Count;
6633	}
6634
6635	/// Overwrites DVI with the location and Ops as the DIExpression. This will
6636	/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,
6637	/// because a DIArglist is not created for the first argument of the dbg.value.
6638	template <typename T>
6639	static void updateDVIWithLocation(T &DbgVal, Value *Location,
6640	SmallVectorImpl<uint64_t> &Ops) {
6641	assert(numLLVMArgOps(Ops) == `0` && "Expected expression that does not "
6642	"contain any DW_OP_llvm_arg operands.");
6643	DbgVal.setRawLocation(ValueAsMetadata::get(V: Location));
6644	DbgVal.setExpression(DIExpression::get(Context&: DbgVal.getContext(), Elements: Ops));
6645	DbgVal.setExpression(DIExpression::get(Context&: DbgVal.getContext(), Elements: Ops));
6646	}
6647
6648	/// Overwrite DVI with locations placed into a DIArglist.
6649	template <typename T>
6650	static void updateDVIWithLocations(T &DbgVal,
6651	SmallVectorImpl<Value *> &Locations,
6652	SmallVectorImpl<uint64_t> &Ops) {
6653	assert(numLLVMArgOps(Ops) != `0` &&
6654	"Expected expression that references DIArglist locations using "
6655	"DW_OP_llvm_arg operands.");
6656	SmallVector<ValueAsMetadata *, `3`> MetadataLocs;
6657	for (Value *V : Locations)
6658	MetadataLocs.push_back(Elt: ValueAsMetadata::get(V));
6659	auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
6660	DbgVal.setRawLocation(llvm::DIArgList::get(Context&: DbgVal.getContext(), Args: ValArrayRef));
6661	DbgVal.setExpression(DIExpression::get(Context&: DbgVal.getContext(), Elements: Ops));
6662	}
6663
6664	/// Write the new expression and new location ops for the dbg.value. If possible
6665	/// reduce the szie of the dbg.value intrinsic by omitting DIArglist. This
6666	/// can be omitted if:
6667	/// 1. There is only a single location, refenced by a single DW_OP_llvm_arg.
6668	/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
6669	static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
6670	SmallVectorImpl<Value *> &NewLocationOps,
6671	SmallVectorImpl<uint64_t> &NewExpr) {
6672	auto UpdateDbgValueInstImpl = [&](auto *DbgVal) {
6673	unsigned NumLLVMArgs = numLLVMArgOps(Expr&: NewExpr);
6674	if (NumLLVMArgs == `0`) {
6675	// Location assumed to be on the stack.
6676	updateDVIWithLocation(*DbgVal, NewLocationOps [`0`], NewExpr);
6677	} else if (NumLLVMArgs == `1` && NewExpr [`0`] == dwarf::DW_OP_LLVM_arg) {
6678	// There is only a single DW_OP_llvm_arg at the start of the expression,
6679	// so it can be omitted along with DIArglist.
6680	assert(NewExpr[`1`] == `0` &&
6681	"Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6682	llvm::SmallVector<uint64_t, `6`> ShortenedOps(llvm::drop_begin(RangeOrContainer&: NewExpr, N: `2`));
6683	updateDVIWithLocation(*DbgVal, NewLocationOps [`0`], ShortenedOps);
6684	} else {
6685	// Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
6686	updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
6687	}
6688
6689	// If the DIExpression was previously empty then add the stack terminator.
6690	// Non-empty expressions have only had elements inserted into them and so
6691	// the terminator should already be present e.g. stack_value or fragment.
6692	DIExpression *SalvageExpr = DbgVal->getExpression();
6693	if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
6694	SalvageExpr =
6695	DIExpression::append(Expr: SalvageExpr, Ops: {dwarf::DW_OP_stack_value});
6696	DbgVal->setExpression(SalvageExpr);
6697	}
6698	};
6699	if (isa<DbgValueInst *>(Val: DVIRec.DbgRef))
6700	UpdateDbgValueInstImpl (cast<DbgValueInst *>(Val&: DVIRec.DbgRef));
6701	else
6702	UpdateDbgValueInstImpl (cast<DbgVariableRecord *>(Val&: DVIRec.DbgRef));
6703	}
6704
6705	/// Cached location ops may be erased during LSR, in which case a poison is
6706	/// required when restoring from the cache. The type of that location is no
6707	/// longer available, so just use int8. The poison will be replaced by one or
6708	/// more locations later when a SCEVDbgValueBuilder selects alternative
6709	/// locations to use for the salvage.
6710	static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
6711	return (VH) ? VH : PoisonValue::get(T: llvm::Type::getInt8Ty(C));
6712	}
6713
6714	/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
6715	static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
6716	auto RestorePreTransformStateImpl = [&](auto *DbgVal) {
6717	LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
6718	<< "scev-salvage: post-LSR: " << *DbgVal << `'\n'`);
6719	assert(DVIRec.Expr && "Expected an expression");
6720	DbgVal->setExpression(DVIRec.Expr);
6721
6722	// Even a single location-op may be inside a DIArgList and referenced with
6723	// DW_OP_LLVM_arg, which is valid only with a DIArgList.
6724	if (!DVIRec.HadLocationArgList) {
6725	assert(DVIRec.LocationOps.size() == `1` &&
6726	"Unexpected number of location ops.");
6727	// LSR's unsuccessful salvage attempt may have added DIArgList, which in
6728	// this case was not present before, so force the location back to a
6729	// single uncontained Value.
6730	Value *CachedValue =
6731	getValueOrPoison(DVIRec.LocationOps [`0`], DbgVal->getContext());
6732	DbgVal->setRawLocation(ValueAsMetadata::get(V: CachedValue));
6733	} else {
6734	SmallVector<ValueAsMetadata *, `3`> MetadataLocs;
6735	for (WeakVH VH : DVIRec.LocationOps) {
6736	Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
6737	MetadataLocs.push_back(Elt: ValueAsMetadata::get(V: CachedValue));
6738	}
6739	auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
6740	DbgVal->setRawLocation(
6741	llvm::DIArgList::get(Context&: DbgVal->getContext(), Args: ValArrayRef));
6742	}
6743	LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << `'\n'`);
6744	};
6745	if (isa<DbgValueInst *>(Val: DVIRec.DbgRef))
6746	RestorePreTransformStateImpl (cast<DbgValueInst *>(Val&: DVIRec.DbgRef));
6747	else
6748	RestorePreTransformStateImpl (cast<DbgVariableRecord *>(Val&: DVIRec.DbgRef));
6749	}
6750
6751	static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
6752	llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
6753	const SCEV *SCEVInductionVar,
6754	SCEVDbgValueBuilder IterCountExpr) {
6755
6756	if (isa<DbgValueInst *>(Val: DVIRec.DbgRef)
6757	? !cast<DbgValueInst *>(Val&: DVIRec.DbgRef)->isKillLocation()
6758	: !cast<DbgVariableRecord *>(Val&: DVIRec.DbgRef)->isKillLocation())
6759	return false;
6760
6761	// LSR may have caused several changes to the dbg.value in the failed salvage
6762	// attempt. So restore the DIExpression, the location ops and also the
6763	// location ops format, which is always DIArglist for multiple ops, but only
6764	// sometimes for a single op.
6765	restorePreTransformState(DVIRec);
6766
6767	// LocationOpIndexMap[i] will store the post-LSR location index of
6768	// the non-optimised out location at pre-LSR index i.
6769	SmallVector<int64_t, `2`> LocationOpIndexMap;
6770	LocationOpIndexMap.assign(NumElts: DVIRec.LocationOps.size(), Elt: -`1`);
6771	SmallVector<Value *, `2`> NewLocationOps;
6772	NewLocationOps.push_back(Elt: LSRInductionVar);
6773
6774	for (unsigned i = `0`; i < DVIRec.LocationOps.size(); i++) {
6775	WeakVH VH = DVIRec.LocationOps [i];
6776	// Place the locations not optimised out in the list first, avoiding
6777	// inserts later. The map is used to update the DIExpression's
6778	// DW_OP_LLVM_arg arguments as the expression is updated.
6779	if (VH && !isa<UndefValue>(Val: VH)) {
6780	NewLocationOps.push_back(Elt: VH);
6781	LocationOpIndexMap [i] = NewLocationOps.size() - `1`;
6782	LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
6783	<< " now at index " << LocationOpIndexMap[i] << "\n");
6784	continue;
6785	}
6786
6787	// It's possible that a value referred to in the SCEV may have been
6788	// optimised out by LSR.
6789	if (SE.containsErasedValue(S: DVIRec.SCEVs [i]) \|\|
6790	SE.containsUndefs(S: DVIRec.SCEVs [i])) {
6791	LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
6792	<< " refers to a location that is now undef or erased. "
6793	"Salvage abandoned.\n");
6794	return false;
6795	}
6796
6797	LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
6798	<< " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
6799
6800	DVIRec.RecoveryExprs [i] = std::make_unique<SCEVDbgValueBuilder>();
6801	SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs [i].get();
6802
6803	// Create an offset-based salvage expression if possible, as it requires
6804	// less DWARF ops than an iteration count-based expression.
6805	if (std::optional<APInt> Offset =
6806	SE.computeConstantDifference(LHS: DVIRec.SCEVs [i], RHS: SCEVInductionVar)) {
6807	if (Offset ->getSignificantBits() <= `64`)
6808	SalvageExpr->createOffsetExpr(Offset: Offset ->getSExtValue(), OffsetValue: LSRInductionVar);
6809	else
6810	return false;
6811	} else if (!SalvageExpr->createIterCountExpr(S: DVIRec.SCEVs [i], IterationCount: IterCountExpr,
6812	SE))
6813	return false;
6814	}
6815
6816	// Merge the DbgValueBuilder generated expressions and the original
6817	// DIExpression, place the result into an new vector.
6818	SmallVector<uint64_t, `3`> NewExpr;
6819	if (DVIRec.Expr->getNumElements() == `0`) {
6820	assert(DVIRec.RecoveryExprs.size() == `1` &&
6821	"Expected only a single recovery expression for an empty "
6822	"DIExpression.");
6823	assert(DVIRec.RecoveryExprs[`0`] &&
6824	"Expected a SCEVDbgSalvageBuilder for location 0");
6825	SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs [`0`].get();
6826	B->appendToVectors(DestExpr&: NewExpr, DestLocations&: NewLocationOps);
6827	}
6828	for (const auto &Op : DVIRec.Expr->expr_ops()) {
6829	// Most Ops needn't be updated.
6830	if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
6831	Op.appendToVector(V&: NewExpr);
6832	continue;
6833	}
6834
6835	uint64_t LocationArgIndex = Op.getArg(I: `0`);
6836	SCEVDbgValueBuilder *DbgBuilder =
6837	DVIRec.RecoveryExprs [LocationArgIndex].get();
6838	// The location doesn't have s SCEVDbgValueBuilder, so LSR did not
6839	// optimise it away. So just translate the argument to the updated
6840	// location index.
6841	if (!DbgBuilder) {
6842	NewExpr.push_back(Elt: dwarf::DW_OP_LLVM_arg);
6843	assert(LocationOpIndexMap[Op.getArg(`0`)] != -`1` &&
6844	"Expected a positive index for the location-op position.");
6845	NewExpr.push_back(Elt: LocationOpIndexMap [Op.getArg(I: `0`)]);
6846	continue;
6847	}
6848	// The location has a recovery expression.
6849	DbgBuilder->appendToVectors(DestExpr&: NewExpr, DestLocations&: NewLocationOps);
6850	}
6851
6852	UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
6853	if (isa<DbgValueInst *>(Val: DVIRec.DbgRef))
6854	LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
6855	<< cast<DbgValueInst >(DVIRec.DbgRef) << "\n");
6856	else
6857	LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
6858	<< cast<DbgVariableRecord >(DVIRec.DbgRef) << "\n");
6859	return true;
6860	}
6861
6862	/// Obtain an expression for the iteration count, then attempt to salvage the
6863	/// dbg.value intrinsics.
6864	static void DbgRewriteSalvageableDVIs(
6865	llvm::Loop L, ScalarEvolution &SE, llvm::PHINode LSRInductionVar,
6866	SmallVector<std::unique_ptr<DVIRecoveryRec>, `2`> &DVIToUpdate) {
6867	if (DVIToUpdate.empty())
6868	return;
6869
6870	const llvm::SCEV *SCEVInductionVar = SE.getSCEV(V: LSRInductionVar);
6871	assert(SCEVInductionVar &&
6872	"Anticipated a SCEV for the post-LSR induction variable");
6873
6874	if (const SCEVAddRecExpr *IVAddRec =
6875	dyn_cast<SCEVAddRecExpr>(Val: SCEVInductionVar)) {
6876	if (!IVAddRec->isAffine())
6877	return;
6878
6879	// Prevent translation using excessive resources.
6880	if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6881	return;
6882
6883	// The iteration count is required to recover location values.
6884	SCEVDbgValueBuilder IterCountExpr;
6885	IterCountExpr.pushLocation(V: LSRInductionVar);
6886	if (!IterCountExpr.SCEVToIterCountExpr(SAR: *IVAddRec, SE))
6887	return;
6888
6889	LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
6890	<< `'\n'`);
6891
6892	for (auto &DVIRec : DVIToUpdate) {
6893	SalvageDVI(L, SE, LSRInductionVar, DVIRec&: *DVIRec, SCEVInductionVar,
6894	IterCountExpr);
6895	}
6896	}
6897	}
6898
6899	/// Identify and cache salvageable DVI locations and expressions along with the
6900	/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
6901	/// cacheing and salvaging.
6902	static void DbgGatherSalvagableDVI(
6903	Loop *L, ScalarEvolution &SE,
6904	SmallVector<std::unique_ptr<DVIRecoveryRec>, `2`> &SalvageableDVISCEVs,
6905	SmallSet<AssertingVH<DbgValueInst>, `2`> &DVIHandles) {
6906	for (const auto &B : L->getBlocks()) {
6907	for (auto &I : *B) {
6908	auto ProcessDbgValue = [&](auto DbgVal) -> bool* {
6909	// Ensure that if any location op is undef that the dbg.vlue is not
6910	// cached.
6911	if (DbgVal->isKillLocation())
6912	return false;
6913
6914	// Check that the location op SCEVs are suitable for translation to
6915	// DIExpression.
6916	const auto &HasTranslatableLocationOps =
6917	[&](const auto DbgValToTranslate) -> bool* {
6918	for (const auto LocOp : DbgValToTranslate->location_ops()) {
6919	if (!LocOp)
6920	return false;
6921
6922	if (!SE.isSCEVable(Ty: LocOp->getType()))
6923	return false;
6924
6925	const SCEV *S = SE.getSCEV(V: LocOp);
6926	if (SE.containsUndefs(S))
6927	return false;
6928	}
6929	return true;
6930	};
6931
6932	if (!HasTranslatableLocationOps(DbgVal))
6933	return false;
6934
6935	std::unique_ptr<DVIRecoveryRec> NewRec =
6936	std::make_unique<DVIRecoveryRec>(DbgVal);
6937	// Each location Op may need a SCEVDbgValueBuilder in order to recover
6938	// it. Pre-allocating a vector will enable quick lookups of the builder
6939	// later during the salvage.
6940	NewRec ->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
6941	for (const auto LocOp : DbgVal->location_ops()) {
6942	NewRec ->SCEVs.push_back(Elt: SE.getSCEV(V: LocOp));
6943	NewRec ->LocationOps.push_back(LocOp);
6944	NewRec ->HadLocationArgList = DbgVal->hasArgList();
6945	}
6946	SalvageableDVISCEVs.push_back(Elt: std::move(NewRec));
6947	return true;
6948	};
6949	for (DbgVariableRecord &DVR : filterDbgVars(R: I.getDbgRecordRange())) {
6950	if (DVR.isDbgValue() \|\| DVR.isDbgAssign())
6951	ProcessDbgValue (&DVR);
6952	}
6953	auto DVI = dyn_cast<DbgValueInst>(Val: &I);
6954	if (!DVI)
6955	continue;
6956	if (ProcessDbgValue (DVI))
6957	DVIHandles.insert(V: DVI);
6958	}
6959	}
6960	}
6961
6962	/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
6963	/// any PHi from the loop header is usable, but may have less chance of
6964	/// surviving subsequent transforms.
6965	static llvm::PHINode GetInductionVariable(const* Loop &L, ScalarEvolution &SE,
6966	const LSRInstance &LSR) {
6967
6968	auto IsSuitableIV = [&](PHINode *P) {
6969	if (!SE.isSCEVable(Ty: P->getType()))
6970	return false;
6971	if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(Val: SE.getSCEV(V: P)))
6972	return Rec->isAffine() && !SE.containsUndefs(S: SE.getSCEV(V: P));
6973	return false;
6974	};
6975
6976	// For now, just pick the first IV that was generated and inserted by
6977	// ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
6978	// by subsequent transforms.
6979	for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
6980	if (!IV)
6981	continue;
6982
6983	// There should only be PHI node IVs.
6984	PHINode P = cast<PHINode>(Val: &IV);
6985
6986	if (IsSuitableIV (P))
6987	return P;
6988	}
6989
6990	for (PHINode &P : L.getHeader()->phis()) {
6991	if (IsSuitableIV (&P))
6992	return &P;
6993	}
6994	return nullptr;
6995	}
6996
6997	static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6998	DominatorTree &DT, LoopInfo &LI,
6999	const TargetTransformInfo &TTI,
7000	AssumptionCache &AC, TargetLibraryInfo &TLI,
7001	MemorySSA *MSSA) {
7002
7003	// Debug preservation - before we start removing anything identify which DVI
7004	// meet the salvageable criteria and store their DIExpression and SCEVs.
7005	SmallVector<std::unique_ptr<DVIRecoveryRec>, `2`> SalvageableDVIRecords;
7006	SmallSet<AssertingVH<DbgValueInst>, `2`> DVIHandles;
7007	DbgGatherSalvagableDVI(L, SE, SalvageableDVISCEVs&: SalvageableDVIRecords, DVIHandles);
7008
7009	bool Changed = false;
7010	std::unique_ptr<MemorySSAUpdater> MSSAU;
7011	if (MSSA)
7012	MSSAU = std::make_unique<MemorySSAUpdater>(args&: MSSA);
7013
7014	// Run the main LSR transformation.
7015	const LSRInstance &Reducer =
7016	LSRInstance (L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
7017	Changed \|= Reducer.getChanged();
7018
7019	// Remove any extra phis created by processing inner loops.
7020	Changed \|= DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
7021	if (EnablePhiElim && L->isLoopSimplifyForm()) {
7022	SmallVector<WeakTrackingVH, `16`> DeadInsts;
7023	const DataLayout &DL = L->getHeader()->getDataLayout();
7024	SCEVExpander Rewriter(SE, DL, "lsr", false);
7025	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7026	Rewriter.setDebugType(DEBUG_TYPE);
7027	#endif
7028	unsigned numFolded = Rewriter.replaceCongruentIVs(L, DT: &DT, DeadInsts, TTI: &TTI);
7029	Rewriter.clear();
7030	if (numFolded) {
7031	Changed = true;
7032	RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, TLI: &TLI,
7033	MSSAU: MSSAU.get());
7034	DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
7035	}
7036	}
7037	// LSR may at times remove all uses of an induction variable from a loop.
7038	// The only remaining use is the PHI in the exit block.
7039	// When this is the case, if the exit value of the IV can be calculated using
7040	// SCEV, we can replace the exit block PHI with the final value of the IV and
7041	// skip the updates in each loop iteration.
7042	if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7043	SmallVector<WeakTrackingVH, `16`> DeadInsts;
7044	const DataLayout &DL = L->getHeader()->getDataLayout();
7045	SCEVExpander Rewriter(SE, DL, "lsr", true);
7046	int Rewrites = rewriteLoopExitValues(L, LI: &LI, TLI: &TLI, SE: &SE, TTI: &TTI, Rewriter, DT: &DT,
7047	ReplaceExitValue: UnusedIndVarInLoop, DeadInsts);
7048	Rewriter.clear();
7049	if (Rewrites) {
7050	Changed = true;
7051	RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, TLI: &TLI,
7052	MSSAU: MSSAU.get());
7053	DeleteDeadPHIs(BB: L->getHeader(), TLI: &TLI, MSSAU: MSSAU.get());
7054	}
7055	}
7056
7057	if (SalvageableDVIRecords.empty())
7058	return Changed;
7059
7060	// Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
7061	// expressions composed using the derived iteration count.
7062	// TODO: Allow for multiple IV references for nested AddRecSCEVs
7063	for (const auto &L : LI) {
7064	if (llvm::PHINode IV = GetInductionVariable(L: L, SE, LSR: Reducer))
7065	DbgRewriteSalvageableDVIs(L, SE, LSRInductionVar: IV, DVIToUpdate&: SalvageableDVIRecords);
7066	else {
7067	LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
7068	"could not be identified.\n");
7069	}
7070	}
7071
7072	for (auto &Rec : SalvageableDVIRecords)
7073	Rec ->clear();
7074	SalvageableDVIRecords.clear();
7075	DVIHandles.clear();
7076	return Changed;
7077	}
7078
7079	bool LoopStrengthReduce::runOnLoop(Loop L, LPPassManager & /LPM/*) {
7080	if (skipLoop(L))
7081	return false;
7082
7083	auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7084	auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7085	auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7086	auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7087	const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7088	F: *L->getHeader()->getParent());
7089	auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7090	F&: *L->getHeader()->getParent());
7091	auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7092	F: *L->getHeader()->getParent());
7093	auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7094	MemorySSA MSSA = nullptr*;
7095	if (MSSAAnalysis)
7096	MSSA = &MSSAAnalysis->getMSSA();
7097	return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
7098	}
7099
7100	PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
7101	LoopStandardAnalysisResults &AR,
7102	LPMUpdater &) {
7103	if (!ReduceLoopStrength(L: &L, IU&: AM.getResult<IVUsersAnalysis>(IR&: L, ExtraArgs&: AR), SE&: AR.SE,
7104	DT&: AR.DT, LI&: AR.LI, TTI: AR.TTI, AC&: AR.AC, TLI&: AR.TLI, MSSA: AR.MSSA))
7105	return PreservedAnalyses::all();
7106
7107	auto PA = getLoopPassPreservedAnalyses();
7108	if (AR.MSSA)
7109	PA.preserve<MemorySSAAnalysis>();
7110	return PA;
7111	}
7112
// Definition of the static ID member of the LoopStrengthReduce pass.
// NOTE(review): presumably only its address is used, as the pass identifier,
// so the stored value would be irrelevant; confirm against the class
// declaration.
7113	char LoopStrengthReduce::ID = `0`;
7114
// Pass registration for LoopStrengthReduce under the command-line name
// "loop-reduce" with the description "Loop Strength Reduction".
// The INITIALIZE_PASS_DEPENDENCY lines name the passes that are initialised
// together with this one.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags; confirm against PassSupport.h.
7115	INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
7116	"Loop Strength Reduction", false, false)
7117	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
7118	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
7119	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
7120	INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
7121	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
7122	INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
7123	INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
7124	"Loop Strength Reduction", false, false)
7125
7126	Pass llvm::createLoopStrengthReducePass() { return* new LoopStrengthReduce (); }
7127

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp