StraightLineStrengthReduce.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp]

1	//===- StraightLineStrengthReduce.cpp - -----------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements straight-line strength reduction (SLSR). Unlike loop
10	// strength reduction, this algorithm is designed to reduce arithmetic
11	// redundancy in straight-line code instead of loops. It has proven to be
12	// effective in simplifying arithmetic statements derived from an unrolled loop.
13	// It can also simplify the logic of SeparateConstOffsetFromGEP.
14	//
15	// There are many optimizations we can perform in the domain of SLSR.
16	// We look for strength reduction candidates in the following forms:
17	//
// Form Add: B + i * S
// Form Mul: (B + i) * S
// Form GEP: &B[i * S]
21	//
22	// where S is an integer variable, and i is a constant integer. If we found two
23	// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
24	// in a simpler way with respect to S1 (index delta). For example,
25	//
// S1: X = B + i * S
// S2: Y = B + i' * S   => X + (i' - i) * S
//
// S1: X = (B + i) * S
// S2: Y = (B + i') * S => X + (i' - i) * S
//
// S1: X = &B[i * S]
// S2: Y = &B[i' * S]   => &X[(i' - i) * S]
//
// Note: (i' - i) * S is folded to the extent possible.
36	//
37	// For Add and GEP forms, we can also rewrite a candidate in a simpler way
38	// with respect to other dominating candidates if their B or S are different
39	// but other parts are the same. For example,
40	//
// Base Delta:
// S1: X = B  + i * S
// S2: Y = B' + i * S   => X + (B' - B)
//
// S1: X = &B [i * S]
// S2: Y = &B'[i * S]   => X + (B' - B)
//
// Stride Delta:
// S1: X = B + i * S
// S2: Y = B + i * S'   => X + i * (S' - S)
//
// S1: X = &B[i * S]
// S2: Y = &B[i * S']   => X + i * (S' - S)
54	//
// PS: Stride delta rewrite on Mul form is usually non-profitable, and Base
// delta rewrite is only sometimes profitable, so we do not support either of
// them on Mul.
57	//
58	// This rewriting is in general a good idea. The code patterns we focus on
59	// usually come from loop unrolling, so the delta is likely the same
60	// across iterations and can be reused. When that happens, the optimized form
61	// takes only one add starting from the second iteration.
62	//
63	// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
64	// multiple bases, we choose to rewrite S2 with respect to its "immediate"
65	// basis, the basis that is the closest ancestor in the dominator tree.
66	//
67	// TODO:
68	//
69	// - Floating point arithmetics when fast math is enabled.
70
71	#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
72	#include "llvm/ADT/APInt.h"
73	#include "llvm/ADT/DepthFirstIterator.h"
74	#include "llvm/ADT/SetVector.h"
75	#include "llvm/ADT/SmallVector.h"
76	#include "llvm/Analysis/ScalarEvolution.h"
77	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
78	#include "llvm/Analysis/TargetTransformInfo.h"
79	#include "llvm/Analysis/ValueTracking.h"
80	#include "llvm/IR/Constants.h"
81	#include "llvm/IR/DataLayout.h"
82	#include "llvm/IR/DerivedTypes.h"
83	#include "llvm/IR/Dominators.h"
84	#include "llvm/IR/GetElementPtrTypeIterator.h"
85	#include "llvm/IR/IRBuilder.h"
86	#include "llvm/IR/Instruction.h"
87	#include "llvm/IR/Instructions.h"
88	#include "llvm/IR/Module.h"
89	#include "llvm/IR/Operator.h"
90	#include "llvm/IR/PatternMatch.h"
91	#include "llvm/IR/Type.h"
92	#include "llvm/IR/Value.h"
93	#include "llvm/InitializePasses.h"
94	#include "llvm/Pass.h"
95	#include "llvm/Support/Casting.h"
96	#include "llvm/Support/DebugCounter.h"
97	#include "llvm/Support/ErrorHandling.h"
98	#include "llvm/Transforms/Scalar.h"
99	#include "llvm/Transforms/Utils/Local.h"
100	#include <cassert>
101	#include <cstdint>
102	#include <limits>
103	#include <list>
104	#include <queue>
105	#include <vector>
106
107	using namespace llvm;
108	using namespace PatternMatch;
109
110	#define DEBUG_TYPE "slsr"
111
112	static const unsigned UnknownAddressSpace =
113	std::numeric_limits<unsigned>::max();
114
115	DEBUG_COUNTER(StraightLineStrengthReduceCounter, "slsr-counter",
116	"Controls whether rewriteCandidate is executed.");
117
118	// Only for testing.
119	static cl::opt<bool>
120	EnablePoisonReuseGuard("enable-poison-reuse-guard", cl::init(Val: true),
121	cl::desc ("Enable poison-reuse guard"));
122
123	namespace {
124
125	class StraightLineStrengthReduceLegacyPass : public FunctionPass {
126	const DataLayout DL = nullptr*;
127
128	public:
129	static char ID;
130
131	StraightLineStrengthReduceLegacyPass() : FunctionPass (ID) {
132	initializeStraightLineStrengthReduceLegacyPassPass(
133	*PassRegistry::getPassRegistry());
134	}
135
136	void getAnalysisUsage(AnalysisUsage &AU) const override {
137	AU.addRequired<DominatorTreeWrapperPass>();
138	AU.addRequired<ScalarEvolutionWrapperPass>();
139	AU.addRequired<TargetTransformInfoWrapperPass>();
140	// We do not modify the shape of the CFG.
141	AU.setPreservesCFG();
142	}
143
144	bool doInitialization(Module &M) override {
145	DL = &M.getDataLayout();
146	return false;
147	}
148
149	bool runOnFunction(Function &F) override;
150	};
151
152	class StraightLineStrengthReduce {
153	public:
154	StraightLineStrengthReduce(const DataLayout DL, DominatorTree DT,
155	ScalarEvolution SE, TargetTransformInfo TTI)
156	: DL(DL), DT(DT), SE(SE), TTI(TTI) {}
157
158	// SLSR candidate. Such a candidate must be in one of the forms described in
159	// the header comments.
160	struct Candidate {
161	enum Kind {
162	Invalid, // reserved for the default constructor
163	Add, // B + i S*
164	Mul, // (B + i) S*
165	GEP, // &B[..][i S][..]*
166	};
167
168	enum DKind {
169	InvalidDelta, // reserved for the default constructor
170	IndexDelta, // Delta is a constant from Index
171	BaseDelta, // Delta is a constant or variable from Base
172	StrideDelta, // Delta is a constant or variable from Stride
173	};
174
175	Candidate() = default;
176	Candidate(Kind CT, const SCEV B, ConstantInt Idx, Value *S,
177	Instruction I, const* SCEV *StrideSCEV)
178	: CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I),
179	StrideSCEV(StrideSCEV) {}
180
181	Kind CandidateKind = Invalid;
182
183	const SCEV Base = nullptr*;
184	// TODO: Swap Index and Stride's name.
185	// Note that Index and Stride of a GEP candidate do not necessarily have the
186	// same integer type. In that case, during rewriting, Stride will be
187	// sign-extended or truncated to Index's type.
188	ConstantInt Index = nullptr*;
189
190	Value Stride = nullptr*;
191
192	// The instruction this candidate corresponds to. It helps us to rewrite a
193	// candidate with respect to its immediate basis. Note that one instruction
194	// can correspond to multiple candidates depending on how you associate the
195	// expression. For instance,
196	//
197	// (a + 1) (b + 2)*
198	//
199	// can be treated as
200	//
201	// <Base: a, Index: 1, Stride: b + 2>
202	//
203	// or
204	//
205	// <Base: b, Index: 2, Stride: a + 1>
206	Instruction Ins = nullptr*;
207
208	// Points to the immediate basis of this candidate, or nullptr if we cannot
209	// find any basis for this candidate.
210	Candidate Basis = nullptr*;
211
212	DKind DeltaKind = InvalidDelta;
213
214	// Store SCEV of Stride to compute delta from different strides
215	const SCEV StrideSCEV = nullptr*;
216
217	// Points to (Y - X) that will be used to rewrite this candidate.
218	Value Delta = nullptr*;
219
220	/// Cost model: Evaluate the computational efficiency of the candidate.
221	///
222	/// Efficiency levels (higher is better):
223	/// ZeroInst (5) - [Variable] or [Const]
224	/// OneInstOneVar (4) - [Variable + Const] or [Variable Const]*
225	/// OneInstTwoVar (3) - [Variable + Variable] or [Variable Variable]*
226	/// TwoInstOneVar (2) - [Const + Const Variable]*
227	/// TwoInstTwoVar (1) - [Variable + Const Variable]*
228	enum EfficiencyLevel : unsigned {
229	Unknown = `0`,
230	TwoInstTwoVar = `1`,
231	TwoInstOneVar = `2`,
232	OneInstTwoVar = `3`,
233	OneInstOneVar = `4`,
234	ZeroInst = `5`
235	};
236
237	static EfficiencyLevel
238	getComputationEfficiency(Kind CandidateKind, const ConstantInt *Index,
239	const Value Stride, const* SCEV Base = nullptr*) {
240	bool IsConstantBase = false;
241	bool IsZeroBase = false;
242	// When evaluating the efficiency of a rewrite, if the Base's SCEV is
243	// not available, conservatively assume the base is not constant.
244	if (auto *ConstBase = dyn_cast_or_null<SCEVConstant>(Val: Base)) {
245	IsConstantBase = true;
246	IsZeroBase = ConstBase->getValue()->isZero();
247	}
248
249	bool IsConstantStride = isa<ConstantInt>(Val: Stride);
250	bool IsZeroStride =
251	IsConstantStride && cast<ConstantInt>(Val: Stride)->isZero();
252	// All constants
253	if (IsConstantBase && IsConstantStride)
254	return ZeroInst;
255
256	// (Base + Index) Stride*
257	if (CandidateKind == Mul) {
258	if (IsZeroStride)
259	return ZeroInst;
260	if (Index->isZero())
261	return (IsConstantStride \|\| IsConstantBase) ? OneInstOneVar
262	: OneInstTwoVar;
263
264	if (IsConstantBase)
265	return IsZeroBase && (Index->isOne() \|\| Index->isMinusOne())
266	? ZeroInst
267	: OneInstOneVar;
268
269	if (IsConstantStride) {
270	auto *CI = cast<ConstantInt>(Val: Stride);
271	return (CI->isOne() \|\| CI->isMinusOne()) ? OneInstOneVar
272	: TwoInstOneVar;
273	}
274	return TwoInstTwoVar;
275	}
276
277	// Base + Index Stride*
278	assert(CandidateKind == Add \|\| CandidateKind == GEP);
279	if (Index->isZero() \|\| IsZeroStride)
280	return ZeroInst;
281
282	bool IsSimpleIndex = Index->isOne() \|\| Index->isMinusOne();
283
284	if (IsConstantBase)
285	return IsZeroBase ? (IsSimpleIndex ? ZeroInst : OneInstOneVar)
286	: (IsSimpleIndex ? OneInstOneVar : TwoInstOneVar);
287
288	if (IsConstantStride)
289	return IsZeroStride ? ZeroInst : OneInstOneVar;
290
291	if (IsSimpleIndex)
292	return OneInstTwoVar;
293
294	return TwoInstTwoVar;
295	}
296
297	// Evaluate if the given delta is profitable to rewrite this candidate.
298	bool isProfitableRewrite(const Value &Delta, const DKind DeltaKind) const {
299	// This function cannot accurately evaluate the profit of whole expression
300	// with context. A candidate (B + I S) cannot express whether this*
301	// instruction needs to compute on its own (I S), which may be shared*
302	// with other candidates or may need instructions to compute.
303	// If the rewritten form has the same strength, still rewrite to
304	// (X + Delta) since it may expose more CSE opportunities on Delta, as
305	// unrolled loops usually have identical Delta for each unrolled body.
306	//
307	// Note, this function should only be used on Index Delta rewrite.
308	// Base and Stride delta need context info to evaluate the register
309	// pressure impact from variable delta.
310	return getComputationEfficiency(CandidateKind, Index, Stride, Base) <=
311	getRewriteEfficiency(Delta, DeltaKind);
312	}
313
314	// Evaluate the rewrite efficiency of this candidate with its Basis
315	EfficiencyLevel getRewriteEfficiency() const {
316	return Basis ? getRewriteEfficiency(Delta: *Delta, DeltaKind) : Unknown;
317	}
318
319	// Evaluate the rewrite efficiency of this candidate with a given delta
320	EfficiencyLevel getRewriteEfficiency(const Value &Delta,
321	const DKind DeltaKind) const {
322	switch (DeltaKind) {
323	case BaseDelta: // [X + Delta]
324	return getComputationEfficiency(
325	CandidateKind,
326	Index: ConstantInt::get(Ty: cast<IntegerType>(Val: Delta.getType()), V: `1`), Stride: &Delta);
327	case StrideDelta: // [X + Index Delta]*
328	return getComputationEfficiency(CandidateKind, Index, Stride: &Delta);
329	case IndexDelta: // [X + Delta Stride]*
330	return getComputationEfficiency(CandidateKind,
331	Index: cast<ConstantInt>(Val: &Delta), Stride);
332	default:
333	return Unknown;
334	}
335	}
336
337	bool isHighEfficiency() const {
338	return getComputationEfficiency(CandidateKind, Index, Stride, Base) >=
339	OneInstOneVar;
340	}
341
342	// Verify that this candidate has valid delta components relative to the
343	// basis
344	bool hasValidDelta(const Candidate &Basis) const {
345	switch (DeltaKind) {
346	case IndexDelta:
347	// Index differs, Base and Stride must match
348	return Base == Basis.Base && StrideSCEV == Basis.StrideSCEV;
349	case StrideDelta:
350	// Stride differs, Base and Index must match
351	return Base == Basis.Base && Index == Basis.Index;
352	case BaseDelta:
353	// Base differs, Stride and Index must match
354	return StrideSCEV == Basis.StrideSCEV && Index == Basis.Index;
355	default:
356	return false;
357	}
358	}
359	};
360
361	bool runOnFunction(Function &F);
362
363	private:
364	// Fetch straight-line basis for rewriting C, update C.Basis to point to it,
365	// and store the delta between C and its Basis in C.Delta.
366	void setBasisAndDeltaFor(Candidate &C);
367	// Returns whether the candidate can be folded into an addressing mode.
368	bool isFoldable(const Candidate &C, TargetTransformInfo *TTI);
369
370	// Checks whether I is in a candidate form. If so, adds all the matching forms
371	// to Candidates, and tries to find the immediate basis for each of them.
372	void allocateCandidatesAndFindBasis(Instruction *I);
373
374	// Allocate candidates and find bases for Add instructions.
375	void allocateCandidatesAndFindBasisForAdd(Instruction *I);
376
377	// Given I = LHS + RHS, factors RHS into i S and makes (LHS + i * S) a*
378	// candidate.
379	void allocateCandidatesAndFindBasisForAdd(Value LHS, Value RHS,
380	Instruction *I);
381	// Allocate candidates and find bases for Mul instructions.
382	void allocateCandidatesAndFindBasisForMul(Instruction *I);
383
384	// Splits LHS into Base + Index and, if succeeds, calls
385	// allocateCandidatesAndFindBasis.
386	void allocateCandidatesAndFindBasisForMul(Value LHS, Value RHS,
387	Instruction *I);
388
389	// Allocate candidates and find bases for GetElementPtr instructions.
390	void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
391
392	// Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
393	// basis.
394	void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
395	ConstantInt Idx, Value S,
396	Instruction *I);
397
398	// Rewrites candidate C with respect to Basis.
399	void rewriteCandidate(const Candidate &C);
400
401	// Emit code that computes the "bump" from Basis to C.
402	static Value emitBump(const* Candidate &Basis, const Candidate &C,
403	IRBuilder<> &Builder, const DataLayout *DL);
404
405	const DataLayout DL = nullptr*;
406	DominatorTree DT = nullptr*;
407	ScalarEvolution *SE;
408	TargetTransformInfo TTI = nullptr*;
409	std::list<Candidate> Candidates;
410
411	// Map from SCEV to instructions that represent the value,
412	// instructions are sorted in depth-first order.
413	DenseMap<const SCEV , SmallSetVector<Instruction , `2`>> SCEVToInsts;
414
415	// Record the dependency between instructions. If C.Basis == B, we would have
416	// {B.Ins -> {C.Ins, ...}}.
417	MapVector<Instruction , std::vector<Instruction >> DependencyGraph;
418
419	// Map between each instruction and its possible candidates.
420	DenseMap<Instruction , SmallVector<Candidate , `3`>> RewriteCandidates;
421
422	// All instructions that have candidates sort in topological order based on
423	// dependency graph, from roots to leaves.
424	std::vector<Instruction *> SortedCandidateInsts;
425
426	// Record all instructions that are already rewritten and will be removed
427	// later.
428	std::vector<Instruction *> DeadInstructions;
429
430	// Classify candidates against Delta kind
431	class CandidateDictTy {
432	public:
433	using CandsTy = SmallVector<Candidate *, `8`>;
434	using BBToCandsTy = DenseMap<const BasicBlock *, CandsTy>;
435
436	private:
437	// Index delta Basis must have the same (Base, StrideSCEV, Inst.Type)
438	using IndexDeltaKeyTy = std::tuple<const SCEV , const* SCEV , Type >;
439	DenseMap<IndexDeltaKeyTy, BBToCandsTy> IndexDeltaCandidates;
440
441	// Base delta Basis must have the same (StrideSCEV, Index, Inst.Type)
442	using BaseDeltaKeyTy = std::tuple<const SCEV , ConstantInt , Type *>;
443	DenseMap<BaseDeltaKeyTy, BBToCandsTy> BaseDeltaCandidates;
444
445	// Stride delta Basis must have the same (Base, Index, Inst.Type)
446	using StrideDeltaKeyTy = std::tuple<const SCEV , ConstantInt , Type *>;
447	DenseMap<StrideDeltaKeyTy, BBToCandsTy> StrideDeltaCandidates;
448
449	public:
450	// TODO: Disable index delta on GEP after we completely move
451	// from typed GEP to PtrAdd.
452	const BBToCandsTy getCandidatesWithDeltaKind(const* Candidate &C,
453	Candidate::DKind K) const {
454	assert(K != Candidate::InvalidDelta);
455	if (K == Candidate::IndexDelta) {
456	IndexDeltaKeyTy IndexDeltaKey(C.Base, C.StrideSCEV, C.Ins->getType());
457	auto It = IndexDeltaCandidates.find(Val: IndexDeltaKey);
458	if (It != IndexDeltaCandidates.end())
459	return &It ->second;
460	} else if (K == Candidate::BaseDelta) {
461	BaseDeltaKeyTy BaseDeltaKey(C.StrideSCEV, C.Index, C.Ins->getType());
462	auto It = BaseDeltaCandidates.find(Val: BaseDeltaKey);
463	if (It != BaseDeltaCandidates.end())
464	return &It ->second;
465	} else {
466	assert(K == Candidate::StrideDelta);
467	StrideDeltaKeyTy StrideDeltaKey(C.Base, C.Index, C.Ins->getType());
468	auto It = StrideDeltaCandidates.find(Val: StrideDeltaKey);
469	if (It != StrideDeltaCandidates.end())
470	return &It ->second;
471	}
472	return nullptr;
473	}
474
475	// Pointers to C must remain valid until CandidateDict is cleared.
476	void add(Candidate &C) {
477	Type *ValueType = C.Ins->getType();
478	BasicBlock *BB = C.Ins->getParent();
479	IndexDeltaKeyTy IndexDeltaKey(C.Base, C.StrideSCEV, ValueType);
480	BaseDeltaKeyTy BaseDeltaKey(C.StrideSCEV, C.Index, ValueType);
481	StrideDeltaKeyTy StrideDeltaKey(C.Base, C.Index, ValueType);
482	IndexDeltaCandidates [IndexDeltaKey][BB].push_back(Elt: &C);
483	BaseDeltaCandidates [BaseDeltaKey][BB].push_back(Elt: &C);
484	StrideDeltaCandidates [StrideDeltaKey][BB].push_back(Elt: &C);
485	}
486	// Remove all mappings from set
487	void clear() {
488	IndexDeltaCandidates.clear();
489	BaseDeltaCandidates.clear();
490	StrideDeltaCandidates.clear();
491	}
492	} CandidateDict;
493
494	const SCEV getAndRecordSCEV(Value V) {
495	auto *S = SE->getSCEV(V);
496	if (isa<Instruction>(Val: V) && !(isa<SCEVCouldNotCompute>(Val: S) \|\|
497	isa<SCEVUnknown>(Val: S) \|\| isa<SCEVConstant>(Val: S)))
498	SCEVToInsts [S].insert(X: cast<Instruction>(Val: V));
499
500	return S;
501	}
502
503	bool candidatePredicate(Candidate *Basis, Candidate &C, Candidate::DKind K);
504
505	bool searchFrom(const CandidateDictTy::BBToCandsTy &BBToCands, Candidate &C,
506	Candidate::DKind K);
507
508	// Get the nearest instruction before CI that represents the value of S,
509	// return nullptr if no instruction is associated with S or S is not a
510	// reusable expression.
511	Value getNearestValueOfSCEV(const* SCEV S, const* Instruction CI) const* {
512	if (isa<SCEVCouldNotCompute>(Val: S))
513	return nullptr;
514
515	if (auto *SU = dyn_cast<SCEVUnknown>(Val: S))
516	return SU->getValue();
517	if (auto *SC = dyn_cast<SCEVConstant>(Val: S))
518	return SC->getValue();
519
520	auto It = SCEVToInsts.find(Val: S);
521	if (It == SCEVToInsts.end())
522	return nullptr;
523
524	// Instructions are sorted in depth-first order, so search for the nearest
525	// instruction by walking the list in reverse order.
526	for (Instruction *I : reverse(C: It ->second))
527	if (DT->dominates(Def: I, User: CI))
528	return I;
529
530	return nullptr;
531	}
532
533	struct DeltaInfo {
534	Candidate *Cand;
535	Candidate::DKind DeltaKind;
536	Value *Delta;
537
538	DeltaInfo()
539	: Cand(nullptr), DeltaKind(Candidate::InvalidDelta), Delta(nullptr) {}
540	DeltaInfo(Candidate Cand, Candidate::DKind DeltaKind, Value Delta)
541	: Cand(Cand), DeltaKind(DeltaKind), Delta(Delta) {}
542	operator bool() const { return Cand != nullptr; }
543	};
544
545	friend raw_ostream &operator<<(raw_ostream &OS, const DeltaInfo &DI);
546
547	DeltaInfo compressPath(Candidate &C, Candidate Basis) const*;
548
549	Candidate pickRewriteCandidate(Instruction I) const;
550	void sortCandidateInstructions();
551	Value getDelta(const* Candidate &C, const Candidate &Basis,
552	Candidate::DKind K) const;
553	static bool isSimilar(Candidate &C, Candidate &Basis, Candidate::DKind K);
554
555	// Add Basis -> C in DependencyGraph and propagate
556	// C.Stride and C.Delta's dependency to C
557	void addDependency(Candidate &C, Candidate *Basis) {
558	if (Basis)
559	DependencyGraph [Basis->Ins].emplace_back(args&: C.Ins);
560
561	// If any candidate of Inst has a basis, then Inst will be rewritten,
562	// C must be rewritten after rewriting Inst, so we need to propagate
563	// the dependency to C
564	auto PropagateDependency = [&](Instruction *Inst) {
565	if (auto CandsIt = RewriteCandidates.find(Val: Inst);
566	CandsIt != RewriteCandidates.end() &&
567	llvm::any_of(Range&: CandsIt ->second,
568	P: [](Candidate Cand) { return* Cand->Basis; }))
569	DependencyGraph [Inst].emplace_back(args&: C.Ins);
570	};
571
572	// If C has a variable delta and the delta is a candidate,
573	// propagate its dependency to C
574	if (auto *DeltaInst = dyn_cast_or_null<Instruction>(Val: C.Delta))
575	PropagateDependency(DeltaInst);
576
577	// If the stride is a candidate, propagate its dependency to C
578	if (auto *StrideInst = dyn_cast<Instruction>(Val: C.Stride))
579	PropagateDependency(StrideInst);
580	};
581	};
582
583	inline raw_ostream &operator<<(raw_ostream &OS,
584	const StraightLineStrengthReduce::Candidate &C) {
585	OS << "Ins: " << C.Ins << "\n Base: " << C.Base
586	<< "\n Index: " << C.Index << "\n Stride: " << C.Stride
587	<< "\n StrideSCEV: " << *C.StrideSCEV;
588	if (C.Basis)
589	OS << "\n Delta: " << C.Delta << "\n Basis: \n [ " << C.Basis << " ]";
590	return OS;
591	}
592
593	[[maybe_unused]] LLVM_DUMP_METHOD inline raw_ostream &
594	operator<<(raw_ostream &OS, const StraightLineStrengthReduce::DeltaInfo &DI) {
595	OS << "Cand: " << *DI.Cand << "\n";
596	OS << "Delta Kind: ";
597	switch (DI.DeltaKind) {
598	case StraightLineStrengthReduce::Candidate::IndexDelta:
599	OS << "Index";
600	break;
601	case StraightLineStrengthReduce::Candidate::BaseDelta:
602	OS << "Base";
603	break;
604	case StraightLineStrengthReduce::Candidate::StrideDelta:
605	OS << "Stride";
606	break;
607	default:
608	break;
609	}
610	OS << "\nDelta: " << *DI.Delta;
611	return OS;
612	}
613
614	} // end anonymous namespace
615
616	char StraightLineStrengthReduceLegacyPass::ID = `0`;
617
618	INITIALIZE_PASS_BEGIN(StraightLineStrengthReduceLegacyPass, "slsr",
619	"Straight line strength reduction", false, false)
620	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
621	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
622	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
623	INITIALIZE_PASS_END(StraightLineStrengthReduceLegacyPass, "slsr",
624	"Straight line strength reduction", false, false)
625
626	FunctionPass *llvm::createStraightLineStrengthReducePass() {
627	return new StraightLineStrengthReduceLegacyPass ();
628	}
629
630	// A helper function that unifies the bitwidth of A and B.
631	static void unifyBitWidth(APInt &A, APInt &B) {
632	if (A.getBitWidth() < B.getBitWidth())
633	A = A.sext(width: B.getBitWidth());
634	else if (A.getBitWidth() > B.getBitWidth())
635	B = B.sext(width: A.getBitWidth());
636	}
637
638	Value StraightLineStrengthReduce::getDelta(const* Candidate &C,
639	const Candidate &Basis,
640	Candidate::DKind K) const {
641	if (K == Candidate::IndexDelta) {
642	APInt Idx = C.Index->getValue();
643	APInt BasisIdx = Basis.Index->getValue();
644	unifyBitWidth(A&: Idx, B&: BasisIdx);
645	APInt IndexDelta = Idx - BasisIdx;
646	IntegerType *DeltaType =
647	IntegerType::get(C&: C.Ins->getContext(), NumBits: IndexDelta.getBitWidth());
648	return ConstantInt::get(Ty: DeltaType, V: IndexDelta);
649	} else if (K == Candidate::BaseDelta \|\| K == Candidate::StrideDelta) {
650	const SCEV *BasisPart =
651	(K == Candidate::BaseDelta) ? Basis.Base : Basis.StrideSCEV;
652	const SCEV *CandPart = (K == Candidate::BaseDelta) ? C.Base : C.StrideSCEV;
653	const SCEV *Diff = SE->getMinusSCEV(LHS: CandPart, RHS: BasisPart);
654	return getNearestValueOfSCEV(S: Diff, CI: C.Ins);
655	}
656	return nullptr;
657	}
658
659	bool StraightLineStrengthReduce::isSimilar(Candidate &C, Candidate &Basis,
660	Candidate::DKind K) {
661	bool SameType = false;
662	switch (K) {
663	case Candidate::StrideDelta:
664	SameType = C.StrideSCEV->getType() == Basis.StrideSCEV->getType();
665	break;
666	case Candidate::BaseDelta:
667	SameType = C.Base->getType() == Basis.Base->getType();
668	break;
669	case Candidate::IndexDelta:
670	SameType = true;
671	break;
672	default:;
673	}
674	return SameType && Basis.Ins != C.Ins &&
675	Basis.CandidateKind == C.CandidateKind;
676	}
677
678	// Try to find a Delta that C can reuse Basis to rewrite.
679	// Set C.Delta, C.Basis, and C.DeltaKind if found.
680	// Return true if found a constant delta.
681	// Return false if not found or the delta is not a constant.
682	bool StraightLineStrengthReduce::candidatePredicate(Candidate *Basis,
683	Candidate &C,
684	Candidate::DKind K) {
685	SmallVector<Instruction *> DropPoisonGeneratingInsts;
686	// Ensure the IR of Basis->Ins is not more poisonous than its SCEV.
687	if (!isSimilar(C, Basis&: *Basis, K) \|\|
688	(EnablePoisonReuseGuard &&
689	!SE->canReuseInstruction(S: SE->getSCEV(V: Basis->Ins), I: Basis->Ins,
690	DropPoisonGeneratingInsts)))
691	return false;
692
693	assert(DT->dominates(Basis->Ins, C.Ins));
694	Value Delta = getDelta(C, Basis: Basis, K);
695	if (!Delta)
696	return false;
697
698	// IndexDelta rewrite is not always profitable, e.g.,
699	// X = B + 8 S*
700	// Y = B + S,
701	// rewriting Y to X - 7 S is probably a bad idea.*
702	// So, we need to check if the rewrite form's computation efficiency
703	// is better than the original form.
704	if (K == Candidate::IndexDelta &&
705	!C.isProfitableRewrite(Delta: *Delta, DeltaKind: Candidate::IndexDelta))
706	return false;
707
708	// If there is a Delta that we can reuse Basis to rewrite C,
709	// clean up DropPoisonGeneratingInsts returned by successful
710	// SE->canReuseInstruction()
711	for (Instruction *I : DropPoisonGeneratingInsts)
712	I->dropPoisonGeneratingAnnotations();
713
714	// Record delta if none has been found yet, or the new delta is
715	// a constant that is better than the existing delta.
716	if (!C.Delta \|\| isa<ConstantInt>(Val: Delta)) {
717	C.Delta = Delta;
718	C.Basis = Basis;
719	C.DeltaKind = K;
720	}
721	return isa<ConstantInt>(Val: C.Delta);
722	}
723
724	// return true if find a Basis with constant delta and stop searching,
725	// return false if did not find a Basis or the delta is not a constant
726	// and continue searching for a Basis with constant delta
727	bool StraightLineStrengthReduce::searchFrom(
728	const CandidateDictTy::BBToCandsTy &BBToCands, Candidate &C,
729	Candidate::DKind K) {
730
731	// Stride delta rewrite on Mul form is usually non-profitable, and Base
732	// delta rewrite sometimes is profitable, so we do not support them on Mul.
733	if (C.CandidateKind == Candidate::Mul && K != Candidate::IndexDelta)
734	return false;
735
736	// Search dominating candidates by walking the immediate-dominator chain
737	// from the candidate's defining block upward. Visiting blocks in this
738	// order ensures we prefer the closest dominating basis.
739	const BasicBlock *BB = C.Ins->getParent();
740	while (BB) {
741	auto It = BBToCands.find(Val: BB);
742	if (It != BBToCands.end())
743	for (Candidate *Basis : reverse(C: It ->second))
744	if (candidatePredicate(Basis, C, K))
745	return true;
746
747	const DomTreeNode *Node = DT->getNode(BB);
748	if (!Node)
749	break;
750	Node = Node->getIDom();
751	BB = Node ? Node->getBlock() : nullptr;
752	}
753	return false;
754	}
755
756	void StraightLineStrengthReduce::setBasisAndDeltaFor(Candidate &C) {
757	if (const auto *BaseDeltaCandidates =
758	CandidateDict.getCandidatesWithDeltaKind(C, K: Candidate::BaseDelta))
759	if (searchFrom(BBToCands: *BaseDeltaCandidates, C, K: Candidate::BaseDelta)) {
760	LLVM_DEBUG(dbgs() << "Found delta from Base: " << *C.Delta << "\n");
761	return;
762	}
763
764	if (const auto *StrideDeltaCandidates =
765	CandidateDict.getCandidatesWithDeltaKind(C, K: Candidate::StrideDelta))
766	if (searchFrom(BBToCands: *StrideDeltaCandidates, C, K: Candidate::StrideDelta)) {
767	LLVM_DEBUG(dbgs() << "Found delta from Stride: " << *C.Delta << "\n");
768	return;
769	}
770
771	if (const auto *IndexDeltaCandidates =
772	CandidateDict.getCandidatesWithDeltaKind(C, K: Candidate::IndexDelta))
773	if (searchFrom(BBToCands: *IndexDeltaCandidates, C, K: Candidate::IndexDelta)) {
774	LLVM_DEBUG(dbgs() << "Found delta from Index: " << *C.Delta << "\n");
775	return;
776	}
777
778	// If we did not find a constant delta, we might have found a variable delta
779	if (C.Delta) {
780	LLVM_DEBUG({
781	dbgs() << "Found delta from ";
782	if (C.DeltaKind == Candidate::BaseDelta)
783	dbgs() << "Base: ";
784	else
785	dbgs() << "Stride: ";
786	dbgs() << *C.Delta << "\n";
787	});
788	assert(C.DeltaKind != Candidate::InvalidDelta && C.Basis);
789	}
790	}
791
792	// Compress the path from `Basis` to the deepest Basis in the Basis chain
793	// to avoid non-profitable data dependency and improve ILP.
794	// X = A + 1
795	// Y = X + 1
796	// Z = Y + 1
797	// ->
798	// X = A + 1
799	// Y = A + 2
800	// Z = A + 3
801	// Return the delta info for C aginst the new Basis
802	auto StraightLineStrengthReduce::compressPath(Candidate &C,
803	Candidate Basis) const*
804	-> DeltaInfo {
805	if (!Basis \|\| !Basis->Basis \|\| C.CandidateKind == Candidate::Mul)
806	return {};
807	Candidate *Root = Basis;
808	Value NewDelta = nullptr*;
809	auto NewKind = Candidate::InvalidDelta;
810
811	while (Root->Basis) {
812	Candidate *NextRoot = Root->Basis;
813	if (C.Base == NextRoot->Base && C.StrideSCEV == NextRoot->StrideSCEV &&
814	isSimilar(C, Basis&: *NextRoot, K: Candidate::IndexDelta)) {
815	ConstantInt *CI =
816	cast<ConstantInt>(Val: getDelta(C, Basis: *NextRoot, K: Candidate::IndexDelta));
817	if (CI->isZero() \|\| CI->isOne() \|\| isa<SCEVConstant>(Val: C.StrideSCEV)) {
818	Root = NextRoot;
819	NewKind = Candidate::IndexDelta;
820	NewDelta = CI;
821	continue;
822	}
823	}
824
825	const SCEV CandPart = nullptr*;
826	const SCEV BasisPart = nullptr*;
827	auto CurrKind = Candidate::InvalidDelta;
828	if (C.Base == NextRoot->Base && C.Index == NextRoot->Index) {
829	CandPart = C.StrideSCEV;
830	BasisPart = NextRoot->StrideSCEV;
831	CurrKind = Candidate::StrideDelta;
832	} else if (C.StrideSCEV == NextRoot->StrideSCEV &&
833	C.Index == NextRoot->Index) {
834	CandPart = C.Base;
835	BasisPart = NextRoot->Base;
836	CurrKind = Candidate::BaseDelta;
837	} else
838	break;
839
840	assert(CandPart && BasisPart);
841	if (!isSimilar(C, Basis&: *NextRoot, K: CurrKind))
842	break;
843
844	if (auto DeltaVal =
845	dyn_cast<SCEVConstant>(Val: SE->getMinusSCEV(LHS: CandPart, RHS: BasisPart))) {
846	Root = NextRoot;
847	NewDelta = DeltaVal->getValue();
848	NewKind = CurrKind;
849	} else
850	break;
851	}
852
853	if (Root != Basis) {
854	assert(NewKind != Candidate::InvalidDelta && NewDelta);
855	LLVM_DEBUG(dbgs() << "Found new Basis with " << *NewDelta
856	<< " from path compression.\n");
857	return {Root, NewKind, NewDelta};
858	}
859
860	return {};
861	}
862
863	// Topologically sort candidate instructions based on their relationship in
864	// dependency graph.
865	void StraightLineStrengthReduce::sortCandidateInstructions() {
866	SortedCandidateInsts.clear();
867	// An instruction may have multiple candidates that get different Basis
868	// instructions, and each candidate can get dependencies from Basis and
869	// Stride when Stride will also be rewritten by SLSR. Hence, an instruction
870	// may have multiple dependencies. Use InDegree to ensure all dependencies
871	// processed before processing itself.
872	DenseMap<Instruction , int*> InDegree;
873	for (auto &KV : DependencyGraph) {
874	InDegree.try_emplace(Key: KV.first, Args: `0`);
875
876	for (auto *Child : KV.second) {
877	InDegree [Child]++;
878	}
879	}
880	std::queue<Instruction *> WorkList;
881	DenseSet<Instruction *> Visited;
882
883	for (auto &KV : DependencyGraph)
884	if (InDegree [KV.first] == `0`)
885	WorkList.push(x: KV.first);
886
887	while (!WorkList.empty()) {
888	Instruction *I = WorkList.front();
889	WorkList.pop();
890	if (!Visited.insert(V: I).second)
891	continue;
892
893	SortedCandidateInsts.push_back(x: I);
894
895	for (auto *Next : DependencyGraph [I]) {
896	auto &Degree = InDegree [Next];
897	if (--Degree == `0`)
898	WorkList.push(x: Next);
899	}
900	}
901
902	assert(SortedCandidateInsts.size() == DependencyGraph.size() &&
903	"Dependency graph should not have cycles");
904	}
905
906	auto StraightLineStrengthReduce::pickRewriteCandidate(Instruction I) const*
907	-> Candidate * {
908	// Return the candidate of instruction I that has the highest profit.
909	auto It = RewriteCandidates.find(Val: I);
910	if (It == RewriteCandidates.end())
911	return nullptr;
912
913	Candidate BestC = nullptr*;
914	auto BestEfficiency = Candidate::Unknown;
915	for (Candidate *C : reverse(C: It ->second))
916	if (C->Basis) {
917	auto Efficiency = C->getRewriteEfficiency();
918	if (Efficiency > BestEfficiency) {
919	BestEfficiency = Efficiency;
920	BestC = C;
921	}
922	}
923
924	return BestC;
925	}
926
927	static bool isGEPFoldable(GetElementPtrInst *GEP,
928	const TargetTransformInfo *TTI) {
929	SmallVector<const Value *, `4`> Indices(GEP->indices());
930	return TTI->getGEPCost(PointeeType: GEP->getSourceElementType(), Ptr: GEP->getPointerOperand(),
931	Operands: Indices) == TargetTransformInfo::TCC_Free;
932	}
933
934	// Returns whether (Base + Index Stride) can be folded to an addressing mode.*
935	static bool isAddFoldable(const SCEV Base, ConstantInt Index, Value *Stride,
936	TargetTransformInfo *TTI) {
937	// Index->getSExtValue() may crash if Index is wider than 64-bit.
938	return Index->getBitWidth() <= `64` &&
939	TTI->isLegalAddressingMode(Ty: Base->getType(), BaseGV: nullptr, BaseOffset: `0`, HasBaseReg: true,
940	Scale: Index->getSExtValue(), AddrSpace: UnknownAddressSpace);
941	}
942
943	bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
944	TargetTransformInfo *TTI) {
945	if (C.CandidateKind == Candidate::Add)
946	return isAddFoldable(Base: C.Base, Index: C.Index, Stride: C.Stride, TTI);
947	if (C.CandidateKind == Candidate::GEP)
948	return isGEPFoldable(GEP: cast<GetElementPtrInst>(Val: C.Ins), TTI);
949	return false;
950	}
951
952	void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
953	Candidate::Kind CT, const SCEV B, ConstantInt Idx, Value *S,
954	Instruction *I) {
955	// Record the SCEV of S that we may use it as a variable delta.
956	// Ensure that we rewrite C with a existing IR that reproduces delta value.
957
958	Candidate C(CT, B, Idx, S, I, getAndRecordSCEV(V: S));
959	// If we can fold I into an addressing mode, computing I is likely free or
960	// takes only one instruction. So, we don't need to analyze or rewrite it.
961	//
962	// Currently, this algorithm can at best optimize complex computations into
963	// a `variable +/ constant` form. However, some targets have stricter*
964	// constraints on the their addressing mode.
965	// For example, a `variable + constant` can only be folded to an addressing
966	// mode if the constant falls within a certain range.
967	// So, we also check if the instruction is already high efficient enough
968	// for the strength reduction algorithm.
969	if (!isFoldable(C, TTI) && !C.isHighEfficiency()) {
970	setBasisAndDeltaFor(C);
971
972	// Compress unnecessary rewrite to improve ILP
973	if (auto Res = compressPath(C, Basis: C.Basis)) {
974	C.Basis = Res.Cand;
975	C.DeltaKind = Res.DeltaKind;
976	C.Delta = Res.Delta;
977	}
978	}
979	// Regardless of whether we find a basis for C, we need to push C to the
980	// candidate list so that it can be the basis of other candidates.
981	LLVM_DEBUG(dbgs() << "Allocated Candidate: " << C << "\n");
982	Candidates.push_back(x: C);
983	RewriteCandidates [C.Ins].push_back(Elt: &Candidates.back());
984	CandidateDict.add(C&: Candidates.back());
985	}
986
987	void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
988	Instruction *I) {
989	switch (I->getOpcode()) {
990	case Instruction::Add:
991	allocateCandidatesAndFindBasisForAdd(I);
992	break;
993	case Instruction::Mul:
994	allocateCandidatesAndFindBasisForMul(I);
995	break;
996	case Instruction::GetElementPtr:
997	allocateCandidatesAndFindBasisForGEP(GEP: cast<GetElementPtrInst>(Val: I));
998	break;
999	}
1000	}
1001
1002	void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
1003	Instruction *I) {
1004	// Try matching B + i S.*
1005	if (!isa<IntegerType>(Val: I->getType()))
1006	return;
1007
1008	assert(I->getNumOperands() == `2` && "isn't I an add?");
1009	Value LHS = I->getOperand(i: `0`), RHS = I->getOperand(i: `1`);
1010	allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
1011	if (LHS != RHS)
1012	allocateCandidatesAndFindBasisForAdd(LHS: RHS, RHS: LHS, I);
1013	}
1014
1015	void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
1016	Value LHS, Value RHS, Instruction *I) {
1017	Value S = nullptr*;
1018	ConstantInt Idx = nullptr*;
1019	if (match(V: RHS, P: m_Mul(L: m_Value(V&: S), R: m_ConstantInt(CI&: Idx)))) {
1020	// I = LHS + RHS = LHS + Idx S*
1021	allocateCandidatesAndFindBasis(CT: Candidate::Add, B: SE->getSCEV(V: LHS), Idx, S, I);
1022	} else if (match(V: RHS, P: m_Shl(L: m_Value(V&: S), R: m_ConstantInt(CI&: Idx)))) {
1023	// I = LHS + RHS = LHS + (S << Idx) = LHS + S (1 << Idx)*
1024	APInt One(Idx->getBitWidth(), `1`);
1025	Idx = ConstantInt::get(Context&: Idx->getContext(), V: One << Idx->getValue());
1026	allocateCandidatesAndFindBasis(CT: Candidate::Add, B: SE->getSCEV(V: LHS), Idx, S, I);
1027	} else {
1028	// At least, I = LHS + 1 RHS*
1029	ConstantInt *One = ConstantInt::get(Ty: cast<IntegerType>(Val: I->getType()), V: `1`);
1030	allocateCandidatesAndFindBasis(CT: Candidate::Add, B: SE->getSCEV(V: LHS), Idx: One, S: RHS,
1031	I);
1032	}
1033	}
1034
1035	// Returns true if A matches B + C where C is constant.
1036	static bool matchesAdd(Value A, Value &B, ConstantInt *&C) {
1037	return match(V: A, P: m_c_Add(L: m_Value(V&: B), R: m_ConstantInt(CI&: C)));
1038	}
1039
1040	// Returns true if A matches B \| C where C is constant.
1041	static bool matchesOr(Value A, Value &B, ConstantInt *&C) {
1042	return match(V: A, P: m_c_Or(L: m_Value(V&: B), R: m_ConstantInt(CI&: C)));
1043	}
1044
1045	void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
1046	Value LHS, Value RHS, Instruction *I) {
1047	Value B = nullptr*;
1048	ConstantInt Idx = nullptr*;
1049	if (matchesAdd(A: LHS, B, C&: Idx)) {
1050	// If LHS is in the form of "Base + Index", then I is in the form of
1051	// "(Base + Index) RHS".*
1052	allocateCandidatesAndFindBasis(CT: Candidate::Mul, B: SE->getSCEV(V: B), Idx, S: RHS, I);
1053	} else if (matchesOr(A: LHS, B, C&: Idx) && haveNoCommonBitsSet(LHSCache: B, RHSCache: Idx, SQ: *DL)) {
1054	// If LHS is in the form of "Base \| Index" and Base and Index have no common
1055	// bits set, then
1056	// Base \| Index = Base + Index
1057	// and I is thus in the form of "(Base + Index) RHS".*
1058	allocateCandidatesAndFindBasis(CT: Candidate::Mul, B: SE->getSCEV(V: B), Idx, S: RHS, I);
1059	} else {
1060	// Otherwise, at least try the form (LHS + 0) RHS.*
1061	ConstantInt *Zero = ConstantInt::get(Ty: cast<IntegerType>(Val: I->getType()), V: `0`);
1062	allocateCandidatesAndFindBasis(CT: Candidate::Mul, B: SE->getSCEV(V: LHS), Idx: Zero, S: RHS,
1063	I);
1064	}
1065	}
1066
1067	void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
1068	Instruction *I) {
1069	// Try matching (B + i) S.*
1070	// TODO: we could extend SLSR to float and vector types.
1071	if (!isa<IntegerType>(Val: I->getType()))
1072	return;
1073
1074	assert(I->getNumOperands() == `2` && "isn't I a mul?");
1075	Value LHS = I->getOperand(i: `0`), RHS = I->getOperand(i: `1`);
1076	allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
1077	if (LHS != RHS) {
1078	// Symmetrically, try to split RHS to Base + Index.
1079	allocateCandidatesAndFindBasisForMul(LHS: RHS, RHS: LHS, I);
1080	}
1081	}
1082
1083	void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
1084	GetElementPtrInst *GEP) {
1085	// TODO: handle vector GEPs
1086	if (GEP->getType()->isVectorTy())
1087	return;
1088
1089	SmallVector<SCEVUse, `4`> IndexExprs;
1090	for (Use &Idx : GEP->indices())
1091	IndexExprs.push_back(Elt: SE->getSCEV(V: Idx));
1092
1093	gep_type_iterator GTI = gep_type_begin(GEP);
1094	for (unsigned I = `1`, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1095	if (GTI.isStruct())
1096	continue;
1097
1098	SCEVUse OrigIndexExpr = IndexExprs [I - `1`];
1099	IndexExprs [I - `1`] = SE->getZero(Ty: OrigIndexExpr.getPointer()->getType());
1100
1101	// The base of this candidate is GEP's base plus the offsets of all
1102	// indices except this current one.
1103	SCEVUse BaseExpr = SE->getGEPExpr(GEP: cast<GEPOperator>(Val: GEP), IndexExprs);
1104	Value *ArrayIdx = GEP->getOperand(i_nocapture: I);
1105	uint64_t ElementSize = GTI.getSequentialElementStride(DL: *DL);
1106	IntegerType *PtrIdxTy = cast<IntegerType>(Val: DL->getIndexType(PtrTy: GEP->getType()));
1107	// If the element size overflows the type, truncate.
1108	ConstantInt *ElementSizeIdx =
1109	ConstantInt::getSigned(Ty: PtrIdxTy, V: ElementSize, /ImplicitTrunc=/true);
1110	if (ArrayIdx->getType()->getIntegerBitWidth() <=
1111	DL->getIndexSizeInBits(AS: GEP->getAddressSpace())) {
1112	// Skip factoring if ArrayIdx is wider than the index size, because
1113	// ArrayIdx is implicitly truncated to the index size.
1114	allocateCandidatesAndFindBasis(CT: Candidate::GEP, B: BaseExpr, Idx: ElementSizeIdx,
1115	S: ArrayIdx, I: GEP);
1116	}
1117	// When ArrayIdx is the sext of a value, we try to factor that value as
1118	// well. Handling this case is important because array indices are
1119	// typically sign-extended to the pointer index size.
1120	Value TruncatedArrayIdx = nullptr*;
1121	if (match(V: ArrayIdx, P: m_SExt(Op: m_Value(V&: TruncatedArrayIdx))) &&
1122	TruncatedArrayIdx->getType()->getIntegerBitWidth() <=
1123	DL->getIndexSizeInBits(AS: GEP->getAddressSpace())) {
1124	// Skip factoring if TruncatedArrayIdx is wider than the pointer size,
1125	// because TruncatedArrayIdx is implicitly truncated to the pointer size.
1126	allocateCandidatesAndFindBasis(CT: Candidate::GEP, B: BaseExpr, Idx: ElementSizeIdx,
1127	S: TruncatedArrayIdx, I: GEP);
1128	}
1129
1130	IndexExprs [I - `1`] = OrigIndexExpr;
1131	}
1132	}
1133
1134	Value StraightLineStrengthReduce::emitBump(const* Candidate &Basis,
1135	const Candidate &C,
1136	IRBuilder<> &Builder,
1137	const DataLayout *DL) {
1138	auto CreateMul = [&](Value LHS, Value RHS) {
1139	if (ConstantInt *CR = dyn_cast<ConstantInt>(Val: RHS)) {
1140	const APInt &ConstRHS = CR->getValue();
1141	IntegerType *DeltaType =
1142	IntegerType::get(C&: C.Ins->getContext(), NumBits: ConstRHS.getBitWidth());
1143	if (ConstRHS.isPowerOf2()) {
1144	ConstantInt *Exponent =
1145	ConstantInt::get(Ty: DeltaType, V: ConstRHS.logBase2());
1146	return Builder.CreateShl(LHS, RHS: Exponent);
1147	}
1148	if (ConstRHS.isNegatedPowerOf2()) {
1149	ConstantInt *Exponent =
1150	ConstantInt::get(Ty: DeltaType, V: (-ConstRHS).logBase2());
1151	return Builder.CreateNeg(V: Builder.CreateShl(LHS, RHS: Exponent));
1152	}
1153	}
1154
1155	return Builder.CreateMul(LHS, RHS);
1156	};
1157
1158	Value *Delta = C.Delta;
1159	// If Delta is 0, C is a fully redundant of C.Basis,
1160	// just replace C.Ins with Basis.Ins
1161	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Delta);
1162	CI && CI->getValue().isZero())
1163	return nullptr;
1164
1165	if (C.DeltaKind == Candidate::IndexDelta) {
1166	APInt IndexDelta = cast<ConstantInt>(Val: C.Delta)->getValue();
1167	// IndexDelta
1168	// X = B + i S*
1169	// Y = B + i` S*
1170	// = B + (i + IndexDelta) S*
1171	// = B + i S + IndexDelta * S*
1172	// = X + IndexDelta S*
1173	// Bump = (i' - i) S*
1174
1175	// Common case 1: if (i' - i) is 1, Bump = S.
1176	if (IndexDelta == `1`)
1177	return C.Stride;
1178	// Common case 2: if (i' - i) is -1, Bump = -S.
1179	if (IndexDelta.isAllOnes())
1180	return Builder.CreateNeg(V: C.Stride);
1181
1182	IntegerType *DeltaType =
1183	IntegerType::get(C&: Basis.Ins->getContext(), NumBits: IndexDelta.getBitWidth());
1184	Value *ExtendedStride = Builder.CreateSExtOrTrunc(V: C.Stride, DestTy: DeltaType);
1185
1186	return CreateMul (ExtendedStride, C.Delta);
1187	}
1188
1189	assert(C.DeltaKind == Candidate::StrideDelta \|\|
1190	C.DeltaKind == Candidate::BaseDelta);
1191	assert(C.CandidateKind != Candidate::Mul);
1192	// StrideDelta
1193	// X = B + i S*
1194	// Y = B + i S'*
1195	// = B + i (S + StrideDelta)*
1196	// = B + i S + i * StrideDelta*
1197	// = X + i StrideDelta*
1198	// Bump = i (S' - S)*
1199	//
1200	// BaseDelta
1201	// X = B + i S*
1202	// Y = B' + i S*
1203	// = (B + BaseDelta) + i S*
1204	// = X + BaseDelta
1205	// Bump = (B' - B).
1206	Value *Bump = C.Delta;
1207	if (C.DeltaKind == Candidate::StrideDelta) {
1208	// If this value is consumed by a GEP, promote StrideDelta before doing
1209	// StrideDelta Index to ensure the same semantics as the original GEP.*
1210	if (C.CandidateKind == Candidate::GEP) {
1211	auto *GEP = cast<GetElementPtrInst>(Val: C.Ins);
1212	Type *NewScalarIndexTy =
1213	DL->getIndexType(PtrTy: GEP->getPointerOperandType()->getScalarType());
1214	Bump = Builder.CreateSExtOrTrunc(V: Bump, DestTy: NewScalarIndexTy);
1215	}
1216	if (!C.Index->isOne()) {
1217	Value *ExtendedIndex =
1218	Builder.CreateSExtOrTrunc(V: C.Index, DestTy: Bump->getType());
1219	Bump = CreateMul (Bump, ExtendedIndex);
1220	}
1221	}
1222	return Bump;
1223	}
1224
1225	void StraightLineStrengthReduce::rewriteCandidate(const Candidate &C) {
1226	if (!DebugCounter::shouldExecute(Counter&: StraightLineStrengthReduceCounter))
1227	return;
1228
1229	const Candidate &Basis = *C.Basis;
1230	assert(C.Delta && C.CandidateKind == Basis.CandidateKind &&
1231	C.hasValidDelta(Basis));
1232
1233	IRBuilder<> Builder(C.Ins);
1234	Value *Bump = emitBump(Basis, C, Builder, DL);
1235	Value Reduced = nullptr; // equivalent to but weaker than C.Ins*
1236	// If delta is 0, C is a fully redundant of Basis, and Bump is nullptr,
1237	// just replace C.Ins with Basis.Ins
1238	if (!Bump)
1239	Reduced = Basis.Ins;
1240	else {
1241	switch (C.CandidateKind) {
1242	case Candidate::Add:
1243	case Candidate::Mul: {
1244	// C = Basis + Bump
1245	Value *NegBump;
1246	if (match(V: Bump, P: m_Neg(V: m_Value(V&: NegBump)))) {
1247	// If Bump is a neg instruction, emit C = Basis - (-Bump).
1248	Reduced = Builder.CreateSub(LHS: Basis.Ins, RHS: NegBump);
1249	// We only use the negative argument of Bump, and Bump itself may be
1250	// trivially dead.
1251	RecursivelyDeleteTriviallyDeadInstructions(V: Bump);
1252	} else {
1253	// It's tempting to preserve nsw on Bump and/or Reduced. However, it's
1254	// usually unsound, e.g.,
1255	//
1256	// X = (-2 +nsw 1) nsw INT_MAX*
1257	// Y = (-2 +nsw 3) nsw INT_MAX*
1258	// =>
1259	// Y = X + 2 INT_MAX*
1260	//
1261	// Neither + and in the resultant expression are nsw.*
1262	Reduced = Builder.CreateAdd(LHS: Basis.Ins, RHS: Bump);
1263	}
1264	break;
1265	}
1266	case Candidate::GEP: {
1267	bool InBounds = cast<GetElementPtrInst>(Val: C.Ins)->isInBounds();
1268	// C = (char )Basis + Bump*
1269	Reduced = Builder.CreatePtrAdd(Ptr: Basis.Ins, Offset: Bump, Name: "", NW: InBounds);
1270	break;
1271	}
1272	default:
1273	llvm_unreachable("C.CandidateKind is invalid");
1274	};
1275	Reduced->takeName(V: C.Ins);
1276	}
1277	C.Ins->replaceAllUsesWith(V: Reduced);
1278	DeadInstructions.push_back(x: C.Ins);
1279	}
1280
1281	bool StraightLineStrengthReduceLegacyPass::runOnFunction(Function &F) {
1282	if (skipFunction(F))
1283	return false;
1284
1285	auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1286	auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1287	auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1288	return StraightLineStrengthReduce (DL, DT, SE, TTI).runOnFunction(F);
1289	}
1290
1291	bool StraightLineStrengthReduce::runOnFunction(Function &F) {
1292	LLVM_DEBUG(dbgs() << "SLSR on Function: " << F.getName() << "\n");
1293	// Traverse the dominator tree in the depth-first order. This order makes sure
1294	// all bases of a candidate are in Candidates when we process it.
1295	for (const auto Node : depth_first(G: DT))
1296	for (auto &I : *(Node->getBlock()))
1297	allocateCandidatesAndFindBasis(I: &I);
1298
1299	// Build the dependency graph and sort candidate instructions from dependency
1300	// roots to leaves
1301	for (auto &C : Candidates) {
1302	DependencyGraph.try_emplace(Key: C.Ins);
1303	addDependency(C, Basis: C.Basis);
1304	}
1305	sortCandidateInstructions();
1306
1307	// Rewrite candidates in the topological order that rewrites a Candidate
1308	// always before rewriting its Basis
1309	for (Instruction *I : reverse(C&: SortedCandidateInsts))
1310	if (Candidate *C = pickRewriteCandidate(I))
1311	rewriteCandidate(C: *C);
1312
1313	for (auto *DeadIns : DeadInstructions)
1314	// A dead instruction may be another dead instruction's op,
1315	// don't delete an instruction twice
1316	if (DeadIns->getParent())
1317	RecursivelyDeleteTriviallyDeadInstructions(V: DeadIns);
1318
1319	bool Ret = !DeadInstructions.empty();
1320	DeadInstructions.clear();
1321	DependencyGraph.clear();
1322	RewriteCandidates.clear();
1323	SortedCandidateInsts.clear();
1324	// First clear all references to candidates in the list
1325	CandidateDict.clear();
1326	// Then destroy the list
1327	Candidates.clear();
1328	return Ret;
1329	}
1330
1331	PreservedAnalyses
1332	StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
1333	const DataLayout *DL = &F.getDataLayout();
1334	auto *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
1335	auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(IR&: F);
1336	auto *TTI = &AM.getResult<TargetIRAnalysis>(IR&: F);
1337
1338	if (!StraightLineStrengthReduce (DL, DT, SE, TTI).runOnFunction(F))
1339	return PreservedAnalyses::all();
1340
1341	PreservedAnalyses PA;
1342	PA.preserveSet<CFGAnalyses>();
1343	PA.preserve<DominatorTreeAnalysis>();
1344	PA.preserve<ScalarEvolutionAnalysis>();
1345	PA.preserve<TargetIRAnalysis>();
1346	return PA;
1347	}
1348

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp