DependenceAnalysis.cpp source code [llvm_projects/llvm/lib/Analysis/DependenceAnalysis.cpp]

1	//===-- DependenceAnalysis.cpp - DA Implementation --------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// DependenceAnalysis is an LLVM pass that analyses dependences between memory
10	// accesses. Currently, it is an (incomplete) implementation of the approach
11	// described in
12	//
13	// Practical Dependence Testing
14	// Goff, Kennedy, Tseng
15	// PLDI 1991
16	//
17	// There's a single entry point that analyzes the dependence between a pair
18	// of memory references in a function, returning either NULL, for no dependence,
19	// or a more-or-less detailed description of the dependence between them.
20	//
21	// Currently, the implementation cannot propagate constraints between
22	// coupled RDIV subscripts and lacks a multi-subscript MIV test.
23	// Both of these are conservative weaknesses;
24	// that is, not a source of correctness problems.
25	//
26	// Since Clang linearizes some array subscripts, the dependence
27	// analysis is using SCEV->delinearize to recover the representation of multiple
28	// subscripts, and thus avoid the more expensive and less precise MIV tests. The
29	// delinearization is controlled by the flag -da-delinearize.
30	//
31	// We should pay some careful attention to the possibility of integer overflow
32	// in the implementation of the various tests. This could happen with Add,
33	// Subtract, or Multiply, with both APInt's and SCEV's.
34	//
35	// Some non-linear subscript pairs can be handled by the GCD test
36	// (and perhaps other tests).
37	// Should explore how often these things occur.
38	//
39	// Finally, it seems like certain test cases expose weaknesses in the SCEV
40	// simplification, especially in the handling of sign and zero extensions.
41	// It could be useful to spend time exploring these.
42	//
43	// Please note that this is work in progress and the interface is subject to
44	// change.
45	//
46	//===----------------------------------------------------------------------===//
47	// //
48	// In memory of Ken Kennedy, 1945 - 2007 //
49	// //
50	//===----------------------------------------------------------------------===//
51
52	#include "llvm/Analysis/DependenceAnalysis.h"
53	#include "llvm/ADT/Statistic.h"
54	#include "llvm/Analysis/AliasAnalysis.h"
55	#include "llvm/Analysis/Delinearization.h"
56	#include "llvm/Analysis/LoopInfo.h"
57	#include "llvm/Analysis/ScalarEvolution.h"
58	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
59	#include "llvm/Analysis/ValueTracking.h"
60	#include "llvm/IR/InstIterator.h"
61	#include "llvm/IR/Module.h"
62	#include "llvm/InitializePasses.h"
63	#include "llvm/Support/CommandLine.h"
64	#include "llvm/Support/Debug.h"
65	#include "llvm/Support/ErrorHandling.h"
66	#include "llvm/Support/raw_ostream.h"
67
68	using namespace llvm;
69
70	#define DEBUG_TYPE "da"
71
72	//===----------------------------------------------------------------------===//
73	// statistics
74
75	STATISTIC(TotalArrayPairs, "Array pairs tested");
76	STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
77	STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
78	STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
79	STATISTIC(ZIVapplications, "ZIV applications");
80	STATISTIC(ZIVindependence, "ZIV independence");
81	STATISTIC(StrongSIVapplications, "Strong SIV applications");
82	STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
83	STATISTIC(StrongSIVindependence, "Strong SIV independence");
84	STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
85	STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
86	STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
87	STATISTIC(ExactSIVapplications, "Exact SIV applications");
88	STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
89	STATISTIC(ExactSIVindependence, "Exact SIV independence");
90	STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
91	STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
92	STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
93	STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
94	STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
95	STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
96	STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
97	STATISTIC(DeltaApplications, "Delta applications");
98	STATISTIC(DeltaSuccesses, "Delta successes");
99	STATISTIC(DeltaIndependence, "Delta independence");
100	STATISTIC(DeltaPropagations, "Delta propagations");
101	STATISTIC(GCDapplications, "GCD applications");
102	STATISTIC(GCDsuccesses, "GCD successes");
103	STATISTIC(GCDindependence, "GCD independence");
104	STATISTIC(BanerjeeApplications, "Banerjee applications");
105	STATISTIC(BanerjeeIndependence, "Banerjee independence");
106	STATISTIC(BanerjeeSuccesses, "Banerjee successes");
107
108	static cl::opt<bool>
109	Delinearize("da-delinearize", cl::init(Val: true), cl::Hidden,
110	cl::desc ("Try to delinearize array references."));
111	static cl::opt<bool> DisableDelinearizationChecks(
112	"da-disable-delinearization-checks", cl::Hidden,
113	cl::desc (
114	"Disable checks that try to statically verify validity of "
115	"delinearized subscripts. Enabling this option may result in incorrect "
116	"dependence vectors for languages that allow the subscript of one "
117	"dimension to underflow or overflow into another dimension."));
118
119	static cl::opt<unsigned> MIVMaxLevelThreshold(
120	"da-miv-max-level-threshold", cl::init(Val: `7`), cl::Hidden,
121	cl::desc ("Maximum depth allowed for the recursive algorithm used to "
122	"explore MIV direction vectors."));
123
124	//===----------------------------------------------------------------------===//
125	// basics
126
127	DependenceAnalysis::Result
128	DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
129	auto &AA = FAM.getResult<AAManager>(IR&: F);
130	auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
131	auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
132	return DependenceInfo (&F, &AA, &SE, &LI);
133	}
134
135	AnalysisKey DependenceAnalysis::Key;
136
137	INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
138	"Dependence Analysis", true, true)
139	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
140	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
141	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
142	INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
143	true, true)
144
145	char DependenceAnalysisWrapperPass::ID = `0`;
146
147	DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
148	: FunctionPass (ID) {
149	initializeDependenceAnalysisWrapperPassPass(Registry&: *PassRegistry::getPassRegistry());
150	}
151
152	FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
153	return new DependenceAnalysisWrapperPass ();
154	}
155
156	bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
157	auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
158	auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
159	auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
160	info.reset(p: new DependenceInfo (&F, &AA, &SE, &LI));
161	return false;
162	}
163
164	DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
165
166	void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
167
168	void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
169	AU.setPreservesAll();
170	AU.addRequiredTransitive<AAResultsWrapperPass>();
171	AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
172	AU.addRequiredTransitive<LoopInfoWrapperPass>();
173	}
174
175	// Used to test the dependence analyzer.
176	// Looks through the function, noting instructions that may access memory.
177	// Calls depends() on every possible pair and prints out the result.
178	// Ignores all other instructions.
179	static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
180	ScalarEvolution &SE, bool NormalizeResults) {
181	auto *F = DA->getFunction();
182	for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
183	++SrcI) {
184	if (SrcI ->mayReadOrWriteMemory()) {
185	for (inst_iterator DstI = SrcI, DstE = inst_end(F);
186	DstI != DstE; ++DstI) {
187	if (DstI ->mayReadOrWriteMemory()) {
188	OS << "Src:" << SrcI << " --> Dst:" << DstI << "\n";
189	OS << " da analyze - ";
190	if (auto D = DA->depends(Src: &SrcI, Dst: &DstI, PossiblyLoopIndependent: true)) {
191	// Normalize negative direction vectors if required by clients.
192	if (NormalizeResults && D ->normalize(SE: &SE))
193	OS << "normalized - ";
194	D ->dump(OS);
195	for (unsigned Level = `1`; Level <= D ->getLevels(); Level++) {
196	if (D ->isSplitable(Level)) {
197	OS << " da analyze - split level = " << Level;
198	OS << ", iteration = " << DA->getSplitIteration(Dep: D, Level);
199	OS << "!\n";
200	}
201	}
202	}
203	else
204	OS << "none!\n";
205	}
206	}
207	}
208	}
209	}
210
211	void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
212	const Module ) const* {
213	dumpExampleDependence(OS, DA: info.get(),
214	SE&: getAnalysis<ScalarEvolutionWrapperPass>().getSE(), NormalizeResults: false);
215	}
216
217	PreservedAnalyses
218	DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
219	OS << "'Dependence Analysis' for function '" << F.getName() << "':\n";
220	dumpExampleDependence(OS, DA: &FAM.getResult<DependenceAnalysis>(IR&: F),
221	SE&: FAM.getResult<ScalarEvolutionAnalysis>(IR&: F),
222	NormalizeResults);
223	return PreservedAnalyses::all();
224	}
225
226	//===----------------------------------------------------------------------===//
227	// Dependence methods
228
229	// Returns true if this is an input dependence.
230	bool Dependence::isInput() const {
231	return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
232	}
233
234
235	// Returns true if this is an output dependence.
236	bool Dependence::isOutput() const {
237	return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
238	}
239
240
241	// Returns true if this is an flow (aka true) dependence.
242	bool Dependence::isFlow() const {
243	return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
244	}
245
246
247	// Returns true if this is an anti dependence.
248	bool Dependence::isAnti() const {
249	return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
250	}
251
252
253	// Returns true if a particular level is scalar; that is,
254	// if no subscript in the source or destination mention the induction
255	// variable associated with the loop at this level.
256	// Leave this out of line, so it will serve as a virtual method anchor
257	bool Dependence::isScalar(unsigned level) const {
258	return false;
259	}
260
261
262	//===----------------------------------------------------------------------===//
263	// FullDependence methods
264
265	FullDependence::FullDependence(Instruction Source, Instruction Destination,
266	bool PossiblyLoopIndependent,
267	unsigned CommonLevels)
268	: Dependence (Source, Destination), Levels(CommonLevels),
269	LoopIndependent(PossiblyLoopIndependent) {
270	Consistent = true;
271	if (CommonLevels)
272	DV = std::make_unique<DVEntry[]>(num: CommonLevels);
273	}
274
275	// FIXME: in some cases the meaning of a negative direction vector
276	// may not be straightforward, e.g.,
277	// for (int i = 0; i < 32; ++i) {
278	// Src: A[i] = ...;
279	// Dst: use(A[31 - i]);
280	// }
281	// The dependency is
282	// flow { Src[i] -> Dst[31 - i] : when i >= 16 } and
283	// anti { Dst[i] -> Src[31 - i] : when i < 16 },
284	// -- hence a [<>].
285	// As long as a dependence result contains '>' ('<>', '<=>', ""), it*
286	// means that a reversed/normalized dependence needs to be considered
287	// as well. Nevertheless, current isDirectionNegative() only returns
288	// true with a '>' or '>=' dependency for ease of canonicalizing the
289	// dependency vector, since the reverse of '<>', '<=>' and "" is itself.*
290	bool FullDependence::isDirectionNegative() const {
291	for (unsigned Level = `1`; Level <= Levels; ++Level) {
292	unsigned char Direction = DV [Level - `1`].Direction;
293	if (Direction == Dependence::DVEntry::EQ)
294	continue;
295	if (Direction == Dependence::DVEntry::GT \|\|
296	Direction == Dependence::DVEntry::GE)
297	return true;
298	return false;
299	}
300	return false;
301	}
302
303	bool FullDependence::normalize(ScalarEvolution *SE) {
304	if (!isDirectionNegative())
305	return false;
306
307	LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
308	dump(dbgs()););
309	std::swap(a&: Src, b&: Dst);
310	for (unsigned Level = `1`; Level <= Levels; ++Level) {
311	unsigned char Direction = DV [Level - `1`].Direction;
312	// Reverse the direction vector, this means LT becomes GT
313	// and GT becomes LT.
314	unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
315	if (Direction & Dependence::DVEntry::LT)
316	RevDirection \|= Dependence::DVEntry::GT;
317	if (Direction & Dependence::DVEntry::GT)
318	RevDirection \|= Dependence::DVEntry::LT;
319	DV [Level - `1`].Direction = RevDirection;
320	// Reverse the dependence distance as well.
321	if (DV [Level - `1`].Distance != nullptr)
322	DV [Level - `1`].Distance =
323	SE->getNegativeSCEV(V: DV [Level - `1`].Distance);
324	}
325
326	LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
327	dump(dbgs()););
328	return true;
329	}
330
331	// The rest are simple getters that hide the implementation.
332
333	// getDirection - Returns the direction associated with a particular level.
334	unsigned FullDependence::getDirection(unsigned Level) const {
335	assert(`0` < Level && Level <= Levels && "Level out of range");
336	return DV [Level - `1`].Direction;
337	}
338
339
340	// Returns the distance (or NULL) associated with a particular level.
341	const SCEV FullDependence::getDistance(unsigned* Level) const {
342	assert(`0` < Level && Level <= Levels && "Level out of range");
343	return DV [Level - `1`].Distance;
344	}
345
346
347	// Returns true if a particular level is scalar; that is,
348	// if no subscript in the source or destination mention the induction
349	// variable associated with the loop at this level.
350	bool FullDependence::isScalar(unsigned Level) const {
351	assert(`0` < Level && Level <= Levels && "Level out of range");
352	return DV [Level - `1`].Scalar;
353	}
354
355
356	// Returns true if peeling the first iteration from this loop
357	// will break this dependence.
358	bool FullDependence::isPeelFirst(unsigned Level) const {
359	assert(`0` < Level && Level <= Levels && "Level out of range");
360	return DV [Level - `1`].PeelFirst;
361	}
362
363
364	// Returns true if peeling the last iteration from this loop
365	// will break this dependence.
366	bool FullDependence::isPeelLast(unsigned Level) const {
367	assert(`0` < Level && Level <= Levels && "Level out of range");
368	return DV [Level - `1`].PeelLast;
369	}
370
371
372	// Returns true if splitting this loop will break the dependence.
373	bool FullDependence::isSplitable(unsigned Level) const {
374	assert(`0` < Level && Level <= Levels && "Level out of range");
375	return DV [Level - `1`].Splitable;
376	}
377
378
379	//===----------------------------------------------------------------------===//
380	// DependenceInfo::Constraint methods
381
382	// If constraint is a point <X, Y>, returns X.
383	// Otherwise assert.
384	const SCEV DependenceInfo::Constraint::getX() const* {
385	assert(Kind == Point && "Kind should be Point");
386	return A;
387	}
388
389
390	// If constraint is a point <X, Y>, returns Y.
391	// Otherwise assert.
392	const SCEV DependenceInfo::Constraint::getY() const* {
393	assert(Kind == Point && "Kind should be Point");
394	return B;
395	}
396
397
398	// If constraint is a line AX + BY = C, returns A.
399	// Otherwise assert.
400	const SCEV DependenceInfo::Constraint::getA() const* {
401	assert((Kind == Line \|\| Kind == Distance) &&
402	"Kind should be Line (or Distance)");
403	return A;
404	}
405
406
407	// If constraint is a line AX + BY = C, returns B.
408	// Otherwise assert.
409	const SCEV DependenceInfo::Constraint::getB() const* {
410	assert((Kind == Line \|\| Kind == Distance) &&
411	"Kind should be Line (or Distance)");
412	return B;
413	}
414
415
416	// If constraint is a line AX + BY = C, returns C.
417	// Otherwise assert.
418	const SCEV DependenceInfo::Constraint::getC() const* {
419	assert((Kind == Line \|\| Kind == Distance) &&
420	"Kind should be Line (or Distance)");
421	return C;
422	}
423
424
425	// If constraint is a distance, returns D.
426	// Otherwise assert.
427	const SCEV DependenceInfo::Constraint::getD() const* {
428	assert(Kind == Distance && "Kind should be Distance");
429	return SE->getNegativeSCEV(V: C);
430	}
431
432
433	// Returns the loop associated with this constraint.
434	const Loop DependenceInfo::Constraint::getAssociatedLoop() const* {
435	assert((Kind == Distance \|\| Kind == Line \|\| Kind == Point) &&
436	"Kind should be Distance, Line, or Point");
437	return AssociatedLoop;
438	}
439
440	void DependenceInfo::Constraint::setPoint(const SCEV X, const* SCEV *Y,
441	const Loop *CurLoop) {
442	Kind = Point;
443	A = X;
444	B = Y;
445	AssociatedLoop = CurLoop;
446	}
447
448	void DependenceInfo::Constraint::setLine(const SCEV AA, const* SCEV *BB,
449	const SCEV CC, const* Loop *CurLoop) {
450	Kind = Line;
451	A = AA;
452	B = BB;
453	C = CC;
454	AssociatedLoop = CurLoop;
455	}
456
457	void DependenceInfo::Constraint::setDistance(const SCEV *D,
458	const Loop *CurLoop) {
459	Kind = Distance;
460	A = SE->getOne(Ty: D->getType());
461	B = SE->getNegativeSCEV(V: A);
462	C = SE->getNegativeSCEV(V: D);
463	AssociatedLoop = CurLoop;
464	}
465
466	void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
467
468	void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
469	SE = NewSE;
470	Kind = Any;
471	}
472
473	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
474	// For debugging purposes. Dumps the constraint out to OS.
475	LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
476	if (isEmpty())
477	OS << " Empty\n";
478	else if (isAny())
479	OS << " Any\n";
480	else if (isPoint())
481	OS << " Point is <" << getX() << ", " << getY() << ">\n";
482	else if (isDistance())
483	OS << " Distance is " << *getD() <<
484	" (" << getA() << "X + " << getB() << "Y = " << *getC() << ")\n";
485	else if (isLine())
486	OS << " Line is " << getA() << "X + " <<
487	getB() << "Y = " << *getC() << "\n";
488	else
489	llvm_unreachable("unknown constraint type in Constraint::dump");
490	}
491	#endif
492
493
494	// Updates X with the intersection
495	// of the Constraints X and Y. Returns true if X has changed.
496	// Corresponds to Figure 4 from the paper
497	//
498	// Practical Dependence Testing
499	// Goff, Kennedy, Tseng
500	// PLDI 1991
501	bool DependenceInfo::intersectConstraints(Constraint X, const* Constraint *Y) {
502	++DeltaApplications;
503	LLVM_DEBUG(dbgs() << "\tintersect constraints\n");
504	LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
505	LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
506	assert(!Y->isPoint() && "Y must not be a Point");
507	if (X->isAny()) {
508	if (Y->isAny())
509	return false;
510	X = Y;
511	return true;
512	}
513	if (X->isEmpty())
514	return false;
515	if (Y->isEmpty()) {
516	X->setEmpty();
517	return true;
518	}
519
520	if (X->isDistance() && Y->isDistance()) {
521	LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n");
522	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: X->getD(), Y: Y->getD()))
523	return false;
524	if (isKnownPredicate(Pred: CmpInst::ICMP_NE, X: X->getD(), Y: Y->getD())) {
525	X->setEmpty();
526	++DeltaSuccesses;
527	return true;
528	}
529	// Hmmm, interesting situation.
530	// I guess if either is constant, keep it and ignore the other.
531	if (isa<SCEVConstant>(Val: Y->getD())) {
532	X = Y;
533	return true;
534	}
535	return false;
536	}
537
538	// At this point, the pseudo-code in Figure 4 of the paper
539	// checks if (X->isPoint() && Y->isPoint()).
540	// This case can't occur in our implementation,
541	// since a Point can only arise as the result of intersecting
542	// two Line constraints, and the right-hand value, Y, is never
543	// the result of an intersection.
544	assert(!(X->isPoint() && Y->isPoint()) &&
545	"We shouldn't ever see X->isPoint() && Y->isPoint()");
546
547	if (X->isLine() && Y->isLine()) {
548	LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n");
549	const SCEV *Prod1 = SE->getMulExpr(LHS: X->getA(), RHS: Y->getB());
550	const SCEV *Prod2 = SE->getMulExpr(LHS: X->getB(), RHS: Y->getA());
551	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: Prod1, Y: Prod2)) {
552	// slopes are equal, so lines are parallel
553	LLVM_DEBUG(dbgs() << "\t\tsame slope\n");
554	Prod1 = SE->getMulExpr(LHS: X->getC(), RHS: Y->getB());
555	Prod2 = SE->getMulExpr(LHS: X->getB(), RHS: Y->getC());
556	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: Prod1, Y: Prod2))
557	return false;
558	if (isKnownPredicate(Pred: CmpInst::ICMP_NE, X: Prod1, Y: Prod2)) {
559	X->setEmpty();
560	++DeltaSuccesses;
561	return true;
562	}
563	return false;
564	}
565	if (isKnownPredicate(Pred: CmpInst::ICMP_NE, X: Prod1, Y: Prod2)) {
566	// slopes differ, so lines intersect
567	LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");
568	const SCEV *C1B2 = SE->getMulExpr(LHS: X->getC(), RHS: Y->getB());
569	const SCEV *C1A2 = SE->getMulExpr(LHS: X->getC(), RHS: Y->getA());
570	const SCEV *C2B1 = SE->getMulExpr(LHS: Y->getC(), RHS: X->getB());
571	const SCEV *C2A1 = SE->getMulExpr(LHS: Y->getC(), RHS: X->getA());
572	const SCEV *A1B2 = SE->getMulExpr(LHS: X->getA(), RHS: Y->getB());
573	const SCEV *A2B1 = SE->getMulExpr(LHS: Y->getA(), RHS: X->getB());
574	const SCEVConstant *C1A2_C2A1 =
575	dyn_cast<SCEVConstant>(Val: SE->getMinusSCEV(LHS: C1A2, RHS: C2A1));
576	const SCEVConstant *C1B2_C2B1 =
577	dyn_cast<SCEVConstant>(Val: SE->getMinusSCEV(LHS: C1B2, RHS: C2B1));
578	const SCEVConstant *A1B2_A2B1 =
579	dyn_cast<SCEVConstant>(Val: SE->getMinusSCEV(LHS: A1B2, RHS: A2B1));
580	const SCEVConstant *A2B1_A1B2 =
581	dyn_cast<SCEVConstant>(Val: SE->getMinusSCEV(LHS: A2B1, RHS: A1B2));
582	if (!C1B2_C2B1 \|\| !C1A2_C2A1 \|\|
583	!A1B2_A2B1 \|\| !A2B1_A1B2)
584	return false;
585	APInt Xtop = C1B2_C2B1->getAPInt();
586	APInt Xbot = A1B2_A2B1->getAPInt();
587	APInt Ytop = C1A2_C2A1->getAPInt();
588	APInt Ybot = A2B1_A1B2->getAPInt();
589	LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
590	LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
591	LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
592	LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
593	APInt Xq = Xtop; // these need to be initialized, even
594	APInt Xr = Xtop; // though they're just going to be overwritten
595	APInt::sdivrem(LHS: Xtop, RHS: Xbot, Quotient&: Xq, Remainder&: Xr);
596	APInt Yq = Ytop;
597	APInt Yr = Ytop;
598	APInt::sdivrem(LHS: Ytop, RHS: Ybot, Quotient&: Yq, Remainder&: Yr);
599	if (Xr != `0` \|\| Yr != `0`) {
600	X->setEmpty();
601	++DeltaSuccesses;
602	return true;
603	}
604	LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
605	if (Xq.slt(RHS: `0`) \|\| Yq.slt(RHS: `0`)) {
606	X->setEmpty();
607	++DeltaSuccesses;
608	return true;
609	}
610	if (const SCEVConstant *CUB =
611	collectConstantUpperBound(l: X->getAssociatedLoop(), T: Prod1->getType())) {
612	const APInt &UpperBound = CUB->getAPInt();
613	LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
614	if (Xq.sgt(RHS: UpperBound) \|\| Yq.sgt(RHS: UpperBound)) {
615	X->setEmpty();
616	++DeltaSuccesses;
617	return true;
618	}
619	}
620	X->setPoint(X: SE->getConstant(Val: Xq),
621	Y: SE->getConstant(Val: Yq),
622	CurLoop: X->getAssociatedLoop());
623	++DeltaSuccesses;
624	return true;
625	}
626	return false;
627	}
628
629	// if (X->isLine() && Y->isPoint()) This case can't occur.
630	assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
631
632	if (X->isPoint() && Y->isLine()) {
633	LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n");
634	const SCEV *A1X1 = SE->getMulExpr(LHS: Y->getA(), RHS: X->getX());
635	const SCEV *B1Y1 = SE->getMulExpr(LHS: Y->getB(), RHS: X->getY());
636	const SCEV *Sum = SE->getAddExpr(LHS: A1X1, RHS: B1Y1);
637	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: Sum, Y: Y->getC()))
638	return false;
639	if (isKnownPredicate(Pred: CmpInst::ICMP_NE, X: Sum, Y: Y->getC())) {
640	X->setEmpty();
641	++DeltaSuccesses;
642	return true;
643	}
644	return false;
645	}
646
647	llvm_unreachable("shouldn't reach the end of Constraint intersection");
648	return false;
649	}
650
651
652	//===----------------------------------------------------------------------===//
653	// DependenceInfo methods
654
655	// For debugging purposes. Dumps a dependence to OS.
656	void Dependence::dump(raw_ostream &OS) const {
657	bool Splitable = false;
658	if (isConfused())
659	OS << "confused";
660	else {
661	if (isConsistent())
662	OS << "consistent ";
663	if (isFlow())
664	OS << "flow";
665	else if (isOutput())
666	OS << "output";
667	else if (isAnti())
668	OS << "anti";
669	else if (isInput())
670	OS << "input";
671	unsigned Levels = getLevels();
672	OS << " [";
673	for (unsigned II = `1`; II <= Levels; ++II) {
674	if (isSplitable(Level: II))
675	Splitable = true;
676	if (isPeelFirst(Level: II))
677	OS << `'p'`;
678	const SCEV *Distance = getDistance(Level: II);
679	if (Distance)
680	OS << *Distance;
681	else if (isScalar(level: II))
682	OS << "S";
683	else {
684	unsigned Direction = getDirection(Level: II);
685	if (Direction == DVEntry::ALL)
686	OS << "*";
687	else {
688	if (Direction & DVEntry::LT)
689	OS << "<";
690	if (Direction & DVEntry::EQ)
691	OS << "=";
692	if (Direction & DVEntry::GT)
693	OS << ">";
694	}
695	}
696	if (isPeelLast(Level: II))
697	OS << `'p'`;
698	if (II < Levels)
699	OS << " ";
700	}
701	if (isLoopIndependent())
702	OS << "\|<";
703	OS << "]";
704	if (Splitable)
705	OS << " splitable";
706	}
707	OS << "!\n";
708	}
709
710	// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
711	// underlaying objects. If LocA and LocB are known to not alias (for any reason:
712	// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
713	// Otherwise the underlying objects are checked to see if they point to
714	// different identifiable objects.
715	static AliasResult underlyingObjectsAlias(AAResults *AA,
716	const DataLayout &DL,
717	const MemoryLocation &LocA,
718	const MemoryLocation &LocB) {
719	// Check the original locations (minus size) for noalias, which can happen for
720	// tbaa, incompatible underlying object locations, etc.
721	MemoryLocation LocAS =
722	MemoryLocation::getBeforeOrAfter(Ptr: LocA.Ptr, AATags: LocA.AATags);
723	MemoryLocation LocBS =
724	MemoryLocation::getBeforeOrAfter(Ptr: LocB.Ptr, AATags: LocB.AATags);
725	if (AA->isNoAlias(LocA: LocAS, LocB: LocBS))
726	return AliasResult::NoAlias;
727
728	// Check the underlying objects are the same
729	const Value *AObj = getUnderlyingObject(V: LocA.Ptr);
730	const Value *BObj = getUnderlyingObject(V: LocB.Ptr);
731
732	// If the underlying objects are the same, they must alias
733	if (AObj == BObj)
734	return AliasResult::MustAlias;
735
736	// We may have hit the recursion limit for underlying objects, or have
737	// underlying objects where we don't know they will alias.
738	if (!isIdentifiedObject(V: AObj) \|\| !isIdentifiedObject(V: BObj))
739	return AliasResult::MayAlias;
740
741	// Otherwise we know the objects are different and both identified objects so
742	// must not alias.
743	return AliasResult::NoAlias;
744	}
745
746
747	// Returns true if the load or store can be analyzed. Atomic and volatile
748	// operations have properties which this analysis does not understand.
749	static
750	bool isLoadOrStore(const Instruction *I) {
751	if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
752	return LI->isUnordered();
753	else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
754	return SI->isUnordered();
755	return false;
756	}
757
758
759	// Examines the loop nesting of the Src and Dst
760	// instructions and establishes their shared loops. Sets the variables
761	// CommonLevels, SrcLevels, and MaxLevels.
762	// The source and destination instructions needn't be contained in the same
763	// loop. The routine establishNestingLevels finds the level of most deeply
764	// nested loop that contains them both, CommonLevels. An instruction that's
765	// not contained in a loop is at level = 0. MaxLevels is equal to the level
766	// of the source plus the level of the destination, minus CommonLevels.
767	// This lets us allocate vectors MaxLevels in length, with room for every
768	// distinct loop referenced in both the source and destination subscripts.
769	// The variable SrcLevels is the nesting depth of the source instruction.
770	// It's used to help calculate distinct loops referenced by the destination.
771	// Here's the map from loops to levels:
772	// 0 - unused
773	// 1 - outermost common loop
774	// ... - other common loops
775	// CommonLevels - innermost common loop
776	// ... - loops containing Src but not Dst
777	// SrcLevels - innermost loop containing Src but not Dst
778	// ... - loops containing Dst but not Src
779	// MaxLevels - innermost loops containing Dst but not Src
780	// Consider the follow code fragment:
781	// for (a = ...) {
782	// for (b = ...) {
783	// for (c = ...) {
784	// for (d = ...) {
785	// A[] = ...;
786	// }
787	// }
788	// for (e = ...) {
789	// for (f = ...) {
790	// for (g = ...) {
791	// ... = A[];
792	// }
793	// }
794	// }
795	// }
796	// }
797	// If we're looking at the possibility of a dependence between the store
798	// to A (the Src) and the load from A (the Dst), we'll note that they
799	// have 2 loops in common, so CommonLevels will equal 2 and the direction
800	// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
801	// A map from loop names to loop numbers would look like
802	// a - 1
803	// b - 2 = CommonLevels
804	// c - 3
805	// d - 4 = SrcLevels
806	// e - 5
807	// f - 6
808	// g - 7 = MaxLevels
809	void DependenceInfo::establishNestingLevels(const Instruction *Src,
810	const Instruction *Dst) {
811	const BasicBlock *SrcBlock = Src->getParent();
812	const BasicBlock *DstBlock = Dst->getParent();
813	unsigned SrcLevel = LI->getLoopDepth(BB: SrcBlock);
814	unsigned DstLevel = LI->getLoopDepth(BB: DstBlock);
815	const Loop *SrcLoop = LI->getLoopFor(BB: SrcBlock);
816	const Loop *DstLoop = LI->getLoopFor(BB: DstBlock);
817	SrcLevels = SrcLevel;
818	MaxLevels = SrcLevel + DstLevel;
819	while (SrcLevel > DstLevel) {
820	SrcLoop = SrcLoop->getParentLoop();
821	SrcLevel--;
822	}
823	while (DstLevel > SrcLevel) {
824	DstLoop = DstLoop->getParentLoop();
825	DstLevel--;
826	}
827	while (SrcLoop != DstLoop) {
828	SrcLoop = SrcLoop->getParentLoop();
829	DstLoop = DstLoop->getParentLoop();
830	SrcLevel--;
831	}
832	CommonLevels = SrcLevel;
833	MaxLevels -= CommonLevels;
834	}
835
836
837	// Given one of the loops containing the source, return
838	// its level index in our numbering scheme.
839	unsigned DependenceInfo::mapSrcLoop(const Loop SrcLoop) const* {
840	return SrcLoop->getLoopDepth();
841	}
842
843
844	// Given one of the loops containing the destination,
845	// return its level index in our numbering scheme.
846	unsigned DependenceInfo::mapDstLoop(const Loop DstLoop) const* {
847	unsigned D = DstLoop->getLoopDepth();
848	if (D > CommonLevels)
849	// This tries to make sure that we assign unique numbers to src and dst when
850	// the memory accesses reside in different loops that have the same depth.
851	return D - CommonLevels + SrcLevels;
852	else
853	return D;
854	}
855
856
857	// Returns true if Expression is loop invariant in LoopNest.
858	bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
859	const Loop LoopNest) const* {
860	// Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of
861	// any loop as invariant, because we only consier expression evaluation at a
862	// specific position (where the array access takes place), and not across the
863	// entire function.
864	if (!LoopNest)
865	return true;
866
867	// If the expression is invariant in the outermost loop of the loop nest, it
868	// is invariant anywhere in the loop nest.
869	return SE->isLoopInvariant(S: Expression, L: LoopNest->getOutermostLoop());
870	}
871
872
873
874	// Finds the set of loops from the LoopNest that
875	// have a level <= CommonLevels and are referred to by the SCEV Expression.
876	void DependenceInfo::collectCommonLoops(const SCEV *Expression,
877	const Loop *LoopNest,
878	SmallBitVector &Loops) const {
879	while (LoopNest) {
880	unsigned Level = LoopNest->getLoopDepth();
881	if (Level <= CommonLevels && !SE->isLoopInvariant(S: Expression, L: LoopNest))
882	Loops.set(Level);
883	LoopNest = LoopNest->getParentLoop();
884	}
885	}
886
887	void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
888
889	unsigned widestWidthSeen = `0`;
890	Type *widestType;
891
892	// Go through each pair and find the widest bit to which we need
893	// to extend all of them.
894	for (Subscript *Pair : Pairs) {
895	const SCEV *Src = Pair->Src;
896	const SCEV *Dst = Pair->Dst;
897	IntegerType *SrcTy = dyn_cast<IntegerType>(Val: Src->getType());
898	IntegerType *DstTy = dyn_cast<IntegerType>(Val: Dst->getType());
899	if (SrcTy == nullptr \|\| DstTy == nullptr) {
900	assert(SrcTy == DstTy && "This function only unify integer types and "
901	"expect Src and Dst share the same type "
902	"otherwise.");
903	continue;
904	}
905	if (SrcTy->getBitWidth() > widestWidthSeen) {
906	widestWidthSeen = SrcTy->getBitWidth();
907	widestType = SrcTy;
908	}
909	if (DstTy->getBitWidth() > widestWidthSeen) {
910	widestWidthSeen = DstTy->getBitWidth();
911	widestType = DstTy;
912	}
913	}
914
915
916	assert(widestWidthSeen > `0`);
917
918	// Now extend each pair to the widest seen.
919	for (Subscript *Pair : Pairs) {
920	const SCEV *Src = Pair->Src;
921	const SCEV *Dst = Pair->Dst;
922	IntegerType *SrcTy = dyn_cast<IntegerType>(Val: Src->getType());
923	IntegerType *DstTy = dyn_cast<IntegerType>(Val: Dst->getType());
924	if (SrcTy == nullptr \|\| DstTy == nullptr) {
925	assert(SrcTy == DstTy && "This function only unify integer types and "
926	"expect Src and Dst share the same type "
927	"otherwise.");
928	continue;
929	}
930	if (SrcTy->getBitWidth() < widestWidthSeen)
931	// Sign-extend Src to widestType
932	Pair->Src = SE->getSignExtendExpr(Op: Src, Ty: widestType);
933	if (DstTy->getBitWidth() < widestWidthSeen) {
934	// Sign-extend Dst to widestType
935	Pair->Dst = SE->getSignExtendExpr(Op: Dst, Ty: widestType);
936	}
937	}
938	}
939
940	// removeMatchingExtensions - Examines a subscript pair.
941	// If the source and destination are identically sign (or zero)
942	// extended, it strips off the extension in an effect to simplify
943	// the actual analysis.
944	void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
945	const SCEV *Src = Pair->Src;
946	const SCEV *Dst = Pair->Dst;
947	if ((isa<SCEVZeroExtendExpr>(Val: Src) && isa<SCEVZeroExtendExpr>(Val: Dst)) \|\|
948	(isa<SCEVSignExtendExpr>(Val: Src) && isa<SCEVSignExtendExpr>(Val: Dst))) {
949	const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Val: Src);
950	const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Val: Dst);
951	const SCEV *SrcCastOp = SrcCast->getOperand();
952	const SCEV *DstCastOp = DstCast->getOperand();
953	if (SrcCastOp->getType() == DstCastOp->getType()) {
954	Pair->Src = SrcCastOp;
955	Pair->Dst = DstCastOp;
956	}
957	}
958	}
959
960	// Examine the scev and return true iff it's affine.
961	// Collect any loops mentioned in the set of "Loops".
962	bool DependenceInfo::checkSubscript(const SCEV Expr, const* Loop *LoopNest,
963	SmallBitVector &Loops, bool IsSrc) {
964	const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
965	if (!AddRec)
966	return isLoopInvariant(Expression: Expr, LoopNest);
967
968	// The AddRec must depend on one of the containing loops. Otherwise,
969	// mapSrcLoop and mapDstLoop return indices outside the intended range. This
970	// can happen when a subscript in one loop references an IV from a sibling
971	// loop that could not be replaced with a concrete exit value by
972	// getSCEVAtScope.
973	const Loop *L = LoopNest;
974	while (L && AddRec->getLoop() != L)
975	L = L->getParentLoop();
976	if (!L)
977	return false;
978
979	const SCEV *Start = AddRec->getStart();
980	const SCEV Step = AddRec->getStepRecurrence(SE&: SE);
981	const SCEV *UB = SE->getBackedgeTakenCount(L: AddRec->getLoop());
982	if (!isa<SCEVCouldNotCompute>(Val: UB)) {
983	if (SE->getTypeSizeInBits(Ty: Start->getType()) <
984	SE->getTypeSizeInBits(Ty: UB->getType())) {
985	if (!AddRec->getNoWrapFlags())
986	return false;
987	}
988	}
989	if (!isLoopInvariant(Expression: Step, LoopNest))
990	return false;
991	if (IsSrc)
992	Loops.set(mapSrcLoop(SrcLoop: AddRec->getLoop()));
993	else
994	Loops.set(mapDstLoop(DstLoop: AddRec->getLoop()));
995	return checkSubscript(Expr: Start, LoopNest, Loops, IsSrc);
996	}
997
998	// Examine the scev and return true iff it's linear.
999	// Collect any loops mentioned in the set of "Loops".
1000	bool DependenceInfo::checkSrcSubscript(const SCEV Src, const* Loop *LoopNest,
1001	SmallBitVector &Loops) {
1002	return checkSubscript(Expr: Src, LoopNest, Loops, IsSrc: true);
1003	}
1004
1005	// Examine the scev and return true iff it's linear.
1006	// Collect any loops mentioned in the set of "Loops".
1007	bool DependenceInfo::checkDstSubscript(const SCEV Dst, const* Loop *LoopNest,
1008	SmallBitVector &Loops) {
1009	return checkSubscript(Expr: Dst, LoopNest, Loops, IsSrc: false);
1010	}
1011
1012
1013	// Examines the subscript pair (the Src and Dst SCEVs)
1014	// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
1015	// Collects the associated loops in a set.
1016	DependenceInfo::Subscript::ClassificationKind
1017	DependenceInfo::classifyPair(const SCEV Src, const* Loop *SrcLoopNest,
1018	const SCEV Dst, const* Loop *DstLoopNest,
1019	SmallBitVector &Loops) {
1020	SmallBitVector SrcLoops(MaxLevels + `1`);
1021	SmallBitVector DstLoops(MaxLevels + `1`);
1022	if (!checkSrcSubscript(Src, LoopNest: SrcLoopNest, Loops&: SrcLoops))
1023	return Subscript::NonLinear;
1024	if (!checkDstSubscript(Dst, LoopNest: DstLoopNest, Loops&: DstLoops))
1025	return Subscript::NonLinear;
1026	Loops = SrcLoops;
1027	Loops \|= DstLoops;
1028	unsigned N = Loops.count();
1029	if (N == `0`)
1030	return Subscript::ZIV;
1031	if (N == `1`)
1032	return Subscript::SIV;
1033	if (N == `2` && (SrcLoops.count() == `0` \|\|
1034	DstLoops.count() == `0` \|\|
1035	(SrcLoops.count() == `1` && DstLoops.count() == `1`)))
1036	return Subscript::RDIV;
1037	return Subscript::MIV;
1038	}
1039
1040
1041	// A wrapper around SCEV::isKnownPredicate.
1042	// Looks for cases where we're interested in comparing for equality.
1043	// If both X and Y have been identically sign or zero extended,
1044	// it strips off the (confusing) extensions before invoking
1045	// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
1046	// will be similarly updated.
1047	//
1048	// If SCEV::isKnownPredicate can't prove the predicate,
1049	// we try simple subtraction, which seems to help in some cases
1050	// involving symbolics.
1051	bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
1052	const SCEV Y) const* {
1053	if (Pred == CmpInst::ICMP_EQ \|\|
1054	Pred == CmpInst::ICMP_NE) {
1055	if ((isa<SCEVSignExtendExpr>(Val: X) &&
1056	isa<SCEVSignExtendExpr>(Val: Y)) \|\|
1057	(isa<SCEVZeroExtendExpr>(Val: X) &&
1058	isa<SCEVZeroExtendExpr>(Val: Y))) {
1059	const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(Val: X);
1060	const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Val: Y);
1061	const SCEV *Xop = CX->getOperand();
1062	const SCEV *Yop = CY->getOperand();
1063	if (Xop->getType() == Yop->getType()) {
1064	X = Xop;
1065	Y = Yop;
1066	}
1067	}
1068	}
1069	if (SE->isKnownPredicate(Pred, LHS: X, RHS: Y))
1070	return true;
1071	// If SE->isKnownPredicate can't prove the condition,
1072	// we try the brute-force approach of subtracting
1073	// and testing the difference.
1074	// By testing with SE->isKnownPredicate first, we avoid
1075	// the possibility of overflow when the arguments are constants.
1076	const SCEV *Delta = SE->getMinusSCEV(LHS: X, RHS: Y);
1077	switch (Pred) {
1078	case CmpInst::ICMP_EQ:
1079	return Delta->isZero();
1080	case CmpInst::ICMP_NE:
1081	return SE->isKnownNonZero(S: Delta);
1082	case CmpInst::ICMP_SGE:
1083	return SE->isKnownNonNegative(S: Delta);
1084	case CmpInst::ICMP_SLE:
1085	return SE->isKnownNonPositive(S: Delta);
1086	case CmpInst::ICMP_SGT:
1087	return SE->isKnownPositive(S: Delta);
1088	case CmpInst::ICMP_SLT:
1089	return SE->isKnownNegative(S: Delta);
1090	default:
1091	llvm_unreachable("unexpected predicate in isKnownPredicate");
1092	}
1093	}
1094
1095	/// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1))
1096	/// with some extra checking if S is an AddRec and we can prove less-than using
1097	/// the loop bounds.
1098	bool DependenceInfo::isKnownLessThan(const SCEV S, const* SCEV Size) const* {
1099	// First unify to the same type
1100	auto *SType = dyn_cast<IntegerType>(Val: S->getType());
1101	auto *SizeType = dyn_cast<IntegerType>(Val: Size->getType());
1102	if (!SType \|\| !SizeType)
1103	return false;
1104	Type *MaxType =
1105	(SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
1106	S = SE->getTruncateOrZeroExtend(V: S, Ty: MaxType);
1107	Size = SE->getTruncateOrZeroExtend(V: Size, Ty: MaxType);
1108
1109	// Special check for addrecs using BE taken count
1110	const SCEV *Bound = SE->getMinusSCEV(LHS: S, RHS: Size);
1111	if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Bound)) {
1112	if (AddRec->isAffine()) {
1113	const SCEV *BECount = SE->getBackedgeTakenCount(L: AddRec->getLoop());
1114	if (!isa<SCEVCouldNotCompute>(Val: BECount)) {
1115	const SCEV Limit = AddRec->evaluateAtIteration(It: BECount, SE&: SE);
1116	if (SE->isKnownNegative(S: Limit))
1117	return true;
1118	}
1119	}
1120	}
1121
1122	// Check using normal isKnownNegative
1123	const SCEV *LimitedBound =
1124	SE->getMinusSCEV(LHS: S, RHS: SE->getSMaxExpr(LHS: Size, RHS: SE->getOne(Ty: Size->getType())));
1125	return SE->isKnownNegative(S: LimitedBound);
1126	}
1127
1128	bool DependenceInfo::isKnownNonNegative(const SCEV S, const* Value Ptr) const* {
1129	bool Inbounds = false;
1130	if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Val: Ptr))
1131	Inbounds = SrcGEP->isInBounds();
1132	if (Inbounds) {
1133	if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: S)) {
1134	if (AddRec->isAffine()) {
1135	// We know S is for Ptr, the operand on a load/store, so doesn't wrap.
1136	// If both parts are NonNegative, the end result will be NonNegative
1137	if (SE->isKnownNonNegative(S: AddRec->getStart()) &&
1138	SE->isKnownNonNegative(S: AddRec->getOperand(i: `1`)))
1139	return true;
1140	}
1141	}
1142	}
1143
1144	return SE->isKnownNonNegative(S);
1145	}
1146
1147	// All subscripts are all the same type.
1148	// Loop bound may be smaller (e.g., a char).
1149	// Should zero extend loop bound, since it's always >= 0.
1150	// This routine collects upper bound and extends or truncates if needed.
1151	// Truncating is safe when subscripts are known not to wrap. Cases without
1152	// nowrap flags should have been rejected earlier.
1153	// Return null if no bound available.
1154	const SCEV DependenceInfo::collectUpperBound(const* Loop L, Type T) const {
1155	if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
1156	const SCEV *UB = SE->getBackedgeTakenCount(L);
1157	return SE->getTruncateOrZeroExtend(V: UB, Ty: T);
1158	}
1159	return nullptr;
1160	}
1161
1162
1163	// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1164	// If the cast fails, returns NULL.
1165	const SCEVConstant DependenceInfo::collectConstantUpperBound(const* Loop *L,
1166	Type T) const* {
1167	if (const SCEV *UB = collectUpperBound(L, T))
1168	return dyn_cast<SCEVConstant>(Val: UB);
1169	return nullptr;
1170	}
1171
1172
1173	// testZIV -
1174	// When we have a pair of subscripts of the form [c1] and [c2],
1175	// where c1 and c2 are both loop invariant, we attack it using
1176	// the ZIV test. Basically, we test by comparing the two values,
1177	// but there are actually three possible results:
1178	// 1) the values are equal, so there's a dependence
1179	// 2) the values are different, so there's no dependence
1180	// 3) the values might be equal, so we have to assume a dependence.
1181	//
1182	// Return true if dependence disproved.
1183	bool DependenceInfo::testZIV(const SCEV Src, const* SCEV *Dst,
1184	FullDependence &Result) const {
1185	LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1186	LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1187	++ZIVapplications;
1188	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: Src, Y: Dst)) {
1189	LLVM_DEBUG(dbgs() << " provably dependent\n");
1190	return false; // provably dependent
1191	}
1192	if (isKnownPredicate(Pred: CmpInst::ICMP_NE, X: Src, Y: Dst)) {
1193	LLVM_DEBUG(dbgs() << " provably independent\n");
1194	++ZIVindependence;
1195	return true; // provably independent
1196	}
1197	LLVM_DEBUG(dbgs() << " possibly dependent\n");
1198	Result.Consistent = false;
1199	return false; // possibly dependent
1200	}
1201
1202
1203	// strongSIVtest -
1204	// From the paper, Practical Dependence Testing, Section 4.2.1
1205	//
1206	// When we have a pair of subscripts of the form [c1 + ai] and [c2 + ai],
1207	// where i is an induction variable, c1 and c2 are loop invariant,
1208	// and a is a constant, we can solve it exactly using the Strong SIV test.
1209	//
1210	// Can prove independence. Failing that, can compute distance (and direction).
1211	// In the presence of symbolic terms, we can sometimes make progress.
1212	//
1213	// If there's a dependence,
1214	//
1215	// c1 + ai = c2 + ai'
1216	//
1217	// The dependence distance is
1218	//
1219	// d = i' - i = (c1 - c2)/a
1220	//
1221	// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1222	// loop's upper bound. If a dependence exists, the dependence direction is
1223	// defined as
1224	//
1225	// { < if d > 0
1226	// direction = { = if d = 0
1227	// { > if d < 0
1228	//
1229	// Return true if dependence disproved.
1230	bool DependenceInfo::strongSIVtest(const SCEV Coeff, const* SCEV *SrcConst,
1231	const SCEV DstConst, const* Loop *CurLoop,
1232	unsigned Level, FullDependence &Result,
1233	Constraint &NewConstraint) const {
1234	LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1235	LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
1236	LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1237	LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
1238	LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1239	LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
1240	LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1241	++StrongSIVapplications;
1242	assert(`0` < Level && Level <= CommonLevels && "level out of range");
1243	Level--;
1244
1245	const SCEV *Delta = SE->getMinusSCEV(LHS: SrcConst, RHS: DstConst);
1246	LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
1247	LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1248
1249	// check that \|Delta\| < iteration count
1250	if (const SCEV *UpperBound = collectUpperBound(L: CurLoop, T: Delta->getType())) {
1251	LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
1252	LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
1253	const SCEV *AbsDelta =
1254	SE->isKnownNonNegative(S: Delta) ? Delta : SE->getNegativeSCEV(V: Delta);
1255	const SCEV *AbsCoeff =
1256	SE->isKnownNonNegative(S: Coeff) ? Coeff : SE->getNegativeSCEV(V: Coeff);
1257	const SCEV *Product = SE->getMulExpr(LHS: UpperBound, RHS: AbsCoeff);
1258	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: AbsDelta, Y: Product)) {
1259	// Distance greater than trip count - no dependence
1260	++StrongSIVindependence;
1261	++StrongSIVsuccesses;
1262	return true;
1263	}
1264	}
1265
1266	// Can we compute distance?
1267	if (isa<SCEVConstant>(Val: Delta) && isa<SCEVConstant>(Val: Coeff)) {
1268	APInt ConstDelta = cast<SCEVConstant>(Val: Delta)->getAPInt();
1269	APInt ConstCoeff = cast<SCEVConstant>(Val: Coeff)->getAPInt();
1270	APInt Distance = ConstDelta; // these need to be initialized
1271	APInt Remainder = ConstDelta;
1272	APInt::sdivrem(LHS: ConstDelta, RHS: ConstCoeff, Quotient&: Distance, Remainder);
1273	LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1274	LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1275	// Make sure Coeff divides Delta exactly
1276	if (Remainder != `0`) {
1277	// Coeff doesn't divide Distance, no dependence
1278	++StrongSIVindependence;
1279	++StrongSIVsuccesses;
1280	return true;
1281	}
1282	Result.DV [Level].Distance = SE->getConstant(Val: Distance);
1283	NewConstraint.setDistance(D: SE->getConstant(Val: Distance), CurLoop);
1284	if (Distance.sgt(RHS: `0`))
1285	Result.DV [Level].Direction &= Dependence::DVEntry::LT;
1286	else if (Distance.slt(RHS: `0`))
1287	Result.DV [Level].Direction &= Dependence::DVEntry::GT;
1288	else
1289	Result.DV [Level].Direction &= Dependence::DVEntry::EQ;
1290	++StrongSIVsuccesses;
1291	}
1292	else if (Delta->isZero()) {
1293	// since 0/X == 0
1294	Result.DV [Level].Distance = Delta;
1295	NewConstraint.setDistance(D: Delta, CurLoop);
1296	Result.DV [Level].Direction &= Dependence::DVEntry::EQ;
1297	++StrongSIVsuccesses;
1298	}
1299	else {
1300	if (Coeff->isOne()) {
1301	LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
1302	Result.DV [Level].Distance = Delta; // since X/1 == X
1303	NewConstraint.setDistance(D: Delta, CurLoop);
1304	}
1305	else {
1306	Result.Consistent = false;
1307	NewConstraint.setLine(AA: Coeff,
1308	BB: SE->getNegativeSCEV(V: Coeff),
1309	CC: SE->getNegativeSCEV(V: Delta), CurLoop);
1310	}
1311
1312	// maybe we can get a useful direction
1313	bool DeltaMaybeZero = !SE->isKnownNonZero(S: Delta);
1314	bool DeltaMaybePositive = !SE->isKnownNonPositive(S: Delta);
1315	bool DeltaMaybeNegative = !SE->isKnownNonNegative(S: Delta);
1316	bool CoeffMaybePositive = !SE->isKnownNonPositive(S: Coeff);
1317	bool CoeffMaybeNegative = !SE->isKnownNonNegative(S: Coeff);
1318	// The double negatives above are confusing.
1319	// It helps to read !SE->isKnownNonZero(Delta)
1320	// as "Delta might be Zero"
1321	unsigned NewDirection = Dependence::DVEntry::NONE;
1322	if ((DeltaMaybePositive && CoeffMaybePositive) \|\|
1323	(DeltaMaybeNegative && CoeffMaybeNegative))
1324	NewDirection = Dependence::DVEntry::LT;
1325	if (DeltaMaybeZero)
1326	NewDirection \|= Dependence::DVEntry::EQ;
1327	if ((DeltaMaybeNegative && CoeffMaybePositive) \|\|
1328	(DeltaMaybePositive && CoeffMaybeNegative))
1329	NewDirection \|= Dependence::DVEntry::GT;
1330	if (NewDirection < Result.DV [Level].Direction)
1331	++StrongSIVsuccesses;
1332	Result.DV [Level].Direction &= NewDirection;
1333	}
1334	return false;
1335	}
1336
1337
1338	// weakCrossingSIVtest -
1339	// From the paper, Practical Dependence Testing, Section 4.2.2
1340	//
1341	// When we have a pair of subscripts of the form [c1 + ai] and [c2 - ai],
1342	// where i is an induction variable, c1 and c2 are loop invariant,
1343	// and a is a constant, we can solve it exactly using the
1344	// Weak-Crossing SIV test.
1345	//
1346	// Given c1 + ai = c2 - ai', we can look for the intersection of
1347	// the two lines, where i = i', yielding
1348	//
1349	// c1 + ai = c2 - ai
1350	// 2ai = c2 - c1*
1351	// i = (c2 - c1)/2a
1352	//
1353	// If i < 0, there is no dependence.
1354	// If i > upperbound, there is no dependence.
1355	// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1356	// If i = upperbound, there's a dependence with distance = 0.
1357	// If i is integral, there's a dependence (all directions).
1358	// If the non-integer part = 1/2, there's a dependence (<> directions).
1359	// Otherwise, there's no dependence.
1360	//
1361	// Can prove independence. Failing that,
1362	// can sometimes refine the directions.
1363	// Can determine iteration for splitting.
1364	//
1365	// Return true if dependence disproved.
1366	bool DependenceInfo::weakCrossingSIVtest(
1367	const SCEV Coeff, const* SCEV SrcConst, const* SCEV *DstConst,
1368	const Loop CurLoop, unsigned* Level, FullDependence &Result,
1369	Constraint &NewConstraint, const SCEV &SplitIter) const* {
1370	LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1371	LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
1372	LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1373	LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1374	++WeakCrossingSIVapplications;
1375	assert(`0` < Level && Level <= CommonLevels && "Level out of range");
1376	Level--;
1377	Result.Consistent = false;
1378	const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
1379	LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1380	NewConstraint.setLine(AA: Coeff, BB: Coeff, CC: Delta, CurLoop);
1381	if (Delta->isZero()) {
1382	Result.DV [Level].Direction &= ~Dependence::DVEntry::LT;
1383	Result.DV [Level].Direction &= ~Dependence::DVEntry::GT;
1384	++WeakCrossingSIVsuccesses;
1385	if (!Result.DV [Level].Direction) {
1386	++WeakCrossingSIVindependence;
1387	return true;
1388	}
1389	Result.DV [Level].Distance = Delta; // = 0
1390	return false;
1391	}
1392	const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: Coeff);
1393	if (!ConstCoeff)
1394	return false;
1395
1396	Result.DV [Level].Splitable = true;
1397	if (SE->isKnownNegative(S: ConstCoeff)) {
1398	ConstCoeff = dyn_cast<SCEVConstant>(Val: SE->getNegativeSCEV(V: ConstCoeff));
1399	assert(ConstCoeff &&
1400	"dynamic cast of negative of ConstCoeff should yield constant");
1401	Delta = SE->getNegativeSCEV(V: Delta);
1402	}
1403	assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1404
1405	// compute SplitIter for use by DependenceInfo::getSplitIteration()
1406	SplitIter = SE->getUDivExpr(
1407	LHS: SE->getSMaxExpr(LHS: SE->getZero(Ty: Delta->getType()), RHS: Delta),
1408	RHS: SE->getMulExpr(LHS: SE->getConstant(Ty: Delta->getType(), V: `2`), RHS: ConstCoeff));
1409	LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
1410
1411	const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1412	if (!ConstDelta)
1413	return false;
1414
1415	// We're certain that ConstCoeff > 0; therefore,
1416	// if Delta < 0, then no dependence.
1417	LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1418	LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
1419	if (SE->isKnownNegative(S: Delta)) {
1420	// No dependence, Delta < 0
1421	++WeakCrossingSIVindependence;
1422	++WeakCrossingSIVsuccesses;
1423	return true;
1424	}
1425
1426	// We're certain that Delta > 0 and ConstCoeff > 0.
1427	// Check Delta/(2ConstCoeff) against upper loop bound*
1428	if (const SCEV *UpperBound = collectUpperBound(L: CurLoop, T: Delta->getType())) {
1429	LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1430	const SCEV *ConstantTwo = SE->getConstant(Ty: UpperBound->getType(), V: `2`);
1431	const SCEV *ML = SE->getMulExpr(LHS: SE->getMulExpr(LHS: ConstCoeff, RHS: UpperBound),
1432	RHS: ConstantTwo);
1433	LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n");
1434	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: Delta, Y: ML)) {
1435	// Delta too big, no dependence
1436	++WeakCrossingSIVindependence;
1437	++WeakCrossingSIVsuccesses;
1438	return true;
1439	}
1440	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: Delta, Y: ML)) {
1441	// i = i' = UB
1442	Result.DV [Level].Direction &= ~Dependence::DVEntry::LT;
1443	Result.DV [Level].Direction &= ~Dependence::DVEntry::GT;
1444	++WeakCrossingSIVsuccesses;
1445	if (!Result.DV [Level].Direction) {
1446	++WeakCrossingSIVindependence;
1447	return true;
1448	}
1449	Result.DV [Level].Splitable = false;
1450	Result.DV [Level].Distance = SE->getZero(Ty: Delta->getType());
1451	return false;
1452	}
1453	}
1454
1455	// check that Coeff divides Delta
1456	APInt APDelta = ConstDelta->getAPInt();
1457	APInt APCoeff = ConstCoeff->getAPInt();
1458	APInt Distance = APDelta; // these need to be initialzed
1459	APInt Remainder = APDelta;
1460	APInt::sdivrem(LHS: APDelta, RHS: APCoeff, Quotient&: Distance, Remainder);
1461	LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1462	if (Remainder != `0`) {
1463	// Coeff doesn't divide Delta, no dependence
1464	++WeakCrossingSIVindependence;
1465	++WeakCrossingSIVsuccesses;
1466	return true;
1467	}
1468	LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1469
1470	// if 2Coeff doesn't divide Delta, then the equal direction isn't possible*
1471	APInt Two = APInt (Distance.getBitWidth(), `2`, true);
1472	Remainder = Distance.srem(RHS: Two);
1473	LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1474	if (Remainder != `0`) {
1475	// Equal direction isn't possible
1476	Result.DV [Level].Direction &= ~Dependence::DVEntry::EQ;
1477	++WeakCrossingSIVsuccesses;
1478	}
1479	return false;
1480	}
1481
1482
1483	// Kirch's algorithm, from
1484	//
1485	// Optimizing Supercompilers for Supercomputers
1486	// Michael Wolfe
1487	// MIT Press, 1989
1488	//
1489	// Program 2.1, page 29.
1490	// Computes the GCD of AM and BM.
1491	// Also finds a solution to the equation ax - by = gcd(a, b).
1492	// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1493	static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1494	const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1495	APInt A0(Bits, `1`, true), A1(Bits, `0`, true);
1496	APInt B0(Bits, `0`, true), B1(Bits, `1`, true);
1497	APInt G0 = AM.abs();
1498	APInt G1 = BM.abs();
1499	APInt Q = G0; // these need to be initialized
1500	APInt R = G0;
1501	APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1502	while (R != `0`) {
1503	APInt A2 = A0 - Q *A1; A0 = A1; A1 = A2;
1504	APInt B2 = B0 - Q *B1; B0 = B1; B1 = B2;
1505	G0 = G1; G1 = R;
1506	APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1507	}
1508	G = G1;
1509	LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
1510	X = AM.slt(RHS: `0`) ? -A1 : A1;
1511	Y = BM.slt(RHS: `0`) ? B1 : -B1;
1512
1513	// make sure gcd divides Delta
1514	R = Delta.srem(RHS: G);
1515	if (R != `0`)
1516	return true; // gcd doesn't divide Delta, no dependence
1517	Q = Delta.sdiv(RHS: G);
1518	return false;
1519	}
1520
1521	static APInt floorOfQuotient(const APInt &A, const APInt &B) {
1522	APInt Q = A; // these need to be initialized
1523	APInt R = A;
1524	APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1525	if (R == `0`)
1526	return Q;
1527	if ((A.sgt(RHS: `0`) && B.sgt(RHS: `0`)) \|\|
1528	(A.slt(RHS: `0`) && B.slt(RHS: `0`)))
1529	return Q;
1530	else
1531	return Q - `1`;
1532	}
1533
1534	static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
1535	APInt Q = A; // these need to be initialized
1536	APInt R = A;
1537	APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1538	if (R == `0`)
1539	return Q;
1540	if ((A.sgt(RHS: `0`) && B.sgt(RHS: `0`)) \|\|
1541	(A.slt(RHS: `0`) && B.slt(RHS: `0`)))
1542	return Q + `1`;
1543	else
1544	return Q;
1545	}
1546
1547	// exactSIVtest -
1548	// When we have a pair of subscripts of the form [c1 + a1i] and [c2 + a2i],
1549	// where i is an induction variable, c1 and c2 are loop invariant, and a1
1550	// and a2 are constant, we can solve it exactly using an algorithm developed
1551	// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1552	//
1553	// Dependence Analysis for Supercomputing
1554	// Utpal Banerjee
1555	// Kluwer Academic Publishers, 1988
1556	//
1557	// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1558	// so use them if possible. They're also a bit better with symbolics and,
1559	// in the case of the strong SIV test, can compute Distances.
1560	//
1561	// Return true if dependence disproved.
1562	//
1563	// This is a modified version of the original Banerjee algorithm. The original
1564	// only tested whether Dst depends on Src. This algorithm extends that and
1565	// returns all the dependencies that exist between Dst and Src.
1566	bool DependenceInfo::exactSIVtest(const SCEV SrcCoeff, const* SCEV *DstCoeff,
1567	const SCEV SrcConst, const* SCEV *DstConst,
1568	const Loop CurLoop, unsigned* Level,
1569	FullDependence &Result,
1570	Constraint &NewConstraint) const {
1571	LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1572	LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
1573	LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
1574	LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1575	LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1576	++ExactSIVapplications;
1577	assert(`0` < Level && Level <= CommonLevels && "Level out of range");
1578	Level--;
1579	Result.Consistent = false;
1580	const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
1581	LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1582	NewConstraint.setLine(AA: SrcCoeff, BB: SE->getNegativeSCEV(V: DstCoeff), CC: Delta,
1583	CurLoop);
1584	const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1585	const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1586	const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1587	if (!ConstDelta \|\| !ConstSrcCoeff \|\| !ConstDstCoeff)
1588	return false;
1589
1590	// find gcd
1591	APInt G, X, Y;
1592	APInt AM = ConstSrcCoeff->getAPInt();
1593	APInt BM = ConstDstCoeff->getAPInt();
1594	APInt CM = ConstDelta->getAPInt();
1595	unsigned Bits = AM.getBitWidth();
1596	if (findGCD(Bits, AM, BM, Delta: CM, G, X, Y)) {
1597	// gcd doesn't divide Delta, no dependence
1598	++ExactSIVindependence;
1599	++ExactSIVsuccesses;
1600	return true;
1601	}
1602
1603	LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1604
1605	// since SCEV construction normalizes, LM = 0
1606	APInt UM(Bits, `1`, true);
1607	bool UMValid = false;
1608	// UM is perhaps unavailable, let's check
1609	if (const SCEVConstant *CUB =
1610	collectConstantUpperBound(L: CurLoop, T: Delta->getType())) {
1611	UM = CUB->getAPInt();
1612	LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n");
1613	UMValid = true;
1614	}
1615
1616	APInt TU(APInt::getSignedMaxValue(numBits: Bits));
1617	APInt TL(APInt::getSignedMinValue(numBits: Bits));
1618	APInt TC = CM.sdiv(RHS: G);
1619	APInt TX = X * TC;
1620	APInt TY = Y * TC;
1621	LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
1622	LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
1623	LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
1624
1625	SmallVector<APInt, `2`> TLVec, TUVec;
1626	APInt TB = BM.sdiv(RHS: G);
1627	if (TB.sgt(RHS: `0`)) {
1628	TLVec.push_back(Elt: ceilingOfQuotient(A: -TX, B: TB));
1629	LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
1630	// New bound check - modification to Banerjee's e3 check
1631	if (UMValid) {
1632	TUVec.push_back(Elt: floorOfQuotient(A: UM - TX, B: TB));
1633	LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
1634	}
1635	} else {
1636	TUVec.push_back(Elt: floorOfQuotient(A: -TX, B: TB));
1637	LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
1638	// New bound check - modification to Banerjee's e3 check
1639	if (UMValid) {
1640	TLVec.push_back(Elt: ceilingOfQuotient(A: UM - TX, B: TB));
1641	LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
1642	}
1643	}
1644
1645	APInt TA = AM.sdiv(RHS: G);
1646	if (TA.sgt(RHS: `0`)) {
1647	if (UMValid) {
1648	TUVec.push_back(Elt: floorOfQuotient(A: UM - TY, B: TA));
1649	LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
1650	}
1651	// New bound check - modification to Banerjee's e3 check
1652	TLVec.push_back(Elt: ceilingOfQuotient(A: -TY, B: TA));
1653	LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
1654	} else {
1655	if (UMValid) {
1656	TLVec.push_back(Elt: ceilingOfQuotient(A: UM - TY, B: TA));
1657	LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
1658	}
1659	// New bound check - modification to Banerjee's e3 check
1660	TUVec.push_back(Elt: floorOfQuotient(A: -TY, B: TA));
1661	LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
1662	}
1663
1664	LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
1665	LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
1666
1667	if (TLVec.empty() \|\| TUVec.empty())
1668	return false;
1669	TL = APIntOps::smax(A: TLVec.front(), B: TLVec.back());
1670	TU = APIntOps::smin(A: TUVec.front(), B: TUVec.back());
1671	LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
1672	LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
1673
1674	if (TL.sgt(RHS: TU)) {
1675	++ExactSIVindependence;
1676	++ExactSIVsuccesses;
1677	return true;
1678	}
1679
1680	// explore directions
1681	unsigned NewDirection = Dependence::DVEntry::NONE;
1682	APInt LowerDistance, UpperDistance;
1683	if (TA.sgt(RHS: TB)) {
1684	LowerDistance = (TY - TX) + (TA - TB) * TL;
1685	UpperDistance = (TY - TX) + (TA - TB) * TU;
1686	} else {
1687	LowerDistance = (TY - TX) + (TA - TB) * TU;
1688	UpperDistance = (TY - TX) + (TA - TB) * TL;
1689	}
1690
1691	LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n");
1692	LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n");
1693
1694	APInt Zero(Bits, `0`, true);
1695	if (LowerDistance.sle(RHS: Zero) && UpperDistance.sge(RHS: Zero)) {
1696	NewDirection \|= Dependence::DVEntry::EQ;
1697	++ExactSIVsuccesses;
1698	}
1699	if (LowerDistance.slt(RHS: `0`)) {
1700	NewDirection \|= Dependence::DVEntry::GT;
1701	++ExactSIVsuccesses;
1702	}
1703	if (UpperDistance.sgt(RHS: `0`)) {
1704	NewDirection \|= Dependence::DVEntry::LT;
1705	++ExactSIVsuccesses;
1706	}
1707
1708	// finished
1709	Result.DV [Level].Direction &= NewDirection;
1710	if (Result.DV [Level].Direction == Dependence::DVEntry::NONE)
1711	++ExactSIVindependence;
1712	LLVM_DEBUG(dbgs() << "\t Result = ");
1713	LLVM_DEBUG(Result.dump(dbgs()));
1714	return Result.DV [Level].Direction == Dependence::DVEntry::NONE;
1715	}
1716
1717
1718	// Return true if the divisor evenly divides the dividend.
1719	static
1720	bool isRemainderZero(const SCEVConstant *Dividend,
1721	const SCEVConstant *Divisor) {
1722	const APInt &ConstDividend = Dividend->getAPInt();
1723	const APInt &ConstDivisor = Divisor->getAPInt();
1724	return ConstDividend.srem(RHS: ConstDivisor) == `0`;
1725	}
1726
1727
1728	// weakZeroSrcSIVtest -
1729	// From the paper, Practical Dependence Testing, Section 4.2.2
1730	//
1731	// When we have a pair of subscripts of the form [c1] and [c2 + ai],*
1732	// where i is an induction variable, c1 and c2 are loop invariant,
1733	// and a is a constant, we can solve it exactly using the
1734	// Weak-Zero SIV test.
1735	//
1736	// Given
1737	//
1738	// c1 = c2 + ai*
1739	//
1740	// we get
1741	//
1742	// (c1 - c2)/a = i
1743	//
1744	// If i is not an integer, there's no dependence.
1745	// If i < 0 or > UB, there's no dependence.
1746	// If i = 0, the direction is >= and peeling the
1747	// 1st iteration will break the dependence.
1748	// If i = UB, the direction is <= and peeling the
1749	// last iteration will break the dependence.
1750	// Otherwise, the direction is .*
1751	//
1752	// Can prove independence. Failing that, we can sometimes refine
1753	// the directions. Can sometimes show that first or last
1754	// iteration carries all the dependences (so worth peeling).
1755	//
1756	// (see also weakZeroDstSIVtest)
1757	//
1758	// Return true if dependence disproved.
1759	bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
1760	const SCEV *SrcConst,
1761	const SCEV *DstConst,
1762	const Loop CurLoop, unsigned* Level,
1763	FullDependence &Result,
1764	Constraint &NewConstraint) const {
1765	// For the WeakSIV test, it's possible the loop isn't common to
1766	// the Src and Dst loops. If it isn't, then there's no need to
1767	// record a direction.
1768	LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1769	LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1770	LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1771	LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1772	++WeakZeroSIVapplications;
1773	assert(`0` < Level && Level <= MaxLevels && "Level out of range");
1774	Level--;
1775	Result.Consistent = false;
1776	const SCEV *Delta = SE->getMinusSCEV(LHS: SrcConst, RHS: DstConst);
1777	NewConstraint.setLine(AA: SE->getZero(Ty: Delta->getType()), BB: DstCoeff, CC: Delta,
1778	CurLoop);
1779	LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1780	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: SrcConst, Y: DstConst)) {
1781	if (Level < CommonLevels) {
1782	Result.DV [Level].Direction &= Dependence::DVEntry::GE;
1783	Result.DV [Level].PeelFirst = true;
1784	++WeakZeroSIVsuccesses;
1785	}
1786	return false; // dependences caused by first iteration
1787	}
1788	const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1789	if (!ConstCoeff)
1790	return false;
1791	const SCEV *AbsCoeff =
1792	SE->isKnownNegative(S: ConstCoeff) ?
1793	SE->getNegativeSCEV(V: ConstCoeff) : ConstCoeff;
1794	const SCEV *NewDelta =
1795	SE->isKnownNegative(S: ConstCoeff) ? SE->getNegativeSCEV(V: Delta) : Delta;
1796
1797	// check that Delta/SrcCoeff < iteration count
1798	// really check NewDelta < countAbsCoeff*
1799	if (const SCEV *UpperBound = collectUpperBound(L: CurLoop, T: Delta->getType())) {
1800	LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1801	const SCEV *Product = SE->getMulExpr(LHS: AbsCoeff, RHS: UpperBound);
1802	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: NewDelta, Y: Product)) {
1803	++WeakZeroSIVindependence;
1804	++WeakZeroSIVsuccesses;
1805	return true;
1806	}
1807	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: NewDelta, Y: Product)) {
1808	// dependences caused by last iteration
1809	if (Level < CommonLevels) {
1810	Result.DV [Level].Direction &= Dependence::DVEntry::LE;
1811	Result.DV [Level].PeelLast = true;
1812	++WeakZeroSIVsuccesses;
1813	}
1814	return false;
1815	}
1816	}
1817
1818	// check that Delta/SrcCoeff >= 0
1819	// really check that NewDelta >= 0
1820	if (SE->isKnownNegative(S: NewDelta)) {
1821	// No dependence, newDelta < 0
1822	++WeakZeroSIVindependence;
1823	++WeakZeroSIVsuccesses;
1824	return true;
1825	}
1826
1827	// if SrcCoeff doesn't divide Delta, then no dependence
1828	if (isa<SCEVConstant>(Val: Delta) &&
1829	!isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
1830	++WeakZeroSIVindependence;
1831	++WeakZeroSIVsuccesses;
1832	return true;
1833	}
1834	return false;
1835	}
1836
1837
1838	// weakZeroDstSIVtest -
1839	// From the paper, Practical Dependence Testing, Section 4.2.2
1840	//
1841	// When we have a pair of subscripts of the form [c1 + ai] and [c2],*
1842	// where i is an induction variable, c1 and c2 are loop invariant,
1843	// and a is a constant, we can solve it exactly using the
1844	// Weak-Zero SIV test.
1845	//
1846	// Given
1847	//
1848	// c1 + ai = c2*
1849	//
1850	// we get
1851	//
1852	// i = (c2 - c1)/a
1853	//
1854	// If i is not an integer, there's no dependence.
1855	// If i < 0 or > UB, there's no dependence.
1856	// If i = 0, the direction is <= and peeling the
1857	// 1st iteration will break the dependence.
1858	// If i = UB, the direction is >= and peeling the
1859	// last iteration will break the dependence.
1860	// Otherwise, the direction is .*
1861	//
1862	// Can prove independence. Failing that, we can sometimes refine
1863	// the directions. Can sometimes show that first or last
1864	// iteration carries all the dependences (so worth peeling).
1865	//
1866	// (see also weakZeroSrcSIVtest)
1867	//
1868	// Return true if dependence disproved.
1869	bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
1870	const SCEV *SrcConst,
1871	const SCEV *DstConst,
1872	const Loop CurLoop, unsigned* Level,
1873	FullDependence &Result,
1874	Constraint &NewConstraint) const {
1875	// For the WeakSIV test, it's possible the loop isn't common to the
1876	// Src and Dst loops. If it isn't, then there's no need to record a direction.
1877	LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1878	LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
1879	LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1880	LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1881	++WeakZeroSIVapplications;
1882	assert(`0` < Level && Level <= SrcLevels && "Level out of range");
1883	Level--;
1884	Result.Consistent = false;
1885	const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
1886	NewConstraint.setLine(AA: SrcCoeff, BB: SE->getZero(Ty: Delta->getType()), CC: Delta,
1887	CurLoop);
1888	LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1889	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: DstConst, Y: SrcConst)) {
1890	if (Level < CommonLevels) {
1891	Result.DV [Level].Direction &= Dependence::DVEntry::LE;
1892	Result.DV [Level].PeelFirst = true;
1893	++WeakZeroSIVsuccesses;
1894	}
1895	return false; // dependences caused by first iteration
1896	}
1897	const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1898	if (!ConstCoeff)
1899	return false;
1900	const SCEV *AbsCoeff =
1901	SE->isKnownNegative(S: ConstCoeff) ?
1902	SE->getNegativeSCEV(V: ConstCoeff) : ConstCoeff;
1903	const SCEV *NewDelta =
1904	SE->isKnownNegative(S: ConstCoeff) ? SE->getNegativeSCEV(V: Delta) : Delta;
1905
1906	// check that Delta/SrcCoeff < iteration count
1907	// really check NewDelta < countAbsCoeff*
1908	if (const SCEV *UpperBound = collectUpperBound(L: CurLoop, T: Delta->getType())) {
1909	LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1910	const SCEV *Product = SE->getMulExpr(LHS: AbsCoeff, RHS: UpperBound);
1911	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: NewDelta, Y: Product)) {
1912	++WeakZeroSIVindependence;
1913	++WeakZeroSIVsuccesses;
1914	return true;
1915	}
1916	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: NewDelta, Y: Product)) {
1917	// dependences caused by last iteration
1918	if (Level < CommonLevels) {
1919	Result.DV [Level].Direction &= Dependence::DVEntry::GE;
1920	Result.DV [Level].PeelLast = true;
1921	++WeakZeroSIVsuccesses;
1922	}
1923	return false;
1924	}
1925	}
1926
1927	// check that Delta/SrcCoeff >= 0
1928	// really check that NewDelta >= 0
1929	if (SE->isKnownNegative(S: NewDelta)) {
1930	// No dependence, newDelta < 0
1931	++WeakZeroSIVindependence;
1932	++WeakZeroSIVsuccesses;
1933	return true;
1934	}
1935
1936	// if SrcCoeff doesn't divide Delta, then no dependence
1937	if (isa<SCEVConstant>(Val: Delta) &&
1938	!isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
1939	++WeakZeroSIVindependence;
1940	++WeakZeroSIVsuccesses;
1941	return true;
1942	}
1943	return false;
1944	}
1945
1946
1947	// exactRDIVtest - Tests the RDIV subscript pair for dependence.
1948	// Things of the form [c1 + ai] and [c2 + bj],
1949	// where i and j are induction variable, c1 and c2 are loop invariant,
1950	// and a and b are constants.
1951	// Returns true if any possible dependence is disproved.
1952	// Marks the result as inconsistent.
1953	// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1954	bool DependenceInfo::exactRDIVtest(const SCEV SrcCoeff, const* SCEV *DstCoeff,
1955	const SCEV SrcConst, const* SCEV *DstConst,
1956	const Loop SrcLoop, const* Loop *DstLoop,
1957	FullDependence &Result) const {
1958	LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
1959	LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
1960	LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
1961	LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1962	LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1963	++ExactRDIVapplications;
1964	Result.Consistent = false;
1965	const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
1966	LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1967	const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1968	const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1969	const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1970	if (!ConstDelta \|\| !ConstSrcCoeff \|\| !ConstDstCoeff)
1971	return false;
1972
1973	// find gcd
1974	APInt G, X, Y;
1975	APInt AM = ConstSrcCoeff->getAPInt();
1976	APInt BM = ConstDstCoeff->getAPInt();
1977	APInt CM = ConstDelta->getAPInt();
1978	unsigned Bits = AM.getBitWidth();
1979	if (findGCD(Bits, AM, BM, Delta: CM, G, X, Y)) {
1980	// gcd doesn't divide Delta, no dependence
1981	++ExactRDIVindependence;
1982	return true;
1983	}
1984
1985	LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1986
1987	// since SCEV construction seems to normalize, LM = 0
1988	APInt SrcUM(Bits, `1`, true);
1989	bool SrcUMvalid = false;
1990	// SrcUM is perhaps unavailable, let's check
1991	if (const SCEVConstant *UpperBound =
1992	collectConstantUpperBound(L: SrcLoop, T: Delta->getType())) {
1993	SrcUM = UpperBound->getAPInt();
1994	LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
1995	SrcUMvalid = true;
1996	}
1997
1998	APInt DstUM(Bits, `1`, true);
1999	bool DstUMvalid = false;
2000	// UM is perhaps unavailable, let's check
2001	if (const SCEVConstant *UpperBound =
2002	collectConstantUpperBound(L: DstLoop, T: Delta->getType())) {
2003	DstUM = UpperBound->getAPInt();
2004	LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
2005	DstUMvalid = true;
2006	}
2007
2008	APInt TU(APInt::getSignedMaxValue(numBits: Bits));
2009	APInt TL(APInt::getSignedMinValue(numBits: Bits));
2010	APInt TC = CM.sdiv(RHS: G);
2011	APInt TX = X * TC;
2012	APInt TY = Y * TC;
2013	LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
2014	LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
2015	LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
2016
2017	SmallVector<APInt, `2`> TLVec, TUVec;
2018	APInt TB = BM.sdiv(RHS: G);
2019	if (TB.sgt(RHS: `0`)) {
2020	TLVec.push_back(Elt: ceilingOfQuotient(A: -TX, B: TB));
2021	LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
2022	if (SrcUMvalid) {
2023	TUVec.push_back(Elt: floorOfQuotient(A: SrcUM - TX, B: TB));
2024	LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
2025	}
2026	} else {
2027	TUVec.push_back(Elt: floorOfQuotient(A: -TX, B: TB));
2028	LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
2029	if (SrcUMvalid) {
2030	TLVec.push_back(Elt: ceilingOfQuotient(A: SrcUM - TX, B: TB));
2031	LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
2032	}
2033	}
2034
2035	APInt TA = AM.sdiv(RHS: G);
2036	if (TA.sgt(RHS: `0`)) {
2037	TLVec.push_back(Elt: ceilingOfQuotient(A: -TY, B: TA));
2038	LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
2039	if (DstUMvalid) {
2040	TUVec.push_back(Elt: floorOfQuotient(A: DstUM - TY, B: TA));
2041	LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
2042	}
2043	} else {
2044	TUVec.push_back(Elt: floorOfQuotient(A: -TY, B: TA));
2045	LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
2046	if (DstUMvalid) {
2047	TLVec.push_back(Elt: ceilingOfQuotient(A: DstUM - TY, B: TA));
2048	LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
2049	}
2050	}
2051
2052	if (TLVec.empty() \|\| TUVec.empty())
2053	return false;
2054
2055	LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
2056	LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
2057
2058	TL = APIntOps::smax(A: TLVec.front(), B: TLVec.back());
2059	TU = APIntOps::smin(A: TUVec.front(), B: TUVec.back());
2060	LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
2061	LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
2062
2063	if (TL.sgt(RHS: TU))
2064	++ExactRDIVindependence;
2065	return TL.sgt(RHS: TU);
2066	}
2067
2068
2069	// symbolicRDIVtest -
2070	// In Section 4.5 of the Practical Dependence Testing paper,the authors
2071	// introduce a special case of Banerjee's Inequalities (also called the
2072	// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
2073	// particularly cases with symbolics. Since it's only able to disprove
2074	// dependence (not compute distances or directions), we'll use it as a
2075	// fall back for the other tests.
2076	//
2077	// When we have a pair of subscripts of the form [c1 + a1i] and [c2 + a2j]
2078	// where i and j are induction variables and c1 and c2 are loop invariants,
2079	// we can use the symbolic tests to disprove some dependences, serving as a
2080	// backup for the RDIV test. Note that i and j can be the same variable,
2081	// letting this test serve as a backup for the various SIV tests.
2082	//
2083	// For a dependence to exist, c1 + a1i must equal c2 + a2j for some
2084	// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
2085	// loop bounds for the i and j loops, respectively. So, ...
2086	//
2087	// c1 + a1i = c2 + a2j
2088	// a1i - a2j = c2 - c1
2089	//
2090	// To test for a dependence, we compute c2 - c1 and make sure it's in the
2091	// range of the maximum and minimum possible values of a1i - a2j.
2092	// Considering the signs of a1 and a2, we have 4 possible cases:
2093	//
2094	// 1) If a1 >= 0 and a2 >= 0, then
2095	// a10 - a2N2 <= c2 - c1 <= a1N1 - a20
2096	// -a2N2 <= c2 - c1 <= a1N1
2097	//
2098	// 2) If a1 >= 0 and a2 <= 0, then
2099	// a10 - a20 <= c2 - c1 <= a1N1 - a2N2
2100	// 0 <= c2 - c1 <= a1N1 - a2N2
2101	//
2102	// 3) If a1 <= 0 and a2 >= 0, then
2103	// a1N1 - a2N2 <= c2 - c1 <= a10 - a20
2104	// a1N1 - a2N2 <= c2 - c1 <= 0
2105	//
2106	// 4) If a1 <= 0 and a2 <= 0, then
2107	// a1N1 - a20 <= c2 - c1 <= a10 - a2N2
2108	// a1N1 <= c2 - c1 <= -a2N2
2109	//
2110	// return true if dependence disproved
2111	bool DependenceInfo::symbolicRDIVtest(const SCEV A1, const* SCEV *A2,
2112	const SCEV C1, const* SCEV *C2,
2113	const Loop *Loop1,
2114	const Loop Loop2) const* {
2115	++SymbolicRDIVapplications;
2116	LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
2117	LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);
2118	LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
2119	LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
2120	LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
2121	LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
2122	const SCEV *N1 = collectUpperBound(L: Loop1, T: A1->getType());
2123	const SCEV *N2 = collectUpperBound(L: Loop2, T: A1->getType());
2124	LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
2125	LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
2126	const SCEV *C2_C1 = SE->getMinusSCEV(LHS: C2, RHS: C1);
2127	const SCEV *C1_C2 = SE->getMinusSCEV(LHS: C1, RHS: C2);
2128	LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
2129	LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
2130	if (SE->isKnownNonNegative(S: A1)) {
2131	if (SE->isKnownNonNegative(S: A2)) {
2132	// A1 >= 0 && A2 >= 0
2133	if (N1) {
2134	// make sure that c2 - c1 <= a1N1*
2135	const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2136	LLVM_DEBUG(dbgs() << "\t A1N1 = " << A1N1 << "\n");
2137	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: C2_C1, Y: A1N1)) {
2138	++SymbolicRDIVindependence;
2139	return true;
2140	}
2141	}
2142	if (N2) {
2143	// make sure that -a2N2 <= c2 - c1, or a2N2 >= c1 - c2
2144	const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2145	LLVM_DEBUG(dbgs() << "\t A2N2 = " << A2N2 << "\n");
2146	if (isKnownPredicate(Pred: CmpInst::ICMP_SLT, X: A2N2, Y: C1_C2)) {
2147	++SymbolicRDIVindependence;
2148	return true;
2149	}
2150	}
2151	}
2152	else if (SE->isKnownNonPositive(S: A2)) {
2153	// a1 >= 0 && a2 <= 0
2154	if (N1 && N2) {
2155	// make sure that c2 - c1 <= a1N1 - a2N2
2156	const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2157	const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2158	const SCEV *A1N1_A2N2 = SE->getMinusSCEV(LHS: A1N1, RHS: A2N2);
2159	LLVM_DEBUG(dbgs() << "\t A1N1 - A2N2 = " << *A1N1_A2N2 << "\n");
2160	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: C2_C1, Y: A1N1_A2N2)) {
2161	++SymbolicRDIVindependence;
2162	return true;
2163	}
2164	}
2165	// make sure that 0 <= c2 - c1
2166	if (SE->isKnownNegative(S: C2_C1)) {
2167	++SymbolicRDIVindependence;
2168	return true;
2169	}
2170	}
2171	}
2172	else if (SE->isKnownNonPositive(S: A1)) {
2173	if (SE->isKnownNonNegative(S: A2)) {
2174	// a1 <= 0 && a2 >= 0
2175	if (N1 && N2) {
2176	// make sure that a1N1 - a2N2 <= c2 - c1
2177	const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2178	const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2179	const SCEV *A1N1_A2N2 = SE->getMinusSCEV(LHS: A1N1, RHS: A2N2);
2180	LLVM_DEBUG(dbgs() << "\t A1N1 - A2N2 = " << *A1N1_A2N2 << "\n");
2181	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: A1N1_A2N2, Y: C2_C1)) {
2182	++SymbolicRDIVindependence;
2183	return true;
2184	}
2185	}
2186	// make sure that c2 - c1 <= 0
2187	if (SE->isKnownPositive(S: C2_C1)) {
2188	++SymbolicRDIVindependence;
2189	return true;
2190	}
2191	}
2192	else if (SE->isKnownNonPositive(S: A2)) {
2193	// a1 <= 0 && a2 <= 0
2194	if (N1) {
2195	// make sure that a1N1 <= c2 - c1*
2196	const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2197	LLVM_DEBUG(dbgs() << "\t A1N1 = " << A1N1 << "\n");
2198	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: A1N1, Y: C2_C1)) {
2199	++SymbolicRDIVindependence;
2200	return true;
2201	}
2202	}
2203	if (N2) {
2204	// make sure that c2 - c1 <= -a2N2, or c1 - c2 >= a2N2
2205	const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2206	LLVM_DEBUG(dbgs() << "\t A2N2 = " << A2N2 << "\n");
2207	if (isKnownPredicate(Pred: CmpInst::ICMP_SLT, X: C1_C2, Y: A2N2)) {
2208	++SymbolicRDIVindependence;
2209	return true;
2210	}
2211	}
2212	}
2213	}
2214	return false;
2215	}
2216
2217
2218	// testSIV -
2219	// When we have a pair of subscripts of the form [c1 + a1i] and [c2 - a2i]
2220	// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2221	// a2 are constant, we attack it with an SIV test. While they can all be
2222	// solved with the Exact SIV test, it's worthwhile to use simpler tests when
2223	// they apply; they're cheaper and sometimes more precise.
2224	//
2225	// Return true if dependence disproved.
2226	bool DependenceInfo::testSIV(const SCEV Src, const* SCEV Dst, unsigned* &Level,
2227	FullDependence &Result, Constraint &NewConstraint,
2228	const SCEV &SplitIter) const* {
2229	LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2230	LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2231	const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
2232	const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
2233	if (SrcAddRec && DstAddRec) {
2234	const SCEV *SrcConst = SrcAddRec->getStart();
2235	const SCEV *DstConst = DstAddRec->getStart();
2236	const SCEV SrcCoeff = SrcAddRec->getStepRecurrence(SE&: SE);
2237	const SCEV DstCoeff = DstAddRec->getStepRecurrence(SE&: SE);
2238	const Loop *CurLoop = SrcAddRec->getLoop();
2239	assert(CurLoop == DstAddRec->getLoop() &&
2240	"both loops in SIV should be same");
2241	Level = mapSrcLoop(SrcLoop: CurLoop);
2242	bool disproven;
2243	if (SrcCoeff == DstCoeff)
2244	disproven = strongSIVtest(Coeff: SrcCoeff, SrcConst, DstConst, CurLoop,
2245	Level, Result, NewConstraint);
2246	else if (SrcCoeff == SE->getNegativeSCEV(V: DstCoeff))
2247	disproven = weakCrossingSIVtest(Coeff: SrcCoeff, SrcConst, DstConst, CurLoop,
2248	Level, Result, NewConstraint, SplitIter);
2249	else
2250	disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
2251	Level, Result, NewConstraint);
2252	return disproven \|\|
2253	gcdMIVtest(Src, Dst, Result) \|\|
2254	symbolicRDIVtest(A1: SrcCoeff, A2: DstCoeff, C1: SrcConst, C2: DstConst, Loop1: CurLoop, Loop2: CurLoop);
2255	}
2256	if (SrcAddRec) {
2257	const SCEV *SrcConst = SrcAddRec->getStart();
2258	const SCEV SrcCoeff = SrcAddRec->getStepRecurrence(SE&: SE);
2259	const SCEV *DstConst = Dst;
2260	const Loop *CurLoop = SrcAddRec->getLoop();
2261	Level = mapSrcLoop(SrcLoop: CurLoop);
2262	return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2263	Level, Result, NewConstraint) \|\|
2264	gcdMIVtest(Src, Dst, Result);
2265	}
2266	if (DstAddRec) {
2267	const SCEV *DstConst = DstAddRec->getStart();
2268	const SCEV DstCoeff = DstAddRec->getStepRecurrence(SE&: SE);
2269	const SCEV *SrcConst = Src;
2270	const Loop *CurLoop = DstAddRec->getLoop();
2271	Level = mapDstLoop(DstLoop: CurLoop);
2272	return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
2273	CurLoop, Level, Result, NewConstraint) \|\|
2274	gcdMIVtest(Src, Dst, Result);
2275	}
2276	llvm_unreachable("SIV test expected at least one AddRec");
2277	return false;
2278	}
2279
2280
2281	// testRDIV -
2282	// When we have a pair of subscripts of the form [c1 + a1i] and [c2 + a2j]
2283	// where i and j are induction variables, c1 and c2 are loop invariant,
2284	// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2285	// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2286	// It doesn't make sense to talk about distance or direction in this case,
2287	// so there's no point in making special versions of the Strong SIV test or
2288	// the Weak-crossing SIV test.
2289	//
2290	// With minor algebra, this test can also be used for things like
2291	// [c1 + a1i + a2j][c2].
2292	//
2293	// Return true if dependence disproved.
2294	bool DependenceInfo::testRDIV(const SCEV Src, const* SCEV *Dst,
2295	FullDependence &Result) const {
2296	// we have 3 possible situations here:
2297	// 1) [ai + b] and [cj + d]
2298	// 2) [ai + cj + b] and [d]
2299	// 3) [b] and [ai + cj + d]
2300	// We need to find what we've got and get organized
2301
2302	const SCEV SrcConst, DstConst;
2303	const SCEV SrcCoeff, DstCoeff;
2304	const Loop SrcLoop, DstLoop;
2305
2306	LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2307	LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2308	const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
2309	const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
2310	if (SrcAddRec && DstAddRec) {
2311	SrcConst = SrcAddRec->getStart();
2312	SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
2313	SrcLoop = SrcAddRec->getLoop();
2314	DstConst = DstAddRec->getStart();
2315	DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
2316	DstLoop = DstAddRec->getLoop();
2317	}
2318	else if (SrcAddRec) {
2319	if (const SCEVAddRecExpr *tmpAddRec =
2320	dyn_cast<SCEVAddRecExpr>(Val: SrcAddRec->getStart())) {
2321	SrcConst = tmpAddRec->getStart();
2322	SrcCoeff = tmpAddRec->getStepRecurrence(SE&: *SE);
2323	SrcLoop = tmpAddRec->getLoop();
2324	DstConst = Dst;
2325	DstCoeff = SE->getNegativeSCEV(V: SrcAddRec->getStepRecurrence(SE&: *SE));
2326	DstLoop = SrcAddRec->getLoop();
2327	}
2328	else
2329	llvm_unreachable("RDIV reached by surprising SCEVs");
2330	}
2331	else if (DstAddRec) {
2332	if (const SCEVAddRecExpr *tmpAddRec =
2333	dyn_cast<SCEVAddRecExpr>(Val: DstAddRec->getStart())) {
2334	DstConst = tmpAddRec->getStart();
2335	DstCoeff = tmpAddRec->getStepRecurrence(SE&: *SE);
2336	DstLoop = tmpAddRec->getLoop();
2337	SrcConst = Src;
2338	SrcCoeff = SE->getNegativeSCEV(V: DstAddRec->getStepRecurrence(SE&: *SE));
2339	SrcLoop = DstAddRec->getLoop();
2340	}
2341	else
2342	llvm_unreachable("RDIV reached by surprising SCEVs");
2343	}
2344	else
2345	llvm_unreachable("RDIV expected at least one AddRec");
2346	return exactRDIVtest(SrcCoeff, DstCoeff,
2347	SrcConst, DstConst,
2348	SrcLoop, DstLoop,
2349	Result) \|\|
2350	gcdMIVtest(Src, Dst, Result) \|\|
2351	symbolicRDIVtest(A1: SrcCoeff, A2: DstCoeff,
2352	C1: SrcConst, C2: DstConst,
2353	Loop1: SrcLoop, Loop2: DstLoop);
2354	}
2355
2356
2357	// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2358	// Return true if dependence disproved.
2359	// Can sometimes refine direction vectors.
2360	bool DependenceInfo::testMIV(const SCEV Src, const* SCEV *Dst,
2361	const SmallBitVector &Loops,
2362	FullDependence &Result) const {
2363	LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2364	LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2365	Result.Consistent = false;
2366	return gcdMIVtest(Src, Dst, Result) \|\|
2367	banerjeeMIVtest(Src, Dst, Loops, Result);
2368	}
2369
2370
2371	// Given a product, e.g., 10XY, returns the first constant operand,
2372	// in this case 10. If there is no constant part, returns NULL.
2373	static
2374	const SCEVConstant getConstantPart(const* SCEV *Expr) {
2375	if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Expr))
2376	return Constant;
2377	else if (const auto *Product = dyn_cast<SCEVMulExpr>(Val: Expr))
2378	if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Product->getOperand(i: `0`)))
2379	return Constant;
2380	return nullptr;
2381	}
2382
2383
2384	//===----------------------------------------------------------------------===//
2385	// gcdMIVtest -
2386	// Tests an MIV subscript pair for dependence.
2387	// Returns true if any possible dependence is disproved.
2388	// Marks the result as inconsistent.
2389	// Can sometimes disprove the equal direction for 1 or more loops,
2390	// as discussed in Michael Wolfe's book,
2391	// High Performance Compilers for Parallel Computing, page 235.
2392	//
2393	// We spend some effort (code!) to handle cases like
2394	// [10i + 5Nj + 15M + 6], where i and j are induction variables,
2395	// but M and N are just loop-invariant variables.
2396	// This should help us handle linearized subscripts;
2397	// also makes this test a useful backup to the various SIV tests.
2398	//
2399	// It occurs to me that the presence of loop-invariant variables
2400	// changes the nature of the test from "greatest common divisor"
2401	// to "a common divisor".
2402	bool DependenceInfo::gcdMIVtest(const SCEV Src, const* SCEV *Dst,
2403	FullDependence &Result) const {
2404	LLVM_DEBUG(dbgs() << "starting gcd\n");
2405	++GCDapplications;
2406	unsigned BitWidth = SE->getTypeSizeInBits(Ty: Src->getType());
2407	APInt RunningGCD = APInt::getZero(numBits: BitWidth);
2408
2409	// Examine Src coefficients.
2410	// Compute running GCD and record source constant.
2411	// Because we're looking for the constant at the end of the chain,
2412	// we can't quit the loop just because the GCD == 1.
2413	const SCEV *Coefficients = Src;
2414	while (const SCEVAddRecExpr *AddRec =
2415	dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2416	const SCEV Coeff = AddRec->getStepRecurrence(SE&: SE);
2417	// If the coefficient is the product of a constant and other stuff,
2418	// we can use the constant in the GCD computation.
2419	const auto *Constant = getConstantPart(Expr: Coeff);
2420	if (!Constant)
2421	return false;
2422	APInt ConstCoeff = Constant->getAPInt();
2423	RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff.abs());
2424	Coefficients = AddRec->getStart();
2425	}
2426	const SCEV *SrcConst = Coefficients;
2427
2428	// Examine Dst coefficients.
2429	// Compute running GCD and record destination constant.
2430	// Because we're looking for the constant at the end of the chain,
2431	// we can't quit the loop just because the GCD == 1.
2432	Coefficients = Dst;
2433	while (const SCEVAddRecExpr *AddRec =
2434	dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2435	const SCEV Coeff = AddRec->getStepRecurrence(SE&: SE);
2436	// If the coefficient is the product of a constant and other stuff,
2437	// we can use the constant in the GCD computation.
2438	const auto *Constant = getConstantPart(Expr: Coeff);
2439	if (!Constant)
2440	return false;
2441	APInt ConstCoeff = Constant->getAPInt();
2442	RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff.abs());
2443	Coefficients = AddRec->getStart();
2444	}
2445	const SCEV *DstConst = Coefficients;
2446
2447	APInt ExtraGCD = APInt::getZero(numBits: BitWidth);
2448	const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
2449	LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
2450	const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Val: Delta);
2451	if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Val: Delta)) {
2452	// If Delta is a sum of products, we may be able to make further progress.
2453	for (unsigned Op = `0`, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
2454	const SCEV *Operand = Sum->getOperand(i: Op);
2455	if (isa<SCEVConstant>(Val: Operand)) {
2456	assert(!Constant && "Surprised to find multiple constants");
2457	Constant = cast<SCEVConstant>(Val: Operand);
2458	}
2459	else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Val: Operand)) {
2460	// Search for constant operand to participate in GCD;
2461	// If none found; return false.
2462	const SCEVConstant *ConstOp = getConstantPart(Expr: Product);
2463	if (!ConstOp)
2464	return false;
2465	APInt ConstOpValue = ConstOp->getAPInt();
2466	ExtraGCD = APIntOps::GreatestCommonDivisor(A: ExtraGCD,
2467	B: ConstOpValue.abs());
2468	}
2469	else
2470	return false;
2471	}
2472	}
2473	if (!Constant)
2474	return false;
2475	APInt ConstDelta = cast<SCEVConstant>(Val: Constant)->getAPInt();
2476	LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
2477	if (ConstDelta == `0`)
2478	return false;
2479	RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ExtraGCD);
2480	LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
2481	APInt Remainder = ConstDelta.srem(RHS: RunningGCD);
2482	if (Remainder != `0`) {
2483	++GCDindependence;
2484	return true;
2485	}
2486
2487	// Try to disprove equal directions.
2488	// For example, given a subscript pair [3i + 2j] and [i' + 2j' - 1],*
2489	// the code above can't disprove the dependence because the GCD = 1.
2490	// So we consider what happen if i = i' and what happens if j = j'.
2491	// If i = i', we can simplify the subscript to [2i + 2j] and [2j' - 1],*
2492	// which is infeasible, so we can disallow the = direction for the i level.
2493	// Setting j = j' doesn't help matters, so we end up with a direction vector
2494	// of [<>, ]*
2495	//
2496	// Given A[5i + 10jM + 9MN] and A[15i + 20jM - 21NM + 5],
2497	// we need to remember that the constant part is 5 and the RunningGCD should
2498	// be initialized to ExtraGCD = 30.
2499	LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << `'\n'`);
2500
2501	bool Improved = false;
2502	Coefficients = Src;
2503	while (const SCEVAddRecExpr *AddRec =
2504	dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2505	Coefficients = AddRec->getStart();
2506	const Loop *CurLoop = AddRec->getLoop();
2507	RunningGCD = ExtraGCD;
2508	const SCEV SrcCoeff = AddRec->getStepRecurrence(SE&: SE);
2509	const SCEV *DstCoeff = SE->getMinusSCEV(LHS: SrcCoeff, RHS: SrcCoeff);
2510	const SCEV *Inner = Src;
2511	while (RunningGCD != `1` && isa<SCEVAddRecExpr>(Val: Inner)) {
2512	AddRec = cast<SCEVAddRecExpr>(Val: Inner);
2513	const SCEV Coeff = AddRec->getStepRecurrence(SE&: SE);
2514	if (CurLoop == AddRec->getLoop())
2515	; // SrcCoeff == Coeff
2516	else {
2517	// If the coefficient is the product of a constant and other stuff,
2518	// we can use the constant in the GCD computation.
2519	Constant = getConstantPart(Expr: Coeff);
2520	if (!Constant)
2521	return false;
2522	APInt ConstCoeff = Constant->getAPInt();
2523	RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff.abs());
2524	}
2525	Inner = AddRec->getStart();
2526	}
2527	Inner = Dst;
2528	while (RunningGCD != `1` && isa<SCEVAddRecExpr>(Val: Inner)) {
2529	AddRec = cast<SCEVAddRecExpr>(Val: Inner);
2530	const SCEV Coeff = AddRec->getStepRecurrence(SE&: SE);
2531	if (CurLoop == AddRec->getLoop())
2532	DstCoeff = Coeff;
2533	else {
2534	// If the coefficient is the product of a constant and other stuff,
2535	// we can use the constant in the GCD computation.
2536	Constant = getConstantPart(Expr: Coeff);
2537	if (!Constant)
2538	return false;
2539	APInt ConstCoeff = Constant->getAPInt();
2540	RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff.abs());
2541	}
2542	Inner = AddRec->getStart();
2543	}
2544	Delta = SE->getMinusSCEV(LHS: SrcCoeff, RHS: DstCoeff);
2545	// If the coefficient is the product of a constant and other stuff,
2546	// we can use the constant in the GCD computation.
2547	Constant = getConstantPart(Expr: Delta);
2548	if (!Constant)
2549	// The difference of the two coefficients might not be a product
2550	// or constant, in which case we give up on this direction.
2551	continue;
2552	APInt ConstCoeff = Constant->getAPInt();
2553	RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff.abs());
2554	LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2555	if (RunningGCD != `0`) {
2556	Remainder = ConstDelta.srem(RHS: RunningGCD);
2557	LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2558	if (Remainder != `0`) {
2559	unsigned Level = mapSrcLoop(SrcLoop: CurLoop);
2560	Result.DV [Level - `1`].Direction &= ~Dependence::DVEntry::EQ;
2561	Improved = true;
2562	}
2563	}
2564	}
2565	if (Improved)
2566	++GCDsuccesses;
2567	LLVM_DEBUG(dbgs() << "all done\n");
2568	return false;
2569	}
2570
2571
2572	//===----------------------------------------------------------------------===//
2573	// banerjeeMIVtest -
2574	// Use Banerjee's Inequalities to test an MIV subscript pair.
2575	// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2576	// Generally follows the discussion in Section 2.5.2 of
2577	//
2578	// Optimizing Supercompilers for Supercomputers
2579	// Michael Wolfe
2580	//
2581	// The inequalities given on page 25 are simplified in that loops are
2582	// normalized so that the lower bound is always 0 and the stride is always 1.
2583	// For example, Wolfe gives
2584	//
2585	// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2586	//
2587	// where A_k is the coefficient of the kth index in the source subscript,
2588	// B_k is the coefficient of the kth index in the destination subscript,
2589	// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2590	// index, and N_k is the stride of the kth index. Since all loops are normalized
2591	// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2592	// equation to
2593	//
2594	// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2595	// = (A^-_k - B_k)^- (U_k - 1) - B_k
2596	//
2597	// Similar simplifications are possible for the other equations.
2598	//
2599	// When we can't determine the number of iterations for a loop,
2600	// we use NULL as an indicator for the worst case, infinity.
2601	// When computing the upper bound, NULL denotes +inf;
2602	// for the lower bound, NULL denotes -inf.
2603	//
2604	// Return true if dependence disproved.
2605	bool DependenceInfo::banerjeeMIVtest(const SCEV Src, const* SCEV *Dst,
2606	const SmallBitVector &Loops,
2607	FullDependence &Result) const {
2608	LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2609	++BanerjeeApplications;
2610	LLVM_DEBUG(dbgs() << " Src = " << *Src << `'\n'`);
2611	const SCEV *A0;
2612	CoefficientInfo A = collectCoeffInfo(Subscript: Src, SrcFlag: true*, Constant&: A0);
2613	LLVM_DEBUG(dbgs() << " Dst = " << *Dst << `'\n'`);
2614	const SCEV *B0;
2615	CoefficientInfo B = collectCoeffInfo(Subscript: Dst, SrcFlag: false*, Constant&: B0);
2616	BoundInfo Bound = new* BoundInfo[MaxLevels + `1`];
2617	const SCEV *Delta = SE->getMinusSCEV(LHS: B0, RHS: A0);
2618	LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << `'\n'`);
2619
2620	// Compute bounds for all the directions.*
2621	LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2622	for (unsigned K = `1`; K <= MaxLevels; ++K) {
2623	Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
2624	Bound[K].Direction = Dependence::DVEntry::ALL;
2625	Bound[K].DirSet = Dependence::DVEntry::NONE;
2626	findBoundsALL(A, B, Bound, K);
2627	#ifndef NDEBUG
2628	LLVM_DEBUG(dbgs() << "\t " << K << `'\t'`);
2629	if (Bound[K].Lower[Dependence::DVEntry::ALL])
2630	LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << `'\t'`);
2631	else
2632	LLVM_DEBUG(dbgs() << "-inf\t");
2633	if (Bound[K].Upper[Dependence::DVEntry::ALL])
2634	LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << `'\n'`);
2635	else
2636	LLVM_DEBUG(dbgs() << "+inf\n");
2637	#endif
2638	}
2639
2640	// Test the , , , ... case.*
2641	bool Disproved = false;
2642	if (testBounds(DirKind: Dependence::DVEntry::ALL, Level: `0`, Bound, Delta)) {
2643	// Explore the direction vector hierarchy.
2644	unsigned DepthExpanded = `0`;
2645	unsigned NewDeps = exploreDirections(Level: `1`, A, B, Bound,
2646	Loops, DepthExpanded, Delta);
2647	if (NewDeps > `0`) {
2648	bool Improved = false;
2649	for (unsigned K = `1`; K <= CommonLevels; ++K) {
2650	if (Loops [K]) {
2651	unsigned Old = Result.DV [K - `1`].Direction;
2652	Result.DV [K - `1`].Direction = Old & Bound[K].DirSet;
2653	Improved \|= Old != Result.DV [K - `1`].Direction;
2654	if (!Result.DV [K - `1`].Direction) {
2655	Improved = false;
2656	Disproved = true;
2657	break;
2658	}
2659	}
2660	}
2661	if (Improved)
2662	++BanerjeeSuccesses;
2663	}
2664	else {
2665	++BanerjeeIndependence;
2666	Disproved = true;
2667	}
2668	}
2669	else {
2670	++BanerjeeIndependence;
2671	Disproved = true;
2672	}
2673	delete [] Bound;
2674	delete [] A;
2675	delete [] B;
2676	return Disproved;
2677	}
2678
2679
2680	// Hierarchically expands the direction vector
2681	// search space, combining the directions of discovered dependences
2682	// in the DirSet field of Bound. Returns the number of distinct
2683	// dependences discovered. If the dependence is disproved,
2684	// it will return 0.
2685	unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2686	CoefficientInfo B, BoundInfo Bound,
2687	const SmallBitVector &Loops,
2688	unsigned &DepthExpanded,
2689	const SCEV Delta) const* {
2690	// This algorithm has worst case complexity of O(3^n), where 'n' is the number
2691	// of common loop levels. To avoid excessive compile-time, pessimize all the
2692	// results and immediately return when the number of common levels is beyond
2693	// the given threshold.
2694	if (CommonLevels > MIVMaxLevelThreshold) {
2695	LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2696	"direction exploration is terminated.\n");
2697	for (unsigned K = `1`; K <= CommonLevels; ++K)
2698	if (Loops [K])
2699	Bound[K].DirSet = Dependence::DVEntry::ALL;
2700	return `1`;
2701	}
2702
2703	if (Level > CommonLevels) {
2704	// record result
2705	LLVM_DEBUG(dbgs() << "\t[");
2706	for (unsigned K = `1`; K <= CommonLevels; ++K) {
2707	if (Loops [K]) {
2708	Bound[K].DirSet \|= Bound[K].Direction;
2709	#ifndef NDEBUG
2710	switch (Bound[K].Direction) {
2711	case Dependence::DVEntry::LT:
2712	LLVM_DEBUG(dbgs() << " <");
2713	break;
2714	case Dependence::DVEntry::EQ:
2715	LLVM_DEBUG(dbgs() << " =");
2716	break;
2717	case Dependence::DVEntry::GT:
2718	LLVM_DEBUG(dbgs() << " >");
2719	break;
2720	case Dependence::DVEntry::ALL:
2721	LLVM_DEBUG(dbgs() << " *");
2722	break;
2723	default:
2724	llvm_unreachable("unexpected Bound[K].Direction");
2725	}
2726	#endif
2727	}
2728	}
2729	LLVM_DEBUG(dbgs() << " ]\n");
2730	return `1`;
2731	}
2732	if (Loops [Level]) {
2733	if (Level > DepthExpanded) {
2734	DepthExpanded = Level;
2735	// compute bounds for <, =, > at current level
2736	findBoundsLT(A, B, Bound, K: Level);
2737	findBoundsGT(A, B, Bound, K: Level);
2738	findBoundsEQ(A, B, Bound, K: Level);
2739	#ifndef NDEBUG
2740	LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << `'\n'`);
2741	LLVM_DEBUG(dbgs() << "\t <\t");
2742	if (Bound[Level].Lower[Dependence::DVEntry::LT])
2743	LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2744	<< `'\t'`);
2745	else
2746	LLVM_DEBUG(dbgs() << "-inf\t");
2747	if (Bound[Level].Upper[Dependence::DVEntry::LT])
2748	LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2749	<< `'\n'`);
2750	else
2751	LLVM_DEBUG(dbgs() << "+inf\n");
2752	LLVM_DEBUG(dbgs() << "\t =\t");
2753	if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2754	LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2755	<< `'\t'`);
2756	else
2757	LLVM_DEBUG(dbgs() << "-inf\t");
2758	if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2759	LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2760	<< `'\n'`);
2761	else
2762	LLVM_DEBUG(dbgs() << "+inf\n");
2763	LLVM_DEBUG(dbgs() << "\t >\t");
2764	if (Bound[Level].Lower[Dependence::DVEntry::GT])
2765	LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2766	<< `'\t'`);
2767	else
2768	LLVM_DEBUG(dbgs() << "-inf\t");
2769	if (Bound[Level].Upper[Dependence::DVEntry::GT])
2770	LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2771	<< `'\n'`);
2772	else
2773	LLVM_DEBUG(dbgs() << "+inf\n");
2774	#endif
2775	}
2776
2777	unsigned NewDeps = `0`;
2778
2779	// test bounds for <, , , ...
2780	if (testBounds(DirKind: Dependence::DVEntry::LT, Level, Bound, Delta))
2781	NewDeps += exploreDirections(Level: Level + `1`, A, B, Bound,
2782	Loops, DepthExpanded, Delta);
2783
2784	// Test bounds for =, , , ...
2785	if (testBounds(DirKind: Dependence::DVEntry::EQ, Level, Bound, Delta))
2786	NewDeps += exploreDirections(Level: Level + `1`, A, B, Bound,
2787	Loops, DepthExpanded, Delta);
2788
2789	// test bounds for >, , , ...
2790	if (testBounds(DirKind: Dependence::DVEntry::GT, Level, Bound, Delta))
2791	NewDeps += exploreDirections(Level: Level + `1`, A, B, Bound,
2792	Loops, DepthExpanded, Delta);
2793
2794	Bound[Level].Direction = Dependence::DVEntry::ALL;
2795	return NewDeps;
2796	}
2797	else
2798	return exploreDirections(Level: Level + `1`, A, B, Bound, Loops, DepthExpanded, Delta);
2799	}
2800
2801
2802	// Returns true iff the current bounds are plausible.
2803	bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2804	BoundInfo Bound, const* SCEV Delta) const* {
2805	Bound[Level].Direction = DirKind;
2806	if (const SCEV *LowerBound = getLowerBound(Bound))
2807	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: LowerBound, Y: Delta))
2808	return false;
2809	if (const SCEV *UpperBound = getUpperBound(Bound))
2810	if (isKnownPredicate(Pred: CmpInst::ICMP_SGT, X: Delta, Y: UpperBound))
2811	return false;
2812	return true;
2813	}
2814
2815
2816	// Computes the upper and lower bounds for level K
2817	// using the direction. Records them in Bound.*
2818	// Wolfe gives the equations
2819	//
2820	// LB^_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k*
2821	// UB^_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k*
2822	//
2823	// Since we normalize loops, we can simplify these equations to
2824	//
2825	// LB^_k = (A^-_k - B^+_k)U_k*
2826	// UB^_k = (A^+_k - B^-_k)U_k*
2827	//
2828	// We must be careful to handle the case where the upper bound is unknown.
2829	// Note that the lower bound is always <= 0
2830	// and the upper bound is always >= 0.
2831	void DependenceInfo::findBoundsALL(CoefficientInfo A, CoefficientInfo B,
2832	BoundInfo Bound, unsigned* K) const {
2833	Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
2834	Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
2835	if (Bound[K].Iterations) {
2836	Bound[K].Lower[Dependence::DVEntry::ALL] =
2837	SE->getMulExpr(LHS: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].PosPart),
2838	RHS: Bound[K].Iterations);
2839	Bound[K].Upper[Dependence::DVEntry::ALL] =
2840	SE->getMulExpr(LHS: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].NegPart),
2841	RHS: Bound[K].Iterations);
2842	}
2843	else {
2844	// If the difference is 0, we won't need to know the number of iterations.
2845	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: A[K].NegPart, Y: B[K].PosPart))
2846	Bound[K].Lower[Dependence::DVEntry::ALL] =
2847	SE->getZero(Ty: A[K].Coeff->getType());
2848	if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: A[K].PosPart, Y: B[K].NegPart))
2849	Bound[K].Upper[Dependence::DVEntry::ALL] =
2850	SE->getZero(Ty: A[K].Coeff->getType());
2851	}
2852	}
2853
2854
2855	// Computes the upper and lower bounds for level K
2856	// using the = direction. Records them in Bound.
2857	// Wolfe gives the equations
2858	//
2859	// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2860	// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2861	//
2862	// Since we normalize loops, we can simplify these equations to
2863	//
2864	// LB^=_k = (A_k - B_k)^- U_k
2865	// UB^=_k = (A_k - B_k)^+ U_k
2866	//
2867	// We must be careful to handle the case where the upper bound is unknown.
2868	// Note that the lower bound is always <= 0
2869	// and the upper bound is always >= 0.
2870	void DependenceInfo::findBoundsEQ(CoefficientInfo A, CoefficientInfo B,
2871	BoundInfo Bound, unsigned* K) const {
2872	Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
2873	Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
2874	if (Bound[K].Iterations) {
2875	const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2876	const SCEV *NegativePart = getNegativePart(X: Delta);
2877	Bound[K].Lower[Dependence::DVEntry::EQ] =
2878	SE->getMulExpr(LHS: NegativePart, RHS: Bound[K].Iterations);
2879	const SCEV *PositivePart = getPositivePart(X: Delta);
2880	Bound[K].Upper[Dependence::DVEntry::EQ] =
2881	SE->getMulExpr(LHS: PositivePart, RHS: Bound[K].Iterations);
2882	}
2883	else {
2884	// If the positive/negative part of the difference is 0,
2885	// we won't need to know the number of iterations.
2886	const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2887	const SCEV *NegativePart = getNegativePart(X: Delta);
2888	if (NegativePart->isZero())
2889	Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2890	const SCEV *PositivePart = getPositivePart(X: Delta);
2891	if (PositivePart->isZero())
2892	Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2893	}
2894	}
2895
2896
2897	// Computes the upper and lower bounds for level K
2898	// using the < direction. Records them in Bound.
2899	// Wolfe gives the equations
2900	//
2901	// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2902	// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2903	//
2904	// Since we normalize loops, we can simplify these equations to
2905	//
2906	// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2907	// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2908	//
2909	// We must be careful to handle the case where the upper bound is unknown.
2910	void DependenceInfo::findBoundsLT(CoefficientInfo A, CoefficientInfo B,
2911	BoundInfo Bound, unsigned* K) const {
2912	Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
2913	Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
2914	if (Bound[K].Iterations) {
2915	const SCEV *Iter_1 = SE->getMinusSCEV(
2916	LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2917	const SCEV *NegPart =
2918	getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2919	Bound[K].Lower[Dependence::DVEntry::LT] =
2920	SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: B[K].Coeff);
2921	const SCEV *PosPart =
2922	getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2923	Bound[K].Upper[Dependence::DVEntry::LT] =
2924	SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: B[K].Coeff);
2925	}
2926	else {
2927	// If the positive/negative part of the difference is 0,
2928	// we won't need to know the number of iterations.
2929	const SCEV *NegPart =
2930	getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2931	if (NegPart->isZero())
2932	Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2933	const SCEV *PosPart =
2934	getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2935	if (PosPart->isZero())
2936	Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2937	}
2938	}
2939
2940
2941	// Computes the upper and lower bounds for level K
2942	// using the > direction. Records them in Bound.
2943	// Wolfe gives the equations
2944	//
2945	// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2946	// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2947	//
2948	// Since we normalize loops, we can simplify these equations to
2949	//
2950	// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2951	// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2952	//
2953	// We must be careful to handle the case where the upper bound is unknown.
2954	void DependenceInfo::findBoundsGT(CoefficientInfo A, CoefficientInfo B,
2955	BoundInfo Bound, unsigned* K) const {
2956	Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
2957	Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
2958	if (Bound[K].Iterations) {
2959	const SCEV *Iter_1 = SE->getMinusSCEV(
2960	LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2961	const SCEV *NegPart =
2962	getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2963	Bound[K].Lower[Dependence::DVEntry::GT] =
2964	SE->getAddExpr(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: A[K].Coeff);
2965	const SCEV *PosPart =
2966	getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
2967	Bound[K].Upper[Dependence::DVEntry::GT] =
2968	SE->getAddExpr(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: A[K].Coeff);
2969	}
2970	else {
2971	// If the positive/negative part of the difference is 0,
2972	// we won't need to know the number of iterations.
2973	const SCEV *NegPart = getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2974	if (NegPart->isZero())
2975	Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2976	const SCEV *PosPart = getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
2977	if (PosPart->isZero())
2978	Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2979	}
2980	}
2981
2982
2983	// X^+ = max(X, 0)
2984	const SCEV DependenceInfo::getPositivePart(const* SCEV X) const* {
2985	return SE->getSMaxExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
2986	}
2987
2988
2989	// X^- = min(X, 0)
2990	const SCEV DependenceInfo::getNegativePart(const* SCEV X) const* {
2991	return SE->getSMinExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
2992	}
2993
2994
2995	// Walks through the subscript,
2996	// collecting each coefficient, the associated loop bounds,
2997	// and recording its positive and negative parts for later use.
2998	DependenceInfo::CoefficientInfo *
2999	DependenceInfo::collectCoeffInfo(const SCEV Subscript, bool* SrcFlag,
3000	const SCEV &Constant) const* {
3001	const SCEV *Zero = SE->getZero(Ty: Subscript->getType());
3002	CoefficientInfo CI = new* CoefficientInfo[MaxLevels + `1`];
3003	for (unsigned K = `1`; K <= MaxLevels; ++K) {
3004	CI[K].Coeff = Zero;
3005	CI[K].PosPart = Zero;
3006	CI[K].NegPart = Zero;
3007	CI[K].Iterations = nullptr;
3008	}
3009	while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Subscript)) {
3010	const Loop *L = AddRec->getLoop();
3011	unsigned K = SrcFlag ? mapSrcLoop(SrcLoop: L) : mapDstLoop(DstLoop: L);
3012	CI[K].Coeff = AddRec->getStepRecurrence(SE&: *SE);
3013	CI[K].PosPart = getPositivePart(X: CI[K].Coeff);
3014	CI[K].NegPart = getNegativePart(X: CI[K].Coeff);
3015	CI[K].Iterations = collectUpperBound(L, T: Subscript->getType());
3016	Subscript = AddRec->getStart();
3017	}
3018	Constant = Subscript;
3019	#ifndef NDEBUG
3020	LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
3021	for (unsigned K = `1`; K <= MaxLevels; ++K) {
3022	LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
3023	LLVM_DEBUG(dbgs() << "\tPos Part = ");
3024	LLVM_DEBUG(dbgs() << *CI[K].PosPart);
3025	LLVM_DEBUG(dbgs() << "\tNeg Part = ");
3026	LLVM_DEBUG(dbgs() << *CI[K].NegPart);
3027	LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
3028	if (CI[K].Iterations)
3029	LLVM_DEBUG(dbgs() << *CI[K].Iterations);
3030	else
3031	LLVM_DEBUG(dbgs() << "+inf");
3032	LLVM_DEBUG(dbgs() << `'\n'`);
3033	}
3034	LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << `'\n'`);
3035	#endif
3036	return CI;
3037	}
3038
3039
3040	// Looks through all the bounds info and
3041	// computes the lower bound given the current direction settings
3042	// at each level. If the lower bound for any level is -inf,
3043	// the result is -inf.
3044	const SCEV DependenceInfo::getLowerBound(BoundInfo Bound) const {
3045	const SCEV *Sum = Bound[`1`].Lower[Bound[`1`].Direction];
3046	for (unsigned K = `2`; Sum && K <= MaxLevels; ++K) {
3047	if (Bound[K].Lower[Bound[K].Direction])
3048	Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Lower[Bound[K].Direction]);
3049	else
3050	Sum = nullptr;
3051	}
3052	return Sum;
3053	}
3054
3055
3056	// Looks through all the bounds info and
3057	// computes the upper bound given the current direction settings
3058	// at each level. If the upper bound at any level is +inf,
3059	// the result is +inf.
3060	const SCEV DependenceInfo::getUpperBound(BoundInfo Bound) const {
3061	const SCEV *Sum = Bound[`1`].Upper[Bound[`1`].Direction];
3062	for (unsigned K = `2`; Sum && K <= MaxLevels; ++K) {
3063	if (Bound[K].Upper[Bound[K].Direction])
3064	Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Upper[Bound[K].Direction]);
3065	else
3066	Sum = nullptr;
3067	}
3068	return Sum;
3069	}
3070
3071
3072	//===----------------------------------------------------------------------===//
3073	// Constraint manipulation for Delta test.
3074
3075	// Given a linear SCEV,
3076	// return the coefficient (the step)
3077	// corresponding to the specified loop.
3078	// If there isn't one, return 0.
3079	// For example, given ai + bj + ck, finding the coefficient*
3080	// corresponding to the j loop would yield b.
3081	const SCEV DependenceInfo::findCoefficient(const* SCEV *Expr,
3082	const Loop TargetLoop) const* {
3083	const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
3084	if (!AddRec)
3085	return SE->getZero(Ty: Expr->getType());
3086	if (AddRec->getLoop() == TargetLoop)
3087	return AddRec->getStepRecurrence(SE&: *SE);
3088	return findCoefficient(Expr: AddRec->getStart(), TargetLoop);
3089	}
3090
3091
3092	// Given a linear SCEV,
3093	// return the SCEV given by zeroing out the coefficient
3094	// corresponding to the specified loop.
3095	// For example, given ai + bj + ck, zeroing the coefficient*
3096	// corresponding to the j loop would yield ai + ck.
3097	const SCEV DependenceInfo::zeroCoefficient(const* SCEV *Expr,
3098	const Loop TargetLoop) const* {
3099	const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
3100	if (!AddRec)
3101	return Expr; // ignore
3102	if (AddRec->getLoop() == TargetLoop)
3103	return AddRec->getStart();
3104	return SE->getAddRecExpr(Start: zeroCoefficient(Expr: AddRec->getStart(), TargetLoop),
3105	Step: AddRec->getStepRecurrence(SE&: *SE),
3106	L: AddRec->getLoop(),
3107	Flags: AddRec->getNoWrapFlags());
3108	}
3109
3110
3111	// Given a linear SCEV Expr,
3112	// return the SCEV given by adding some Value to the
3113	// coefficient corresponding to the specified TargetLoop.
3114	// For example, given ai + bj + ck, adding 1 to the coefficient*
3115	// corresponding to the j loop would yield ai + (b+1)j + ck.*
3116	const SCEV DependenceInfo::addToCoefficient(const* SCEV *Expr,
3117	const Loop *TargetLoop,
3118	const SCEV Value) const* {
3119	const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
3120	if (!AddRec) // create a new addRec
3121	return SE->getAddRecExpr(Start: Expr,
3122	Step: Value,
3123	L: TargetLoop,
3124	Flags: SCEV::FlagAnyWrap); // Worst case, with no info.
3125	if (AddRec->getLoop() == TargetLoop) {
3126	const SCEV Sum = SE->getAddExpr(LHS: AddRec->getStepRecurrence(SE&: SE), RHS: Value);
3127	if (Sum->isZero())
3128	return AddRec->getStart();
3129	return SE->getAddRecExpr(Start: AddRec->getStart(),
3130	Step: Sum,
3131	L: AddRec->getLoop(),
3132	Flags: AddRec->getNoWrapFlags());
3133	}
3134	if (SE->isLoopInvariant(S: AddRec, L: TargetLoop))
3135	return SE->getAddRecExpr(Start: AddRec, Step: Value, L: TargetLoop, Flags: SCEV::FlagAnyWrap);
3136	return SE->getAddRecExpr(
3137	Start: addToCoefficient(Expr: AddRec->getStart(), TargetLoop, Value),
3138	Step: AddRec->getStepRecurrence(SE&: *SE), L: AddRec->getLoop(),
3139	Flags: AddRec->getNoWrapFlags());
3140	}
3141
3142
3143	// Review the constraints, looking for opportunities
3144	// to simplify a subscript pair (Src and Dst).
3145	// Return true if some simplification occurs.
3146	// If the simplification isn't exact (that is, if it is conservative
3147	// in terms of dependence), set consistent to false.
3148	// Corresponds to Figure 5 from the paper
3149	//
3150	// Practical Dependence Testing
3151	// Goff, Kennedy, Tseng
3152	// PLDI 1991
3153	bool DependenceInfo::propagate(const SCEV &Src, const* SCEV *&Dst,
3154	SmallBitVector &Loops,
3155	SmallVectorImpl<Constraint> &Constraints,
3156	bool &Consistent) {
3157	bool Result = false;
3158	for (unsigned LI : Loops.set_bits()) {
3159	LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
3160	LLVM_DEBUG(Constraints[LI].dump(dbgs()));
3161	if (Constraints [LI].isDistance())
3162	Result \|= propagateDistance(Src, Dst, CurConstraint&: Constraints [LI], Consistent);
3163	else if (Constraints [LI].isLine())
3164	Result \|= propagateLine(Src, Dst, CurConstraint&: Constraints [LI], Consistent);
3165	else if (Constraints [LI].isPoint())
3166	Result \|= propagatePoint(Src, Dst, CurConstraint&: Constraints [LI]);
3167	}
3168	return Result;
3169	}
3170
3171
3172	// Attempt to propagate a distance
3173	// constraint into a subscript pair (Src and Dst).
3174	// Return true if some simplification occurs.
3175	// If the simplification isn't exact (that is, if it is conservative
3176	// in terms of dependence), set consistent to false.
3177	bool DependenceInfo::propagateDistance(const SCEV &Src, const* SCEV *&Dst,
3178	Constraint &CurConstraint,
3179	bool &Consistent) {
3180	const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3181	LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3182	const SCEV *A_K = findCoefficient(Expr: Src, TargetLoop: CurLoop);
3183	if (A_K->isZero())
3184	return false;
3185	const SCEV *DA_K = SE->getMulExpr(LHS: A_K, RHS: CurConstraint.getD());
3186	Src = SE->getMinusSCEV(LHS: Src, RHS: DA_K);
3187	Src = zeroCoefficient(Expr: Src, TargetLoop: CurLoop);
3188	LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3189	LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3190	Dst = addToCoefficient(Expr: Dst, TargetLoop: CurLoop, Value: SE->getNegativeSCEV(V: A_K));
3191	LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3192	if (!findCoefficient(Expr: Dst, TargetLoop: CurLoop)->isZero())
3193	Consistent = false;
3194	return true;
3195	}
3196
3197
3198	// Attempt to propagate a line
3199	// constraint into a subscript pair (Src and Dst).
3200	// Return true if some simplification occurs.
3201	// If the simplification isn't exact (that is, if it is conservative
3202	// in terms of dependence), set consistent to false.
3203	bool DependenceInfo::propagateLine(const SCEV &Src, const* SCEV *&Dst,
3204	Constraint &CurConstraint,
3205	bool &Consistent) {
3206	const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3207	const SCEV *A = CurConstraint.getA();
3208	const SCEV *B = CurConstraint.getB();
3209	const SCEV *C = CurConstraint.getC();
3210	LLVM_DEBUG(dbgs() << "\t\tA = " << A << ", B = " << B << ", C = " << *C
3211	<< "\n");
3212	LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
3213	LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
3214	if (A->isZero()) {
3215	const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(Val: B);
3216	const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(Val: C);
3217	if (!Bconst \|\| !Cconst) return false;
3218	APInt Beta = Bconst->getAPInt();
3219	APInt Charlie = Cconst->getAPInt();
3220	APInt CdivB = Charlie.sdiv(RHS: Beta);
3221	assert(Charlie.srem(Beta) == `0` && "C should be evenly divisible by B");
3222	const SCEV *AP_K = findCoefficient(Expr: Dst, TargetLoop: CurLoop);
3223	// Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3224	Src = SE->getMinusSCEV(LHS: Src, RHS: SE->getMulExpr(LHS: AP_K, RHS: SE->getConstant(Val: CdivB)));
3225	Dst = zeroCoefficient(Expr: Dst, TargetLoop: CurLoop);
3226	if (!findCoefficient(Expr: Src, TargetLoop: CurLoop)->isZero())
3227	Consistent = false;
3228	}
3229	else if (B->isZero()) {
3230	const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(Val: A);
3231	const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(Val: C);
3232	if (!Aconst \|\| !Cconst) return false;
3233	APInt Alpha = Aconst->getAPInt();
3234	APInt Charlie = Cconst->getAPInt();
3235	APInt CdivA = Charlie.sdiv(RHS: Alpha);
3236	assert(Charlie.srem(Alpha) == `0` && "C should be evenly divisible by A");
3237	const SCEV *A_K = findCoefficient(Expr: Src, TargetLoop: CurLoop);
3238	Src = SE->getAddExpr(LHS: Src, RHS: SE->getMulExpr(LHS: A_K, RHS: SE->getConstant(Val: CdivA)));
3239	Src = zeroCoefficient(Expr: Src, TargetLoop: CurLoop);
3240	if (!findCoefficient(Expr: Dst, TargetLoop: CurLoop)->isZero())
3241	Consistent = false;
3242	}
3243	else if (isKnownPredicate(Pred: CmpInst::ICMP_EQ, X: A, Y: B)) {
3244	const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(Val: A);
3245	const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(Val: C);
3246	if (!Aconst \|\| !Cconst) return false;
3247	APInt Alpha = Aconst->getAPInt();
3248	APInt Charlie = Cconst->getAPInt();
3249	APInt CdivA = Charlie.sdiv(RHS: Alpha);
3250	assert(Charlie.srem(Alpha) == `0` && "C should be evenly divisible by A");
3251	const SCEV *A_K = findCoefficient(Expr: Src, TargetLoop: CurLoop);
3252	Src = SE->getAddExpr(LHS: Src, RHS: SE->getMulExpr(LHS: A_K, RHS: SE->getConstant(Val: CdivA)));
3253	Src = zeroCoefficient(Expr: Src, TargetLoop: CurLoop);
3254	Dst = addToCoefficient(Expr: Dst, TargetLoop: CurLoop, Value: A_K);
3255	if (!findCoefficient(Expr: Dst, TargetLoop: CurLoop)->isZero())
3256	Consistent = false;
3257	}
3258	else {
3259	// paper is incorrect here, or perhaps just misleading
3260	const SCEV *A_K = findCoefficient(Expr: Src, TargetLoop: CurLoop);
3261	Src = SE->getMulExpr(LHS: Src, RHS: A);
3262	Dst = SE->getMulExpr(LHS: Dst, RHS: A);
3263	Src = SE->getAddExpr(LHS: Src, RHS: SE->getMulExpr(LHS: A_K, RHS: C));
3264	Src = zeroCoefficient(Expr: Src, TargetLoop: CurLoop);
3265	Dst = addToCoefficient(Expr: Dst, TargetLoop: CurLoop, Value: SE->getMulExpr(LHS: A_K, RHS: B));
3266	if (!findCoefficient(Expr: Dst, TargetLoop: CurLoop)->isZero())
3267	Consistent = false;
3268	}
3269	LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
3270	LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
3271	return true;
3272	}
3273
3274
3275	// Attempt to propagate a point
3276	// constraint into a subscript pair (Src and Dst).
3277	// Return true if some simplification occurs.
3278	bool DependenceInfo::propagatePoint(const SCEV &Src, const* SCEV *&Dst,
3279	Constraint &CurConstraint) {
3280	const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3281	const SCEV *A_K = findCoefficient(Expr: Src, TargetLoop: CurLoop);
3282	const SCEV *AP_K = findCoefficient(Expr: Dst, TargetLoop: CurLoop);
3283	const SCEV *XA_K = SE->getMulExpr(LHS: A_K, RHS: CurConstraint.getX());
3284	const SCEV *YAP_K = SE->getMulExpr(LHS: AP_K, RHS: CurConstraint.getY());
3285	LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3286	Src = SE->getAddExpr(LHS: Src, RHS: SE->getMinusSCEV(LHS: XA_K, RHS: YAP_K));
3287	Src = zeroCoefficient(Expr: Src, TargetLoop: CurLoop);
3288	LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3289	LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3290	Dst = zeroCoefficient(Expr: Dst, TargetLoop: CurLoop);
3291	LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3292	return true;
3293	}
3294
3295
3296	// Update direction vector entry based on the current constraint.
3297	void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
3298	const Constraint &CurConstraint) const {
3299	LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");
3300	LLVM_DEBUG(CurConstraint.dump(dbgs()));
3301	if (CurConstraint.isAny())
3302	; // use defaults
3303	else if (CurConstraint.isDistance()) {
3304	// this one is consistent, the others aren't
3305	Level.Scalar = false;
3306	Level.Distance = CurConstraint.getD();
3307	unsigned NewDirection = Dependence::DVEntry::NONE;
3308	if (!SE->isKnownNonZero(S: Level.Distance)) // if may be zero
3309	NewDirection = Dependence::DVEntry::EQ;
3310	if (!SE->isKnownNonPositive(S: Level.Distance)) // if may be positive
3311	NewDirection \|= Dependence::DVEntry::LT;
3312	if (!SE->isKnownNonNegative(S: Level.Distance)) // if may be negative
3313	NewDirection \|= Dependence::DVEntry::GT;
3314	Level.Direction &= NewDirection;
3315	}
3316	else if (CurConstraint.isLine()) {
3317	Level.Scalar = false;
3318	Level.Distance = nullptr;
3319	// direction should be accurate
3320	}
3321	else if (CurConstraint.isPoint()) {
3322	Level.Scalar = false;
3323	Level.Distance = nullptr;
3324	unsigned NewDirection = Dependence::DVEntry::NONE;
3325	if (!isKnownPredicate(Pred: CmpInst::ICMP_NE,
3326	X: CurConstraint.getY(),
3327	Y: CurConstraint.getX()))
3328	// if X may be = Y
3329	NewDirection \|= Dependence::DVEntry::EQ;
3330	if (!isKnownPredicate(Pred: CmpInst::ICMP_SLE,
3331	X: CurConstraint.getY(),
3332	Y: CurConstraint.getX()))
3333	// if Y may be > X
3334	NewDirection \|= Dependence::DVEntry::LT;
3335	if (!isKnownPredicate(Pred: CmpInst::ICMP_SGE,
3336	X: CurConstraint.getY(),
3337	Y: CurConstraint.getX()))
3338	// if Y may be < X
3339	NewDirection \|= Dependence::DVEntry::GT;
3340	Level.Direction &= NewDirection;
3341	}
3342	else
3343	llvm_unreachable("constraint has unexpected kind");
3344	}
3345
3346	/// Check if we can delinearize the subscripts. If the SCEVs representing the
3347	/// source and destination array references are recurrences on a nested loop,
3348	/// this function flattens the nested recurrences into separate recurrences
3349	/// for each loop level.
3350	bool DependenceInfo::tryDelinearize(Instruction Src, Instruction Dst,
3351	SmallVectorImpl<Subscript> &Pair) {
3352	assert(isLoadOrStore(Src) && "instruction is not load or store");
3353	assert(isLoadOrStore(Dst) && "instruction is not load or store");
3354	Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3355	Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3356	Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
3357	Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
3358	const SCEV *SrcAccessFn = SE->getSCEVAtScope(V: SrcPtr, L: SrcLoop);
3359	const SCEV *DstAccessFn = SE->getSCEVAtScope(V: DstPtr, L: DstLoop);
3360	const SCEVUnknown *SrcBase =
3361	dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
3362	const SCEVUnknown *DstBase =
3363	dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
3364
3365	if (!SrcBase \|\| !DstBase \|\| SrcBase != DstBase)
3366	return false;
3367
3368	SmallVector<const SCEV *, `4`> SrcSubscripts, DstSubscripts;
3369
3370	if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
3371	SrcSubscripts, DstSubscripts) &&
3372	!tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
3373	SrcSubscripts, DstSubscripts))
3374	return false;
3375
3376	int Size = SrcSubscripts.size();
3377	LLVM_DEBUG({
3378	dbgs() << "\nSrcSubscripts: ";
3379	for (int I = `0`; I < Size; I++)
3380	dbgs() << *SrcSubscripts[I];
3381	dbgs() << "\nDstSubscripts: ";
3382	for (int I = `0`; I < Size; I++)
3383	dbgs() << *DstSubscripts[I];
3384	});
3385
3386	// The delinearization transforms a single-subscript MIV dependence test into
3387	// a multi-subscript SIV dependence test that is easier to compute. So we
3388	// resize Pair to contain as many pairs of subscripts as the delinearization
3389	// has found, and then initialize the pairs following the delinearization.
3390	Pair.resize(N: Size);
3391	for (int I = `0`; I < Size; ++I) {
3392	Pair [I].Src = SrcSubscripts [I];
3393	Pair [I].Dst = DstSubscripts [I];
3394	unifySubscriptType(Pairs: &Pair [I]);
3395	}
3396
3397	return true;
3398	}
3399
3400	/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
3401	/// arrays accessed are fixed-size arrays. Return true if delinearization was
3402	/// successful.
3403	bool DependenceInfo::tryDelinearizeFixedSize(
3404	Instruction Src, Instruction Dst, const SCEV *SrcAccessFn,
3405	const SCEV DstAccessFn, SmallVectorImpl<const* SCEV *> &SrcSubscripts,
3406	SmallVectorImpl<const SCEV *> &DstSubscripts) {
3407	LLVM_DEBUG({
3408	const SCEVUnknown *SrcBase =
3409	dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3410	const SCEVUnknown *DstBase =
3411	dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3412	assert(SrcBase && DstBase && SrcBase == DstBase &&
3413	"expected src and dst scev unknowns to be equal");
3414	});
3415
3416	SmallVector<int, `4`> SrcSizes;
3417	SmallVector<int, `4`> DstSizes;
3418	if (!tryDelinearizeFixedSizeImpl(SE, Inst: Src, AccessFn: SrcAccessFn, Subscripts&: SrcSubscripts,
3419	Sizes&: SrcSizes) \|\|
3420	!tryDelinearizeFixedSizeImpl(SE, Inst: Dst, AccessFn: DstAccessFn, Subscripts&: DstSubscripts,
3421	Sizes&: DstSizes))
3422	return false;
3423
3424	// Check that the two size arrays are non-empty and equal in length and
3425	// value.
3426	if (SrcSizes.size() != DstSizes.size() \|\|
3427	!std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
3428	SrcSubscripts.clear();
3429	DstSubscripts.clear();
3430	return false;
3431	}
3432
3433	assert(SrcSubscripts.size() == DstSubscripts.size() &&
3434	"Expected equal number of entries in the list of SrcSubscripts and "
3435	"DstSubscripts.");
3436
3437	Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3438	Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3439
3440	// In general we cannot safely assume that the subscripts recovered from GEPs
3441	// are in the range of values defined for their corresponding array
3442	// dimensions. For example some C language usage/interpretation make it
3443	// impossible to verify this at compile-time. As such we can only delinearize
3444	// iff the subscripts are positive and are less than the range of the
3445	// dimension.
3446	if (!DisableDelinearizationChecks) {
3447	auto AllIndicesInRange = [&](SmallVector<int, `4`> &DimensionSizes,
3448	SmallVectorImpl<const SCEV *> &Subscripts,
3449	Value *Ptr) {
3450	size_t SSize = Subscripts.size();
3451	for (size_t I = `1`; I < SSize; ++I) {
3452	const SCEV *S = Subscripts [I];
3453	if (!isKnownNonNegative(S, Ptr))
3454	return false;
3455	if (auto *SType = dyn_cast<IntegerType>(Val: S->getType())) {
3456	const SCEV *Range = SE->getConstant(
3457	V: ConstantInt::get(Ty: SType, V: DimensionSizes [I - `1`], IsSigned: false));
3458	if (!isKnownLessThan(S, Size: Range))
3459	return false;
3460	}
3461	}
3462	return true;
3463	};
3464
3465	if (!AllIndicesInRange (SrcSizes, SrcSubscripts, SrcPtr) \|\|
3466	!AllIndicesInRange (DstSizes, DstSubscripts, DstPtr)) {
3467	SrcSubscripts.clear();
3468	DstSubscripts.clear();
3469	return false;
3470	}
3471	}
3472	LLVM_DEBUG({
3473	dbgs() << "Delinearized subscripts of fixed-size array\n"
3474	<< "SrcGEP:" << *SrcPtr << "\n"
3475	<< "DstGEP:" << *DstPtr << "\n";
3476	});
3477	return true;
3478	}
3479
3480	bool DependenceInfo::tryDelinearizeParametricSize(
3481	Instruction Src, Instruction Dst, const SCEV *SrcAccessFn,
3482	const SCEV DstAccessFn, SmallVectorImpl<const* SCEV *> &SrcSubscripts,
3483	SmallVectorImpl<const SCEV *> &DstSubscripts) {
3484
3485	Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3486	Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3487	const SCEVUnknown *SrcBase =
3488	dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
3489	const SCEVUnknown *DstBase =
3490	dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
3491	assert(SrcBase && DstBase && SrcBase == DstBase &&
3492	"expected src and dst scev unknowns to be equal");
3493
3494	const SCEV *ElementSize = SE->getElementSize(Inst: Src);
3495	if (ElementSize != SE->getElementSize(Inst: Dst))
3496	return false;
3497
3498	const SCEV *SrcSCEV = SE->getMinusSCEV(LHS: SrcAccessFn, RHS: SrcBase);
3499	const SCEV *DstSCEV = SE->getMinusSCEV(LHS: DstAccessFn, RHS: DstBase);
3500
3501	const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(Val: SrcSCEV);
3502	const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(Val: DstSCEV);
3503	if (!SrcAR \|\| !DstAR \|\| !SrcAR->isAffine() \|\| !DstAR->isAffine())
3504	return false;
3505
3506	// First step: collect parametric terms in both array references.
3507	SmallVector<const SCEV *, `4`> Terms;
3508	collectParametricTerms(SE&: *SE, Expr: SrcAR, Terms);
3509	collectParametricTerms(SE&: *SE, Expr: DstAR, Terms);
3510
3511	// Second step: find subscript sizes.
3512	SmallVector<const SCEV *, `4`> Sizes;
3513	findArrayDimensions(SE&: *SE, Terms, Sizes, ElementSize);
3514
3515	// Third step: compute the access functions for each subscript.
3516	computeAccessFunctions(SE&: *SE, Expr: SrcAR, Subscripts&: SrcSubscripts, Sizes);
3517	computeAccessFunctions(SE&: *SE, Expr: DstAR, Subscripts&: DstSubscripts, Sizes);
3518
3519	// Fail when there is only a subscript: that's a linearized access function.
3520	if (SrcSubscripts.size() < `2` \|\| DstSubscripts.size() < `2` \|\|
3521	SrcSubscripts.size() != DstSubscripts.size())
3522	return false;
3523
3524	size_t Size = SrcSubscripts.size();
3525
3526	// Statically check that the array bounds are in-range. The first subscript we
3527	// don't have a size for and it cannot overflow into another subscript, so is
3528	// always safe. The others need to be 0 <= subscript[i] < bound, for both src
3529	// and dst.
3530	// FIXME: It may be better to record these sizes and add them as constraints
3531	// to the dependency checks.
3532	if (!DisableDelinearizationChecks)
3533	for (size_t I = `1`; I < Size; ++I) {
3534	if (!isKnownNonNegative(S: SrcSubscripts [I], Ptr: SrcPtr))
3535	return false;
3536
3537	if (!isKnownLessThan(S: SrcSubscripts [I], Size: Sizes [I - `1`]))
3538	return false;
3539
3540	if (!isKnownNonNegative(S: DstSubscripts [I], Ptr: DstPtr))
3541	return false;
3542
3543	if (!isKnownLessThan(S: DstSubscripts [I], Size: Sizes [I - `1`]))
3544	return false;
3545	}
3546
3547	return true;
3548	}
3549
3550	//===----------------------------------------------------------------------===//
3551
3552	#ifndef NDEBUG
3553	// For debugging purposes, dump a small bit vector to dbgs().
3554	static void dumpSmallBitVector(SmallBitVector &BV) {
3555	dbgs() << "{";
3556	for (unsigned VI : BV.set_bits()) {
3557	dbgs() << VI;
3558	if (BV.find_next(VI) >= `0`)
3559	dbgs() << `' '`;
3560	}
3561	dbgs() << "}\n";
3562	}
3563	#endif
3564
3565	bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
3566	FunctionAnalysisManager::Invalidator &Inv) {
3567	// Check if the analysis itself has been invalidated.
3568	auto PAC = PA.getChecker<DependenceAnalysis>();
3569	if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3570	return true;
3571
3572	// Check transitive dependencies.
3573	return Inv.invalidate<AAManager>(IR&: F, PA) \|\|
3574	Inv.invalidate<ScalarEvolutionAnalysis>(IR&: F, PA) \|\|
3575	Inv.invalidate<LoopAnalysis>(IR&: F, PA);
3576	}
3577
3578	// depends -
3579	// Returns NULL if there is no dependence.
3580	// Otherwise, return a Dependence with as many details as possible.
3581	// Corresponds to Section 3.1 in the paper
3582	//
3583	// Practical Dependence Testing
3584	// Goff, Kennedy, Tseng
3585	// PLDI 1991
3586	//
3587	// Care is required to keep the routine below, getSplitIteration(),
3588	// up to date with respect to this routine.
3589	std::unique_ptr<Dependence>
3590	DependenceInfo::depends(Instruction Src, Instruction Dst,
3591	bool PossiblyLoopIndependent) {
3592	if (Src == Dst)
3593	PossiblyLoopIndependent = false;
3594
3595	if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
3596	// if both instructions don't reference memory, there's no dependence
3597	return nullptr;
3598
3599	if (!isLoadOrStore(I: Src) \|\| !isLoadOrStore(I: Dst)) {
3600	// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3601	LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
3602	return std::make_unique<Dependence>(args&: Src, args&: Dst);
3603	}
3604
3605	assert(isLoadOrStore(Src) && "instruction is not load or store");
3606	assert(isLoadOrStore(Dst) && "instruction is not load or store");
3607	Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3608	Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3609
3610	switch (underlyingObjectsAlias(AA, DL: F->getDataLayout(),
3611	LocA: MemoryLocation::get(Inst: Dst),
3612	LocB: MemoryLocation::get(Inst: Src))) {
3613	case AliasResult::MayAlias:
3614	case AliasResult::PartialAlias:
3615	// cannot analyse objects if we don't understand their aliasing.
3616	LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
3617	return std::make_unique<Dependence>(args&: Src, args&: Dst);
3618	case AliasResult::NoAlias:
3619	// If the objects noalias, they are distinct, accesses are independent.
3620	LLVM_DEBUG(dbgs() << "no alias\n");
3621	return nullptr;
3622	case AliasResult::MustAlias:
3623	break; // The underlying objects alias; test accesses for dependence.
3624	}
3625
3626	// establish loop nesting levels
3627	establishNestingLevels(Src, Dst);
3628	LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
3629	LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
3630
3631	FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
3632	++TotalArrayPairs;
3633
3634	unsigned Pairs = `1`;
3635	SmallVector<Subscript, `2`> Pair(Pairs);
3636	const SCEV *SrcSCEV = SE->getSCEV(V: SrcPtr);
3637	const SCEV *DstSCEV = SE->getSCEV(V: DstPtr);
3638	LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
3639	LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
3640	if (SE->getPointerBase(V: SrcSCEV) != SE->getPointerBase(V: DstSCEV)) {
3641	// If two pointers have different bases, trying to analyze indexes won't
3642	// work; we can't compare them to each other. This can happen, for example,
3643	// if one is produced by an LCSSA PHI node.
3644	//
3645	// We check this upfront so we don't crash in cases where getMinusSCEV()
3646	// returns a SCEVCouldNotCompute.
3647	LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
3648	return std::make_unique<Dependence>(args&: Src, args&: Dst);
3649	}
3650	Pair [`0`].Src = SrcSCEV;
3651	Pair [`0`].Dst = DstSCEV;
3652
3653	if (Delinearize) {
3654	if (tryDelinearize(Src, Dst, Pair)) {
3655	LLVM_DEBUG(dbgs() << " delinearized\n");
3656	Pairs = Pair.size();
3657	}
3658	}
3659
3660	for (unsigned P = `0`; P < Pairs; ++P) {
3661	Pair [P].Loops.resize(N: MaxLevels + `1`);
3662	Pair [P].GroupLoops.resize(N: MaxLevels + `1`);
3663	Pair [P].Group.resize(N: Pairs);
3664	removeMatchingExtensions(Pair: &Pair [P]);
3665	Pair [P].Classification =
3666	classifyPair(Src: Pair [P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()),
3667	Dst: Pair [P].Dst, DstLoopNest: LI->getLoopFor(BB: Dst->getParent()),
3668	Loops&: Pair [P].Loops);
3669	Pair [P].GroupLoops = Pair [P].Loops;
3670	Pair [P].Group.set(P);
3671	LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
3672	LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3673	LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3674	LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3675	LLVM_DEBUG(dbgs() << "\tloops = ");
3676	LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
3677	}
3678
3679	SmallBitVector Separable(Pairs);
3680	SmallBitVector Coupled(Pairs);
3681
3682	// Partition subscripts into separable and minimally-coupled groups
3683	// Algorithm in paper is algorithmically better;
3684	// this may be faster in practice. Check someday.
3685	//
3686	// Here's an example of how it works. Consider this code:
3687	//
3688	// for (i = ...) {
3689	// for (j = ...) {
3690	// for (k = ...) {
3691	// for (l = ...) {
3692	// for (m = ...) {
3693	// A[i][j][k][m] = ...;
3694	// ... = A[0][j][l][i + j];
3695	// }
3696	// }
3697	// }
3698	// }
3699	// }
3700	//
3701	// There are 4 subscripts here:
3702	// 0 [i] and [0]
3703	// 1 [j] and [j]
3704	// 2 [k] and [l]
3705	// 3 [m] and [i + j]
3706	//
3707	// We've already classified each subscript pair as ZIV, SIV, etc.,
3708	// and collected all the loops mentioned by pair P in Pair[P].Loops.
3709	// In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
3710	// and set Pair[P].Group = {P}.
3711	//
3712	// Src Dst Classification Loops GroupLoops Group
3713	// 0 [i] [0] SIV {1} {1} {0}
3714	// 1 [j] [j] SIV {2} {2} {1}
3715	// 2 [k] [l] RDIV {3,4} {3,4} {2}
3716	// 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3}
3717	//
3718	// For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
3719	// So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
3720	//
3721	// We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
3722	// Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
3723	// Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
3724	// so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
3725	// to either Separable or Coupled).
3726	//
3727	// Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
3728	// Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
3729	// so Pair[3].Group = {0, 1, 3} and Done = false.
3730	//
3731	// Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
3732	// Since Done remains true, we add 2 to the set of Separable pairs.
3733	//
3734	// Finally, we consider 3. There's nothing to compare it with,
3735	// so Done remains true and we add it to the Coupled set.
3736	// Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
3737	//
3738	// In the end, we've got 1 separable subscript and 1 coupled group.
3739	for (unsigned SI = `0`; SI < Pairs; ++SI) {
3740	if (Pair [SI].Classification == Subscript::NonLinear) {
3741	// ignore these, but collect loops for later
3742	++NonlinearSubscriptPairs;
3743	collectCommonLoops(Expression: Pair [SI].Src,
3744	LoopNest: LI->getLoopFor(BB: Src->getParent()),
3745	Loops&: Pair [SI].Loops);
3746	collectCommonLoops(Expression: Pair [SI].Dst,
3747	LoopNest: LI->getLoopFor(BB: Dst->getParent()),
3748	Loops&: Pair [SI].Loops);
3749	Result.Consistent = false;
3750	} else if (Pair [SI].Classification == Subscript::ZIV) {
3751	// always separable
3752	Separable.set(SI);
3753	}
3754	else {
3755	// SIV, RDIV, or MIV, so check for coupled group
3756	bool Done = true;
3757	for (unsigned SJ = SI + `1`; SJ < Pairs; ++SJ) {
3758	SmallBitVector Intersection = Pair [SI].GroupLoops;
3759	Intersection &= Pair [SJ].GroupLoops;
3760	if (Intersection.any()) {
3761	// accumulate set of all the loops in group
3762	Pair [SJ].GroupLoops \|= Pair [SI].GroupLoops;
3763	// accumulate set of all subscripts in group
3764	Pair [SJ].Group \|= Pair [SI].Group;
3765	Done = false;
3766	}
3767	}
3768	if (Done) {
3769	if (Pair [SI].Group.count() == `1`) {
3770	Separable.set(SI);
3771	++SeparableSubscriptPairs;
3772	}
3773	else {
3774	Coupled.set(SI);
3775	++CoupledSubscriptPairs;
3776	}
3777	}
3778	}
3779	}
3780
3781	LLVM_DEBUG(dbgs() << " Separable = ");
3782	LLVM_DEBUG(dumpSmallBitVector(Separable));
3783	LLVM_DEBUG(dbgs() << " Coupled = ");
3784	LLVM_DEBUG(dumpSmallBitVector(Coupled));
3785
3786	Constraint NewConstraint;
3787	NewConstraint.setAny(SE);
3788
3789	// test separable subscripts
3790	for (unsigned SI : Separable.set_bits()) {
3791	LLVM_DEBUG(dbgs() << "testing subscript " << SI);
3792	switch (Pair [SI].Classification) {
3793	case Subscript::ZIV:
3794	LLVM_DEBUG(dbgs() << ", ZIV\n");
3795	if (testZIV(Src: Pair [SI].Src, Dst: Pair [SI].Dst, Result))
3796	return nullptr;
3797	break;
3798	case Subscript::SIV: {
3799	LLVM_DEBUG(dbgs() << ", SIV\n");
3800	unsigned Level;
3801	const SCEV SplitIter = nullptr*;
3802	if (testSIV(Src: Pair [SI].Src, Dst: Pair [SI].Dst, Level, Result, NewConstraint,
3803	SplitIter))
3804	return nullptr;
3805	break;
3806	}
3807	case Subscript::RDIV:
3808	LLVM_DEBUG(dbgs() << ", RDIV\n");
3809	if (testRDIV(Src: Pair [SI].Src, Dst: Pair [SI].Dst, Result))
3810	return nullptr;
3811	break;
3812	case Subscript::MIV:
3813	LLVM_DEBUG(dbgs() << ", MIV\n");
3814	if (testMIV(Src: Pair [SI].Src, Dst: Pair [SI].Dst, Loops: Pair [SI].Loops, Result))
3815	return nullptr;
3816	break;
3817	default:
3818	llvm_unreachable("subscript has unexpected classification");
3819	}
3820	}
3821
3822	if (Coupled.count()) {
3823	// test coupled subscript groups
3824	LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");
3825	LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + `1` << "\n");
3826	SmallVector<Constraint, `4`> Constraints(MaxLevels + `1`);
3827	for (unsigned II = `0`; II <= MaxLevels; ++II)
3828	Constraints [II].setAny(SE);
3829	for (unsigned SI : Coupled.set_bits()) {
3830	LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { ");
3831	SmallBitVector Group(Pair [SI].Group);
3832	SmallBitVector Sivs(Pairs);
3833	SmallBitVector Mivs(Pairs);
3834	SmallBitVector ConstrainedLevels(MaxLevels + `1`);
3835	SmallVector<Subscript *, `4`> PairsInGroup;
3836	for (unsigned SJ : Group.set_bits()) {
3837	LLVM_DEBUG(dbgs() << SJ << " ");
3838	if (Pair [SJ].Classification == Subscript::SIV)
3839	Sivs.set(SJ);
3840	else
3841	Mivs.set(SJ);
3842	PairsInGroup.push_back(Elt: &Pair [SJ]);
3843	}
3844	unifySubscriptType(Pairs: PairsInGroup);
3845	LLVM_DEBUG(dbgs() << "}\n");
3846	while (Sivs.any()) {
3847	bool Changed = false;
3848	for (unsigned SJ : Sivs.set_bits()) {
3849	LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
3850	// SJ is an SIV subscript that's part of the current coupled group
3851	unsigned Level;
3852	const SCEV SplitIter = nullptr*;
3853	LLVM_DEBUG(dbgs() << "SIV\n");
3854	if (testSIV(Src: Pair [SJ].Src, Dst: Pair [SJ].Dst, Level, Result, NewConstraint,
3855	SplitIter))
3856	return nullptr;
3857	ConstrainedLevels.set(Level);
3858	if (intersectConstraints(X: &Constraints [Level], Y: &NewConstraint)) {
3859	if (Constraints [Level].isEmpty()) {
3860	++DeltaIndependence;
3861	return nullptr;
3862	}
3863	Changed = true;
3864	}
3865	Sivs.reset(Idx: SJ);
3866	}
3867	if (Changed) {
3868	// propagate, possibly creating new SIVs and ZIVs
3869	LLVM_DEBUG(dbgs() << " propagating\n");
3870	LLVM_DEBUG(dbgs() << "\tMivs = ");
3871	LLVM_DEBUG(dumpSmallBitVector(Mivs));
3872	for (unsigned SJ : Mivs.set_bits()) {
3873	// SJ is an MIV subscript that's part of the current coupled group
3874	LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
3875	if (propagate(Src&: Pair [SJ].Src, Dst&: Pair [SJ].Dst, Loops&: Pair [SJ].Loops,
3876	Constraints, Consistent&: Result.Consistent)) {
3877	LLVM_DEBUG(dbgs() << "\t Changed\n");
3878	++DeltaPropagations;
3879	Pair [SJ].Classification =
3880	classifyPair(Src: Pair [SJ].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()),
3881	Dst: Pair [SJ].Dst, DstLoopNest: LI->getLoopFor(BB: Dst->getParent()),
3882	Loops&: Pair [SJ].Loops);
3883	switch (Pair [SJ].Classification) {
3884	case Subscript::ZIV:
3885	LLVM_DEBUG(dbgs() << "ZIV\n");
3886	if (testZIV(Src: Pair [SJ].Src, Dst: Pair [SJ].Dst, Result))
3887	return nullptr;
3888	Mivs.reset(Idx: SJ);
3889	break;
3890	case Subscript::SIV:
3891	Sivs.set(SJ);
3892	Mivs.reset(Idx: SJ);
3893	break;
3894	case Subscript::RDIV:
3895	case Subscript::MIV:
3896	break;
3897	default:
3898	llvm_unreachable("bad subscript classification");
3899	}
3900	}
3901	}
3902	}
3903	}
3904
3905	// test & propagate remaining RDIVs
3906	for (unsigned SJ : Mivs.set_bits()) {
3907	if (Pair [SJ].Classification == Subscript::RDIV) {
3908	LLVM_DEBUG(dbgs() << "RDIV test\n");
3909	if (testRDIV(Src: Pair [SJ].Src, Dst: Pair [SJ].Dst, Result))
3910	return nullptr;
3911	// I don't yet understand how to propagate RDIV results
3912	Mivs.reset(Idx: SJ);
3913	}
3914	}
3915
3916	// test remaining MIVs
3917	// This code is temporary.
3918	// Better to somehow test all remaining subscripts simultaneously.
3919	for (unsigned SJ : Mivs.set_bits()) {
3920	if (Pair [SJ].Classification == Subscript::MIV) {
3921	LLVM_DEBUG(dbgs() << "MIV test\n");
3922	if (testMIV(Src: Pair [SJ].Src, Dst: Pair [SJ].Dst, Loops: Pair [SJ].Loops, Result))
3923	return nullptr;
3924	}
3925	else
3926	llvm_unreachable("expected only MIV subscripts at this point");
3927	}
3928
3929	// update Result.DV from constraint vector
3930	LLVM_DEBUG(dbgs() << " updating\n");
3931	for (unsigned SJ : ConstrainedLevels.set_bits()) {
3932	if (SJ > CommonLevels)
3933	break;
3934	updateDirection(Level&: Result.DV [SJ - `1`], CurConstraint: Constraints [SJ]);
3935	if (Result.DV [SJ - `1`].Direction == Dependence::DVEntry::NONE)
3936	return nullptr;
3937	}
3938	}
3939	}
3940
3941	// Make sure the Scalar flags are set correctly.
3942	SmallBitVector CompleteLoops(MaxLevels + `1`);
3943	for (unsigned SI = `0`; SI < Pairs; ++SI)
3944	CompleteLoops \|= Pair [SI].Loops;
3945	for (unsigned II = `1`; II <= CommonLevels; ++II)
3946	if (CompleteLoops [II])
3947	Result.DV [II - `1`].Scalar = false;
3948
3949	if (PossiblyLoopIndependent) {
3950	// Make sure the LoopIndependent flag is set correctly.
3951	// All directions must include equal, otherwise no
3952	// loop-independent dependence is possible.
3953	for (unsigned II = `1`; II <= CommonLevels; ++II) {
3954	if (!(Result.getDirection(Level: II) & Dependence::DVEntry::EQ)) {
3955	Result.LoopIndependent = false;
3956	break;
3957	}
3958	}
3959	}
3960	else {
3961	// On the other hand, if all directions are equal and there's no
3962	// loop-independent dependence possible, then no dependence exists.
3963	bool AllEqual = true;
3964	for (unsigned II = `1`; II <= CommonLevels; ++II) {
3965	if (Result.getDirection(Level: II) != Dependence::DVEntry::EQ) {
3966	AllEqual = false;
3967	break;
3968	}
3969	}
3970	if (AllEqual)
3971	return nullptr;
3972	}
3973
3974	return std::make_unique<FullDependence>(args: std::move(Result));
3975	}
3976
3977	//===----------------------------------------------------------------------===//
3978	// getSplitIteration -
3979	// Rather than spend rarely-used space recording the splitting iteration
3980	// during the Weak-Crossing SIV test, we re-compute it on demand.
3981	// The re-computation is basically a repeat of the entire dependence test,
3982	// though simplified since we know that the dependence exists.
3983	// It's tedious, since we must go through all propagations, etc.
3984	//
3985	// Care is required to keep this code up to date with respect to the routine
3986	// above, depends().
3987	//
3988	// Generally, the dependence analyzer will be used to build
3989	// a dependence graph for a function (basically a map from instructions
3990	// to dependences). Looking for cycles in the graph shows us loops
3991	// that cannot be trivially vectorized/parallelized.
3992	//
3993	// We can try to improve the situation by examining all the dependences
3994	// that make up the cycle, looking for ones we can break.
3995	// Sometimes, peeling the first or last iteration of a loop will break
3996	// dependences, and we've got flags for those possibilities.
3997	// Sometimes, splitting a loop at some other iteration will do the trick,
3998	// and we've got a flag for that case. Rather than waste the space to
3999	// record the exact iteration (since we rarely know), we provide
4000	// a method that calculates the iteration. It's a drag that it must work
4001	// from scratch, but wonderful in that it's possible.
4002	//
4003	// Here's an example:
4004	//
4005	// for (i = 0; i < 10; i++)
4006	// A[i] = ...
4007	// ... = A[11 - i]
4008	//
4009	// There's a loop-carried flow dependence from the store to the load,
4010	// found by the weak-crossing SIV test. The dependence will have a flag,
4011	// indicating that the dependence can be broken by splitting the loop.
4012	// Calling getSplitIteration will return 5.
4013	// Splitting the loop breaks the dependence, like so:
4014	//
4015	// for (i = 0; i <= 5; i++)
4016	// A[i] = ...
4017	// ... = A[11 - i]
4018	// for (i = 6; i < 10; i++)
4019	// A[i] = ...
4020	// ... = A[11 - i]
4021	//
4022	// breaks the dependence and allows us to vectorize/parallelize
4023	// both loops.
4024	const SCEV DependenceInfo::getSplitIteration(const* Dependence &Dep,
4025	unsigned SplitLevel) {
4026	assert(Dep.isSplitable(SplitLevel) &&
4027	"Dep should be splitable at SplitLevel");
4028	Instruction *Src = Dep.getSrc();
4029	Instruction *Dst = Dep.getDst();
4030	assert(Src->mayReadFromMemory() \|\| Src->mayWriteToMemory());
4031	assert(Dst->mayReadFromMemory() \|\| Dst->mayWriteToMemory());
4032	assert(isLoadOrStore(Src));
4033	assert(isLoadOrStore(Dst));
4034	Value *SrcPtr = getLoadStorePointerOperand(V: Src);
4035	Value *DstPtr = getLoadStorePointerOperand(V: Dst);
4036	assert(underlyingObjectsAlias(
4037	AA, F->getDataLayout(), MemoryLocation::get(Dst),
4038	MemoryLocation::get(Src)) == AliasResult::MustAlias);
4039
4040	// establish loop nesting levels
4041	establishNestingLevels(Src, Dst);
4042
4043	FullDependence Result(Src, Dst, false, CommonLevels);
4044
4045	unsigned Pairs = `1`;
4046	SmallVector<Subscript, `2`> Pair(Pairs);
4047	const SCEV *SrcSCEV = SE->getSCEV(V: SrcPtr);
4048	const SCEV *DstSCEV = SE->getSCEV(V: DstPtr);
4049	Pair [`0`].Src = SrcSCEV;
4050	Pair [`0`].Dst = DstSCEV;
4051
4052	if (Delinearize) {
4053	if (tryDelinearize(Src, Dst, Pair)) {
4054	LLVM_DEBUG(dbgs() << " delinearized\n");
4055	Pairs = Pair.size();
4056	}
4057	}
4058
4059	for (unsigned P = `0`; P < Pairs; ++P) {
4060	Pair [P].Loops.resize(N: MaxLevels + `1`);
4061	Pair [P].GroupLoops.resize(N: MaxLevels + `1`);
4062	Pair [P].Group.resize(N: Pairs);
4063	removeMatchingExtensions(Pair: &Pair [P]);
4064	Pair [P].Classification =
4065	classifyPair(Src: Pair [P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()),
4066	Dst: Pair [P].Dst, DstLoopNest: LI->getLoopFor(BB: Dst->getParent()),
4067	Loops&: Pair [P].Loops);
4068	Pair [P].GroupLoops = Pair [P].Loops;
4069	Pair [P].Group.set(P);
4070	}
4071
4072	SmallBitVector Separable(Pairs);
4073	SmallBitVector Coupled(Pairs);
4074
4075	// partition subscripts into separable and minimally-coupled groups
4076	for (unsigned SI = `0`; SI < Pairs; ++SI) {
4077	if (Pair [SI].Classification == Subscript::NonLinear) {
4078	// ignore these, but collect loops for later
4079	collectCommonLoops(Expression: Pair [SI].Src,
4080	LoopNest: LI->getLoopFor(BB: Src->getParent()),
4081	Loops&: Pair [SI].Loops);
4082	collectCommonLoops(Expression: Pair [SI].Dst,
4083	LoopNest: LI->getLoopFor(BB: Dst->getParent()),
4084	Loops&: Pair [SI].Loops);
4085	Result.Consistent = false;
4086	}
4087	else if (Pair [SI].Classification == Subscript::ZIV)
4088	Separable.set(SI);
4089	else {
4090	// SIV, RDIV, or MIV, so check for coupled group
4091	bool Done = true;
4092	for (unsigned SJ = SI + `1`; SJ < Pairs; ++SJ) {
4093	SmallBitVector Intersection = Pair [SI].GroupLoops;
4094	Intersection &= Pair [SJ].GroupLoops;
4095	if (Intersection.any()) {
4096	// accumulate set of all the loops in group
4097	Pair [SJ].GroupLoops \|= Pair [SI].GroupLoops;
4098	// accumulate set of all subscripts in group
4099	Pair [SJ].Group \|= Pair [SI].Group;
4100	Done = false;
4101	}
4102	}
4103	if (Done) {
4104	if (Pair [SI].Group.count() == `1`)
4105	Separable.set(SI);
4106	else
4107	Coupled.set(SI);
4108	}
4109	}
4110	}
4111
4112	Constraint NewConstraint;
4113	NewConstraint.setAny(SE);
4114
4115	// test separable subscripts
4116	for (unsigned SI : Separable.set_bits()) {
4117	switch (Pair [SI].Classification) {
4118	case Subscript::SIV: {
4119	unsigned Level;
4120	const SCEV SplitIter = nullptr*;
4121	(void) testSIV(Src: Pair [SI].Src, Dst: Pair [SI].Dst, Level,
4122	Result, NewConstraint, SplitIter);
4123	if (Level == SplitLevel) {
4124	assert(SplitIter != nullptr);
4125	return SplitIter;
4126	}
4127	break;
4128	}
4129	case Subscript::ZIV:
4130	case Subscript::RDIV:
4131	case Subscript::MIV:
4132	break;
4133	default:
4134	llvm_unreachable("subscript has unexpected classification");
4135	}
4136	}
4137
4138	if (Coupled.count()) {
4139	// test coupled subscript groups
4140	SmallVector<Constraint, `4`> Constraints(MaxLevels + `1`);
4141	for (unsigned II = `0`; II <= MaxLevels; ++II)
4142	Constraints [II].setAny(SE);
4143	for (unsigned SI : Coupled.set_bits()) {
4144	SmallBitVector Group(Pair [SI].Group);
4145	SmallBitVector Sivs(Pairs);
4146	SmallBitVector Mivs(Pairs);
4147	SmallBitVector ConstrainedLevels(MaxLevels + `1`);
4148	for (unsigned SJ : Group.set_bits()) {
4149	if (Pair [SJ].Classification == Subscript::SIV)
4150	Sivs.set(SJ);
4151	else
4152	Mivs.set(SJ);
4153	}
4154	while (Sivs.any()) {
4155	bool Changed = false;
4156	for (unsigned SJ : Sivs.set_bits()) {
4157	// SJ is an SIV subscript that's part of the current coupled group
4158	unsigned Level;
4159	const SCEV SplitIter = nullptr*;
4160	(void) testSIV(Src: Pair [SJ].Src, Dst: Pair [SJ].Dst, Level,
4161	Result, NewConstraint, SplitIter);
4162	if (Level == SplitLevel && SplitIter)
4163	return SplitIter;
4164	ConstrainedLevels.set(Level);
4165	if (intersectConstraints(X: &Constraints [Level], Y: &NewConstraint))
4166	Changed = true;
4167	Sivs.reset(Idx: SJ);
4168	}
4169	if (Changed) {
4170	// propagate, possibly creating new SIVs and ZIVs
4171	for (unsigned SJ : Mivs.set_bits()) {
4172	// SJ is an MIV subscript that's part of the current coupled group
4173	if (propagate(Src&: Pair [SJ].Src, Dst&: Pair [SJ].Dst,
4174	Loops&: Pair [SJ].Loops, Constraints, Consistent&: Result.Consistent)) {
4175	Pair [SJ].Classification =
4176	classifyPair(Src: Pair [SJ].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()),
4177	Dst: Pair [SJ].Dst, DstLoopNest: LI->getLoopFor(BB: Dst->getParent()),
4178	Loops&: Pair [SJ].Loops);
4179	switch (Pair [SJ].Classification) {
4180	case Subscript::ZIV:
4181	Mivs.reset(Idx: SJ);
4182	break;
4183	case Subscript::SIV:
4184	Sivs.set(SJ);
4185	Mivs.reset(Idx: SJ);
4186	break;
4187	case Subscript::RDIV:
4188	case Subscript::MIV:
4189	break;
4190	default:
4191	llvm_unreachable("bad subscript classification");
4192	}
4193	}
4194	}
4195	}
4196	}
4197	}
4198	}
4199	llvm_unreachable("somehow reached end of routine");
4200	return nullptr;
4201	}
4202

Browse the source code of llvm_projects/llvm/lib/Analysis/DependenceAnalysis.cpp