1//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DependenceAnalysis is an LLVM pass that analyses dependences between memory
10// accesses. Currently, it is an (incomplete) implementation of the approach
11// described in
12//
13// Practical Dependence Testing
14// Goff, Kennedy, Tseng
15// PLDI 1991
16//
17// There's a single entry point that analyzes the dependence between a pair
18// of memory references in a function, returning either NULL, for no dependence,
19// or a more-or-less detailed description of the dependence between them.
20//
21// Since Clang linearizes some array subscripts, the dependence
22// analysis is using SCEV->delinearize to recover the representation of multiple
23// subscripts, and thus avoid the more expensive and less precise MIV tests. The
24// delinearization is controlled by the flag -da-delinearize.
25//
26// We should pay some careful attention to the possibility of integer overflow
27// in the implementation of the various tests. This could happen with Add,
28// Subtract, or Multiply, with both APInt's and SCEV's.
29//
30// Some non-linear subscript pairs can be handled by the GCD test
31// (and perhaps other tests).
32// Should explore how often these things occur.
33//
34// Finally, it seems like certain test cases expose weaknesses in the SCEV
35// simplification, especially in the handling of sign and zero extensions.
36// It could be useful to spend time exploring these.
37//
38// Please note that this is work in progress and the interface is subject to
39// change.
40//
41//===----------------------------------------------------------------------===//
42// //
43// In memory of Ken Kennedy, 1945 - 2007 //
44// //
45//===----------------------------------------------------------------------===//
46
47#include "llvm/Analysis/DependenceAnalysis.h"
48#include "llvm/ADT/Statistic.h"
49#include "llvm/Analysis/AliasAnalysis.h"
50#include "llvm/Analysis/Delinearization.h"
51#include "llvm/Analysis/LoopInfo.h"
52#include "llvm/Analysis/ScalarEvolution.h"
53#include "llvm/Analysis/ScalarEvolutionExpressions.h"
54#include "llvm/Analysis/ValueTracking.h"
55#include "llvm/IR/InstIterator.h"
56#include "llvm/IR/Module.h"
57#include "llvm/InitializePasses.h"
58#include "llvm/Support/CommandLine.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/raw_ostream.h"
62
63using namespace llvm;
64
65#define DEBUG_TYPE "da"
66
67//===----------------------------------------------------------------------===//
68// statistics
69
70STATISTIC(TotalArrayPairs, "Array pairs tested");
71STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
72STATISTIC(ZIVapplications, "ZIV applications");
73STATISTIC(ZIVindependence, "ZIV independence");
74STATISTIC(StrongSIVapplications, "Strong SIV applications");
75STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
76STATISTIC(StrongSIVindependence, "Strong SIV independence");
77STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
78STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
79STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
80STATISTIC(ExactSIVapplications, "Exact SIV applications");
81STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
82STATISTIC(ExactSIVindependence, "Exact SIV independence");
83STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
84STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
85STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
86STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
87STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
88STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
89STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
90STATISTIC(GCDapplications, "GCD applications");
91STATISTIC(GCDsuccesses, "GCD successes");
92STATISTIC(GCDindependence, "GCD independence");
93STATISTIC(BanerjeeApplications, "Banerjee applications");
94STATISTIC(BanerjeeIndependence, "Banerjee independence");
95STATISTIC(BanerjeeSuccesses, "Banerjee successes");
96STATISTIC(SameSDLoopsCount, "Loops with Same iteration Space and Depth");
97
98static cl::opt<bool>
99 Delinearize("da-delinearize", cl::init(Val: true), cl::Hidden,
100 cl::desc("Try to delinearize array references."));
101static cl::opt<bool> DisableDelinearizationChecks(
102 "da-disable-delinearization-checks", cl::Hidden,
103 cl::desc(
104 "Disable checks that try to statically verify validity of "
105 "delinearized subscripts. Enabling this option may result in incorrect "
106 "dependence vectors for languages that allow the subscript of one "
107 "dimension to underflow or overflow into another dimension."));
108
109static cl::opt<unsigned> MIVMaxLevelThreshold(
110 "da-miv-max-level-threshold", cl::init(Val: 7), cl::Hidden,
111 cl::desc("Maximum depth allowed for the recursive algorithm used to "
112 "explore MIV direction vectors."));
113
114namespace {
115
116/// Types of dependence test routines.
117enum class DependenceTestType {
118 All,
119 StrongSIV,
120 WeakCrossingSIV,
121 ExactSIV,
122 WeakZeroSIV,
123 ExactRDIV,
124 SymbolicRDIV,
125 GCDMIV,
126 BanerjeeMIV,
127};
128
129} // anonymous namespace
130
131static cl::opt<DependenceTestType> EnableDependenceTest(
132 "da-enable-dependence-test", cl::init(Val: DependenceTestType::All),
133 cl::ReallyHidden,
134 cl::desc("Run only specified dependence test routine and disable others. "
135 "The purpose is mainly to exclude the influence of other "
136 "dependence test routines in regression tests. If set to All, all "
137 "dependence test routines are enabled."),
138 cl::values(clEnumValN(DependenceTestType::All, "all",
139 "Enable all dependence test routines."),
140 clEnumValN(DependenceTestType::StrongSIV, "strong-siv",
141 "Enable only Strong SIV test."),
142 clEnumValN(DependenceTestType::WeakCrossingSIV,
143 "weak-crossing-siv",
144 "Enable only Weak-Crossing SIV test."),
145 clEnumValN(DependenceTestType::ExactSIV, "exact-siv",
146 "Enable only Exact SIV test."),
147 clEnumValN(DependenceTestType::WeakZeroSIV, "weak-zero-siv",
148 "Enable only Weak-Zero SIV test."),
149 clEnumValN(DependenceTestType::ExactRDIV, "exact-rdiv",
150 "Enable only Exact RDIV test."),
151 clEnumValN(DependenceTestType::SymbolicRDIV, "symbolic-rdiv",
152 "Enable only Symbolic RDIV test."),
153 clEnumValN(DependenceTestType::GCDMIV, "gcd-miv",
154 "Enable only GCD MIV test."),
155 clEnumValN(DependenceTestType::BanerjeeMIV, "banerjee-miv",
156 "Enable only Banerjee MIV test.")));
157
158// TODO: This flag is disabled by default because it is still under development.
159// Enable it or delete this flag when the feature is ready.
160static cl::opt<bool> EnableMonotonicityCheck(
161 "da-enable-monotonicity-check", cl::init(Val: false), cl::Hidden,
162 cl::desc("Check if the subscripts are monotonic. If it's not, dependence "
163 "is reported as unknown."));
164
165static cl::opt<bool> DumpMonotonicityReport(
166 "da-dump-monotonicity-report", cl::init(Val: false), cl::Hidden,
167 cl::desc(
168 "When printing analysis, dump the results of monotonicity checks."));
169
170//===----------------------------------------------------------------------===//
171// basics
172
173DependenceAnalysis::Result
174DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
175 auto &AA = FAM.getResult<AAManager>(IR&: F);
176 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
177 auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
178 return DependenceInfo(&F, &AA, &SE, &LI);
179}
180
181AnalysisKey DependenceAnalysis::Key;
182
183INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
184 "Dependence Analysis", true, true)
185INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
186INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
187INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
188INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
189 true, true)
190
191char DependenceAnalysisWrapperPass::ID = 0;
192
193DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
194 : FunctionPass(ID) {}
195
196FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
197 return new DependenceAnalysisWrapperPass();
198}
199
200bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
201 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
202 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
203 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
204 info.reset(p: new DependenceInfo(&F, &AA, &SE, &LI));
205 return false;
206}
207
208DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
209
210void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
211
212void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
213 AU.setPreservesAll();
214 AU.addRequiredTransitive<AAResultsWrapperPass>();
215 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
216 AU.addRequiredTransitive<LoopInfoWrapperPass>();
217}
218
219namespace {
220
221/// The property of monotonicity of a SCEV. To define the monotonicity, assume
222/// a SCEV defined within N-nested loops. Let i_k denote the iteration number
223/// of the k-th loop. Then we can regard the SCEV as an N-ary function:
224///
225/// F(i_1, i_2, ..., i_N)
226///
227/// The domain of i_k is the closed range [0, BTC_k], where BTC_k is the
228/// backedge-taken count of the k-th loop.
229///
230/// A function F is said to be "monotonically increasing with respect to the
231/// k-th loop" if x <= y implies the following condition:
232///
233/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) <=
234/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
235///
236/// where i_1, ..., i_{k-1}, i_{k+1}, ..., i_N, x, and y are elements of their
237/// respective domains.
238///
239/// Likewise F is "monotonically decreasing with respect to the k-th loop"
240/// if x <= y implies
241///
242/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) >=
243/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
244///
245/// A function F that is monotonically increasing or decreasing with respect to
246/// the k-th loop is simply called "monotonic with respect to k-th loop".
247///
248/// A function F is said to be "multivariate monotonic" when it is monotonic
249/// with respect to all of the N loops.
250///
251/// Since integer comparison can be either signed or unsigned, we need to
252/// distinguish monotonicity in the signed sense from that in the unsigned
253/// sense. Note that the inequality "x <= y" merely indicates loop progression
254/// and is not affected by the difference between signed and unsigned order.
255///
256/// Currently we only consider monotonicity in a signed sense.
257enum class SCEVMonotonicityType {
258 /// We don't know anything about the monotonicity of the SCEV.
259 Unknown,
260
261 /// The SCEV is loop-invariant with respect to the outermost loop. In other
262 /// words, the function F corresponding to the SCEV is a constant function.
263 Invariant,
264
265 /// The function F corresponding to the SCEV is multivariate monotonic in a
266 /// signed sense. Note that the multivariate monotonic function may also be a
267 /// constant function. The order employed in the definition of monotonicity
268 /// is not strict order.
269 MultivariateSignedMonotonic,
270};
271
272struct SCEVMonotonicity {
273 SCEVMonotonicity(SCEVMonotonicityType Type,
274 const SCEV *FailurePoint = nullptr);
275
276 SCEVMonotonicityType getType() const { return Type; }
277
278 const SCEV *getFailurePoint() const { return FailurePoint; }
279
280 bool isUnknown() const { return Type == SCEVMonotonicityType::Unknown; }
281
282 void print(raw_ostream &OS, unsigned Depth) const;
283
284private:
285 SCEVMonotonicityType Type;
286
287 /// The subexpression that caused Unknown. Mainly for debugging purpose.
288 const SCEV *FailurePoint;
289};
290
291/// Check the monotonicity of a SCEV. Since dependence tests (SIV, MIV, etc.)
292/// assume that subscript expressions are (multivariate) monotonic, we need to
293/// verify this property before applying those tests. Violating this assumption
294/// may cause them to produce incorrect results.
295struct SCEVMonotonicityChecker
296 : public SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity> {
297
298 SCEVMonotonicityChecker(ScalarEvolution *SE) : SE(SE) {}
299
300 /// Check the monotonicity of \p Expr. \p Expr must be integer type. If \p
301 /// OutermostLoop is not null, \p Expr must be defined in \p OutermostLoop or
302 /// one of its nested loops.
303 SCEVMonotonicity checkMonotonicity(const SCEV *Expr,
304 const Loop *OutermostLoop);
305
306private:
307 ScalarEvolution *SE;
308
309 /// The outermost loop that DA is analyzing.
310 const Loop *OutermostLoop;
311
312 /// A helper to classify \p Expr as either Invariant or Unknown.
313 SCEVMonotonicity invariantOrUnknown(const SCEV *Expr);
314
315 /// Return true if \p Expr is loop-invariant with respect to the outermost
316 /// loop.
317 bool isLoopInvariant(const SCEV *Expr) const;
318
319 /// A helper to create an Unknown SCEVMonotonicity.
320 SCEVMonotonicity createUnknown(const SCEV *FailurePoint) {
321 return SCEVMonotonicity(SCEVMonotonicityType::Unknown, FailurePoint);
322 }
323
324 SCEVMonotonicity visitAddRecExpr(const SCEVAddRecExpr *Expr);
325
326 SCEVMonotonicity visitConstant(const SCEVConstant *) {
327 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
328 }
329 SCEVMonotonicity visitVScale(const SCEVVScale *) {
330 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
331 }
332
333 // TODO: Handle more cases.
334 SCEVMonotonicity visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
335 return invariantOrUnknown(Expr);
336 }
337 SCEVMonotonicity visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
338 return invariantOrUnknown(Expr);
339 }
340 SCEVMonotonicity visitAddExpr(const SCEVAddExpr *Expr) {
341 return invariantOrUnknown(Expr);
342 }
343 SCEVMonotonicity visitMulExpr(const SCEVMulExpr *Expr) {
344 return invariantOrUnknown(Expr);
345 }
346 SCEVMonotonicity visitPtrToAddrExpr(const SCEVPtrToAddrExpr *Expr) {
347 return invariantOrUnknown(Expr);
348 }
349 SCEVMonotonicity visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) {
350 return invariantOrUnknown(Expr);
351 }
352 SCEVMonotonicity visitTruncateExpr(const SCEVTruncateExpr *Expr) {
353 return invariantOrUnknown(Expr);
354 }
355 SCEVMonotonicity visitUDivExpr(const SCEVUDivExpr *Expr) {
356 return invariantOrUnknown(Expr);
357 }
358 SCEVMonotonicity visitSMaxExpr(const SCEVSMaxExpr *Expr) {
359 return invariantOrUnknown(Expr);
360 }
361 SCEVMonotonicity visitUMaxExpr(const SCEVUMaxExpr *Expr) {
362 return invariantOrUnknown(Expr);
363 }
364 SCEVMonotonicity visitSMinExpr(const SCEVSMinExpr *Expr) {
365 return invariantOrUnknown(Expr);
366 }
367 SCEVMonotonicity visitUMinExpr(const SCEVUMinExpr *Expr) {
368 return invariantOrUnknown(Expr);
369 }
370 SCEVMonotonicity visitSequentialUMinExpr(const SCEVSequentialUMinExpr *Expr) {
371 return invariantOrUnknown(Expr);
372 }
373 SCEVMonotonicity visitUnknown(const SCEVUnknown *Expr) {
374 return invariantOrUnknown(Expr);
375 }
376 SCEVMonotonicity visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
377 return invariantOrUnknown(Expr);
378 }
379
380 friend struct SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity>;
381};
382
383/// A wrapper class for std::optional<APInt> that provides arithmetic operators
384/// with overflow checking in a signed sense. This allows us to omit inserting
385/// an overflow check at every arithmetic operation, which simplifies the code
386/// if the operations are chained like `a + b + c + ...`.
387///
388/// If an calculation overflows, the result becomes "invalid" which is
389/// internally represented by std::nullopt. If any operand of an arithmetic
390/// operation is "invalid", the result will also be "invalid".
391struct OverflowSafeSignedAPInt {
392 OverflowSafeSignedAPInt() : Value(std::nullopt) {}
393 OverflowSafeSignedAPInt(const APInt &V) : Value(V) {}
394 OverflowSafeSignedAPInt(const std::optional<APInt> &V) : Value(V) {}
395
396 OverflowSafeSignedAPInt operator+(const OverflowSafeSignedAPInt &RHS) const {
397 if (!Value || !RHS.Value)
398 return OverflowSafeSignedAPInt();
399 bool Overflow;
400 APInt Result = Value->sadd_ov(RHS: *RHS.Value, Overflow);
401 if (Overflow)
402 return OverflowSafeSignedAPInt();
403 return OverflowSafeSignedAPInt(Result);
404 }
405
406 OverflowSafeSignedAPInt operator+(int RHS) const {
407 if (!Value)
408 return OverflowSafeSignedAPInt();
409 return *this + fromInt(V: RHS);
410 }
411
412 OverflowSafeSignedAPInt operator-(const OverflowSafeSignedAPInt &RHS) const {
413 if (!Value || !RHS.Value)
414 return OverflowSafeSignedAPInt();
415 bool Overflow;
416 APInt Result = Value->ssub_ov(RHS: *RHS.Value, Overflow);
417 if (Overflow)
418 return OverflowSafeSignedAPInt();
419 return OverflowSafeSignedAPInt(Result);
420 }
421
422 OverflowSafeSignedAPInt operator-(int RHS) const {
423 if (!Value)
424 return OverflowSafeSignedAPInt();
425 return *this - fromInt(V: RHS);
426 }
427
428 OverflowSafeSignedAPInt operator*(const OverflowSafeSignedAPInt &RHS) const {
429 if (!Value || !RHS.Value)
430 return OverflowSafeSignedAPInt();
431 bool Overflow;
432 APInt Result = Value->smul_ov(RHS: *RHS.Value, Overflow);
433 if (Overflow)
434 return OverflowSafeSignedAPInt();
435 return OverflowSafeSignedAPInt(Result);
436 }
437
438 OverflowSafeSignedAPInt operator-() const {
439 if (!Value)
440 return OverflowSafeSignedAPInt();
441 if (Value->isMinSignedValue())
442 return OverflowSafeSignedAPInt();
443 return OverflowSafeSignedAPInt(-*Value);
444 }
445
446 operator bool() const { return Value.has_value(); }
447
448 bool operator!() const { return !Value.has_value(); }
449
450 const APInt &operator*() const {
451 assert(Value && "Value is not available.");
452 return *Value;
453 }
454
455 const APInt *operator->() const {
456 assert(Value && "Value is not available.");
457 return &*Value;
458 }
459
460private:
461 /// Underlying value. std::nullopt means "unknown". An arithmetic operation on
462 /// "unknown" always produces "unknown".
463 std::optional<APInt> Value;
464
465 OverflowSafeSignedAPInt fromInt(uint64_t V) const {
466 assert(Value && "Value is not available.");
467 return OverflowSafeSignedAPInt(
468 APInt(Value->getBitWidth(), V, /*isSigned=*/true));
469 }
470};
471
472} // anonymous namespace
473
474// Used to test the dependence analyzer.
475// Looks through the function, noting instructions that may access memory.
476// Calls depends() on every possible pair and prints out the result.
477// Ignores all other instructions.
478static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
479 ScalarEvolution &SE, LoopInfo &LI,
480 bool NormalizeResults) {
481 auto *F = DA->getFunction();
482
483 if (DumpMonotonicityReport) {
484 SCEVMonotonicityChecker Checker(&SE);
485 OS << "Monotonicity check:\n";
486 for (Instruction &Inst : instructions(F)) {
487 if (!isa<LoadInst>(Val: Inst) && !isa<StoreInst>(Val: Inst))
488 continue;
489 Value *Ptr = getLoadStorePointerOperand(V: &Inst);
490 const Loop *L = LI.getLoopFor(BB: Inst.getParent());
491 const Loop *OutermostLoop = L ? L->getOutermostLoop() : nullptr;
492 const SCEV *PtrSCEV = SE.getSCEVAtScope(V: Ptr, L);
493 const SCEV *AccessFn = SE.removePointerBase(S: PtrSCEV);
494 SCEVMonotonicity Mon = Checker.checkMonotonicity(Expr: AccessFn, OutermostLoop);
495 OS.indent(NumSpaces: 2) << "Inst: " << Inst << "\n";
496 OS.indent(NumSpaces: 4) << "Expr: " << *AccessFn << "\n";
497 Mon.print(OS, Depth: 4);
498 }
499 OS << "\n";
500 }
501
502 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
503 ++SrcI) {
504 if (SrcI->mayReadOrWriteMemory()) {
505 for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE;
506 ++DstI) {
507 if (DstI->mayReadOrWriteMemory()) {
508 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
509 OS << " da analyze - ";
510 if (auto D = DA->depends(Src: &*SrcI, Dst: &*DstI,
511 /*UnderRuntimeAssumptions=*/true)) {
512
513#ifndef NDEBUG
514 // Verify that the distance being zero is equivalent to the
515 // direction being EQ.
516 for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
517 const SCEV *Distance = D->getDistance(Level);
518 bool IsDistanceZero = Distance && Distance->isZero();
519 bool IsDirectionEQ =
520 D->getDirection(Level) == Dependence::DVEntry::EQ;
521 assert(IsDistanceZero == IsDirectionEQ &&
522 "Inconsistent distance and direction.");
523 }
524#endif
525
526 // Normalize negative direction vectors if required by clients.
527 if (NormalizeResults && D->normalize(SE: &SE))
528 OS << "normalized - ";
529 D->dump(OS);
530 } else
531 OS << "none!\n";
532 }
533 }
534 }
535 }
536}
537
538void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
539 const Module *) const {
540 dumpExampleDependence(
541 OS, DA: info.get(), SE&: getAnalysis<ScalarEvolutionWrapperPass>().getSE(),
542 LI&: getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), NormalizeResults: false);
543}
544
545PreservedAnalyses
546DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
547 OS << "Printing analysis 'Dependence Analysis' for function '" << F.getName()
548 << "':\n";
549 dumpExampleDependence(OS, DA: &FAM.getResult<DependenceAnalysis>(IR&: F),
550 SE&: FAM.getResult<ScalarEvolutionAnalysis>(IR&: F),
551 LI&: FAM.getResult<LoopAnalysis>(IR&: F), NormalizeResults);
552 return PreservedAnalyses::all();
553}
554
555//===----------------------------------------------------------------------===//
556// Dependence methods
557
558// Returns true if this is an input dependence.
559bool Dependence::isInput() const {
560 return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
561}
562
563// Returns true if this is an output dependence.
564bool Dependence::isOutput() const {
565 return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
566}
567
568// Returns true if this is an flow (aka true) dependence.
569bool Dependence::isFlow() const {
570 return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
571}
572
573// Returns true if this is an anti dependence.
574bool Dependence::isAnti() const {
575 return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
576}
577
578// Returns true if a particular level is scalar; that is,
579// if no subscript in the source or destination mention the induction
580// variable associated with the loop at this level.
581// Leave this out of line, so it will serve as a virtual method anchor
582bool Dependence::isScalar(unsigned level, bool IsSameSD) const { return false; }
583
584//===----------------------------------------------------------------------===//
585// FullDependence methods
586
587FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
588 const SCEVUnionPredicate &Assumes,
589 bool PossiblyLoopIndependent,
590 unsigned CommonLevels)
591 : Dependence(Source, Destination, Assumes), Levels(CommonLevels),
592 LoopIndependent(PossiblyLoopIndependent) {
593 Consistent = true;
594 SameSDLevels = 0;
595 if (CommonLevels)
596 DV = std::make_unique<DVEntry[]>(num: CommonLevels);
597}
598
599// FIXME: in some cases the meaning of a negative direction vector
600// may not be straightforward, e.g.,
601// for (int i = 0; i < 32; ++i) {
602// Src: A[i] = ...;
603// Dst: use(A[31 - i]);
604// }
605// The dependency is
606// flow { Src[i] -> Dst[31 - i] : when i >= 16 } and
607// anti { Dst[i] -> Src[31 - i] : when i < 16 },
608// -- hence a [<>].
609// As long as a dependence result contains '>' ('<>', '<=>', "*"), it
610// means that a reversed/normalized dependence needs to be considered
611// as well. Nevertheless, current isDirectionNegative() only returns
612// true with a '>' or '>=' dependency for ease of canonicalizing the
613// dependency vector, since the reverse of '<>', '<=>' and "*" is itself.
614bool FullDependence::isDirectionNegative() const {
615 for (unsigned Level = 1; Level <= Levels; ++Level) {
616 unsigned char Direction = DV[Level - 1].Direction;
617 if (Direction == Dependence::DVEntry::EQ)
618 continue;
619 if (Direction == Dependence::DVEntry::GT ||
620 Direction == Dependence::DVEntry::GE)
621 return true;
622 return false;
623 }
624 return false;
625}
626
627bool FullDependence::normalize(ScalarEvolution *SE) {
628 if (!isDirectionNegative())
629 return false;
630
631 LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
632 dump(dbgs()););
633 std::swap(a&: Src, b&: Dst);
634 for (unsigned Level = 1; Level <= Levels; ++Level) {
635 unsigned char Direction = DV[Level - 1].Direction;
636 // Reverse the direction vector, this means LT becomes GT
637 // and GT becomes LT.
638 unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
639 if (Direction & Dependence::DVEntry::LT)
640 RevDirection |= Dependence::DVEntry::GT;
641 if (Direction & Dependence::DVEntry::GT)
642 RevDirection |= Dependence::DVEntry::LT;
643 DV[Level - 1].Direction = RevDirection;
644 // Reverse the dependence distance as well.
645 if (DV[Level - 1].Distance != nullptr)
646 DV[Level - 1].Distance = SE->getNegativeSCEV(V: DV[Level - 1].Distance);
647 }
648
649 LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
650 dump(dbgs()););
651 return true;
652}
653
654// The rest are simple getters that hide the implementation.
655
656// getDirection - Returns the direction associated with a particular common or
657// SameSD level.
658unsigned FullDependence::getDirection(unsigned Level, bool IsSameSD) const {
659 return getDVEntry(Level, IsSameSD).Direction;
660}
661
662// Returns the distance (or NULL) associated with a particular common or
663// SameSD level.
664const SCEV *FullDependence::getDistance(unsigned Level, bool IsSameSD) const {
665 return getDVEntry(Level, IsSameSD).Distance;
666}
667
668// Returns true if a particular regular or SameSD level is scalar; that is,
669// if no subscript in the source or destination mention the induction variable
670// associated with the loop at this level.
671bool FullDependence::isScalar(unsigned Level, bool IsSameSD) const {
672 return getDVEntry(Level, IsSameSD).Scalar;
673}
674
675// Returns true if peeling the first iteration from this regular or SameSD
676// loop level will break this dependence.
677bool FullDependence::isPeelFirst(unsigned Level, bool IsSameSD) const {
678 return getDVEntry(Level, IsSameSD).PeelFirst;
679}
680
681// Returns true if peeling the last iteration from this regular or SameSD
682// loop level will break this dependence.
683bool FullDependence::isPeelLast(unsigned Level, bool IsSameSD) const {
684 return getDVEntry(Level, IsSameSD).PeelLast;
685}
686
687// inSameSDLoops - Returns true if this level is an SameSD level, i.e.,
688// performed across two separate loop nests that have the Same iteration space
689// and Depth.
690bool FullDependence::inSameSDLoops(unsigned Level) const {
691 assert(0 < Level && Level <= static_cast<unsigned>(Levels) + SameSDLevels &&
692 "Level out of range");
693 return Level > Levels;
694}
695
696//===----------------------------------------------------------------------===//
697// SCEVMonotonicity
698
699SCEVMonotonicity::SCEVMonotonicity(SCEVMonotonicityType Type,
700 const SCEV *FailurePoint)
701 : Type(Type), FailurePoint(FailurePoint) {
702 assert(
703 ((Type == SCEVMonotonicityType::Unknown) == (FailurePoint != nullptr)) &&
704 "FailurePoint must be provided iff Type is Unknown");
705}
706
707void SCEVMonotonicity::print(raw_ostream &OS, unsigned Depth) const {
708 OS.indent(NumSpaces: Depth) << "Monotonicity: ";
709 switch (Type) {
710 case SCEVMonotonicityType::Unknown:
711 assert(FailurePoint && "FailurePoint must be provided for Unknown");
712 OS << "Unknown\n";
713 OS.indent(NumSpaces: Depth) << "Reason: " << *FailurePoint << "\n";
714 break;
715 case SCEVMonotonicityType::Invariant:
716 OS << "Invariant\n";
717 break;
718 case SCEVMonotonicityType::MultivariateSignedMonotonic:
719 OS << "MultivariateSignedMonotonic\n";
720 break;
721 }
722}
723
724bool SCEVMonotonicityChecker::isLoopInvariant(const SCEV *Expr) const {
725 return !OutermostLoop || SE->isLoopInvariant(S: Expr, L: OutermostLoop);
726}
727
728SCEVMonotonicity SCEVMonotonicityChecker::invariantOrUnknown(const SCEV *Expr) {
729 if (isLoopInvariant(Expr))
730 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
731 return createUnknown(FailurePoint: Expr);
732}
733
734SCEVMonotonicity
735SCEVMonotonicityChecker::checkMonotonicity(const SCEV *Expr,
736 const Loop *OutermostLoop) {
737 assert((!OutermostLoop || OutermostLoop->isOutermost()) &&
738 "OutermostLoop must be outermost");
739 assert(Expr->getType()->isIntegerTy() && "Expr must be integer type");
740 this->OutermostLoop = OutermostLoop;
741 return visit(S: Expr);
742}
743
744/// We only care about an affine AddRec at the moment. For an affine AddRec,
745/// the monotonicity can be inferred from its nowrap property. For example, let
746/// X and Y be loop-invariant, and assume Y is non-negative. An AddRec
747/// {X,+.Y}<nsw> implies:
748///
749/// X <=s (X + Y) <=s ((X + Y) + Y) <=s ...
750///
751/// Thus, we can conclude that the AddRec is monotonically increasing with
752/// respect to the associated loop in a signed sense. The similar reasoning
753/// applies when Y is non-positive, leading to a monotonically decreasing
754/// AddRec.
755SCEVMonotonicity
756SCEVMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
757 if (!Expr->isAffine() || !Expr->hasNoSignedWrap())
758 return createUnknown(FailurePoint: Expr);
759
760 const SCEV *Start = Expr->getStart();
761 const SCEV *Step = Expr->getStepRecurrence(SE&: *SE);
762
763 SCEVMonotonicity StartMon = visit(S: Start);
764 if (StartMon.isUnknown())
765 return StartMon;
766
767 if (!isLoopInvariant(Expr: Step))
768 return createUnknown(FailurePoint: Expr);
769
770 return SCEVMonotonicity(SCEVMonotonicityType::MultivariateSignedMonotonic);
771}
772
773//===----------------------------------------------------------------------===//
774// DependenceInfo methods
775
776// For debugging purposes. Dumps a dependence to OS.
777void Dependence::dump(raw_ostream &OS) const {
778 if (isConfused())
779 OS << "confused";
780 else {
781 if (isConsistent())
782 OS << "consistent ";
783 if (isFlow())
784 OS << "flow";
785 else if (isOutput())
786 OS << "output";
787 else if (isAnti())
788 OS << "anti";
789 else if (isInput())
790 OS << "input";
791 dumpImp(OS);
792 unsigned SameSDLevels = getSameSDLevels();
793 if (SameSDLevels > 0) {
794 OS << " / assuming " << SameSDLevels << " loop level(s) fused: ";
795 dumpImp(OS, IsSameSD: true);
796 }
797 }
798 OS << "!\n";
799
800 SCEVUnionPredicate Assumptions = getRuntimeAssumptions();
801 if (!Assumptions.isAlwaysTrue()) {
802 OS << " Runtime Assumptions:\n";
803 Assumptions.print(OS, Depth: 2);
804 }
805}
806
807// For debugging purposes. Dumps a dependence to OS with or without considering
808// the SameSD levels.
809void Dependence::dumpImp(raw_ostream &OS, bool IsSameSD) const {
810 unsigned Levels = getLevels();
811 unsigned SameSDLevels = getSameSDLevels();
812 bool OnSameSD = false;
813 unsigned LevelNum = Levels;
814 if (IsSameSD)
815 LevelNum += SameSDLevels;
816 OS << " [";
817 for (unsigned II = 1; II <= LevelNum; ++II) {
818 if (!OnSameSD && inSameSDLoops(Level: II))
819 OnSameSD = true;
820 if (isPeelFirst(Level: II, SameSD: OnSameSD))
821 OS << 'p';
822 const SCEV *Distance = getDistance(Level: II, SameSD: OnSameSD);
823 if (Distance)
824 OS << *Distance;
825 else if (isScalar(level: II, IsSameSD: OnSameSD))
826 OS << "S";
827 else {
828 unsigned Direction = getDirection(Level: II, SameSD: OnSameSD);
829 if (Direction == DVEntry::ALL)
830 OS << "*";
831 else {
832 if (Direction & DVEntry::LT)
833 OS << "<";
834 if (Direction & DVEntry::EQ)
835 OS << "=";
836 if (Direction & DVEntry::GT)
837 OS << ">";
838 }
839 }
840 if (isPeelLast(Level: II, SameSD: OnSameSD))
841 OS << 'p';
842 if (II < LevelNum)
843 OS << " ";
844 }
845 if (isLoopIndependent())
846 OS << "|<";
847 OS << "]";
848}
849
850// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
851// underlaying objects. If LocA and LocB are known to not alias (for any reason:
852// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
853// Otherwise the underlying objects are checked to see if they point to
854// different identifiable objects.
855static AliasResult underlyingObjectsAlias(AAResults *AA, const DataLayout &DL,
856 const MemoryLocation &LocA,
857 const MemoryLocation &LocB) {
858 // Check the original locations (minus size) for noalias, which can happen for
859 // tbaa, incompatible underlying object locations, etc.
860 MemoryLocation LocAS =
861 MemoryLocation::getBeforeOrAfter(Ptr: LocA.Ptr, AATags: LocA.AATags);
862 MemoryLocation LocBS =
863 MemoryLocation::getBeforeOrAfter(Ptr: LocB.Ptr, AATags: LocB.AATags);
864 BatchAAResults BAA(*AA);
865 BAA.enableCrossIterationMode();
866
867 if (BAA.isNoAlias(LocA: LocAS, LocB: LocBS))
868 return AliasResult::NoAlias;
869
870 // Check the underlying objects are the same
871 const Value *AObj = getUnderlyingObject(V: LocA.Ptr);
872 const Value *BObj = getUnderlyingObject(V: LocB.Ptr);
873
874 // If the underlying objects are the same, they must alias
875 if (AObj == BObj)
876 return AliasResult::MustAlias;
877
878 // We may have hit the recursion limit for underlying objects, or have
879 // underlying objects where we don't know they will alias.
880 if (!isIdentifiedObject(V: AObj) || !isIdentifiedObject(V: BObj))
881 return AliasResult::MayAlias;
882
883 // Otherwise we know the objects are different and both identified objects so
884 // must not alias.
885 return AliasResult::NoAlias;
886}
887
888// Returns true if the load or store can be analyzed. Atomic and volatile
889// operations have properties which this analysis does not understand.
890static bool isLoadOrStore(const Instruction *I) {
891 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
892 return LI->isUnordered();
893 else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
894 return SI->isUnordered();
895 return false;
896}
897
898// Returns true if two loops have the Same iteration Space and Depth. To be
899// more specific, two loops have SameSD if they are in the same nesting
900// depth and have the same backedge count. SameSD stands for Same iteration
901// Space and Depth.
902bool DependenceInfo::haveSameSD(const Loop *SrcLoop,
903 const Loop *DstLoop) const {
904 if (SrcLoop == DstLoop)
905 return true;
906
907 if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
908 return false;
909
910 if (!SrcLoop || !SrcLoop->getLoopLatch() || !DstLoop ||
911 !DstLoop->getLoopLatch())
912 return false;
913
914 const SCEV *SrcUB = nullptr, *DstUP = nullptr;
915 if (SE->hasLoopInvariantBackedgeTakenCount(L: SrcLoop))
916 SrcUB = SE->getBackedgeTakenCount(L: SrcLoop);
917 if (SE->hasLoopInvariantBackedgeTakenCount(L: DstLoop))
918 DstUP = SE->getBackedgeTakenCount(L: DstLoop);
919
920 if (SrcUB != nullptr && DstUP != nullptr) {
921 Type *WiderType = SE->getWiderType(Ty1: SrcUB->getType(), Ty2: DstUP->getType());
922 SrcUB = SE->getNoopOrZeroExtend(V: SrcUB, Ty: WiderType);
923 DstUP = SE->getNoopOrZeroExtend(V: DstUP, Ty: WiderType);
924
925 if (SE->isKnownPredicate(Pred: ICmpInst::ICMP_EQ, LHS: SrcUB, RHS: DstUP))
926 return true;
927 }
928
929 return false;
930}
931
932// Examines the loop nesting of the Src and Dst
933// instructions and establishes their shared loops. Sets the variables
934// CommonLevels, SrcLevels, and MaxLevels.
935// The source and destination instructions needn't be contained in the same
936// loop. The routine establishNestingLevels finds the level of most deeply
937// nested loop that contains them both, CommonLevels. An instruction that's
938// not contained in a loop is at level = 0. MaxLevels is equal to the level
939// of the source plus the level of the destination, minus CommonLevels.
940// This lets us allocate vectors MaxLevels in length, with room for every
941// distinct loop referenced in both the source and destination subscripts.
942// The variable SrcLevels is the nesting depth of the source instruction.
943// It's used to help calculate distinct loops referenced by the destination.
944// Here's the map from loops to levels:
945// 0 - unused
946// 1 - outermost common loop
947// ... - other common loops
948// CommonLevels - innermost common loop
949// ... - loops containing Src but not Dst
950// SrcLevels - innermost loop containing Src but not Dst
951// ... - loops containing Dst but not Src
952// MaxLevels - innermost loops containing Dst but not Src
953// Consider the follow code fragment:
954// for (a = ...) {
955// for (b = ...) {
956// for (c = ...) {
957// for (d = ...) {
958// A[] = ...;
959// }
960// }
961// for (e = ...) {
962// for (f = ...) {
963// for (g = ...) {
964// ... = A[];
965// }
966// }
967// }
968// }
969// }
970// If we're looking at the possibility of a dependence between the store
971// to A (the Src) and the load from A (the Dst), we'll note that they
972// have 2 loops in common, so CommonLevels will equal 2 and the direction
973// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
974// A map from loop names to loop numbers would look like
975// a - 1
976// b - 2 = CommonLevels
977// c - 3
978// d - 4 = SrcLevels
979// e - 5
980// f - 6
981// g - 7 = MaxLevels
982// SameSDLevels counts the number of levels after common levels that are
983// not common but have the same iteration space and depth. Internally this
984// is checked using haveSameSD. Assume that in this code fragment, levels c and
985// e have the same iteration space and depth, but levels d and f does not. Then
986// SameSDLevels is set to 1. In that case the level numbers for the previous
987// code look like
988// a - 1
989// b - 2
990// c,e - 3 = CommonLevels
991// d - 4 = SrcLevels
992// f - 5
993// g - 6 = MaxLevels
994void DependenceInfo::establishNestingLevels(const Instruction *Src,
995 const Instruction *Dst) {
996 const BasicBlock *SrcBlock = Src->getParent();
997 const BasicBlock *DstBlock = Dst->getParent();
998 unsigned SrcLevel = LI->getLoopDepth(BB: SrcBlock);
999 unsigned DstLevel = LI->getLoopDepth(BB: DstBlock);
1000 const Loop *SrcLoop = LI->getLoopFor(BB: SrcBlock);
1001 const Loop *DstLoop = LI->getLoopFor(BB: DstBlock);
1002 SrcLevels = SrcLevel;
1003 MaxLevels = SrcLevel + DstLevel;
1004 SameSDLevels = 0;
1005 while (SrcLevel > DstLevel) {
1006 SrcLoop = SrcLoop->getParentLoop();
1007 SrcLevel--;
1008 }
1009 while (DstLevel > SrcLevel) {
1010 DstLoop = DstLoop->getParentLoop();
1011 DstLevel--;
1012 }
1013
1014 // find the first common level and count the SameSD levels leading to it
1015 while (SrcLoop != DstLoop) {
1016 SameSDLevels++;
1017 if (!haveSameSD(SrcLoop, DstLoop))
1018 SameSDLevels = 0;
1019 SrcLoop = SrcLoop->getParentLoop();
1020 DstLoop = DstLoop->getParentLoop();
1021 SrcLevel--;
1022 }
1023 CommonLevels = SrcLevel;
1024 MaxLevels -= CommonLevels;
1025}
1026
1027// Given one of the loops containing the source, return
1028// its level index in our numbering scheme.
1029unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
1030 return SrcLoop->getLoopDepth();
1031}
1032
1033// Given one of the loops containing the destination,
1034// return its level index in our numbering scheme.
1035unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
1036 unsigned D = DstLoop->getLoopDepth();
1037 if (D > CommonLevels)
1038 // This tries to make sure that we assign unique numbers to src and dst when
1039 // the memory accesses reside in different loops that have the same depth.
1040 return D - CommonLevels + SrcLevels;
1041 else
1042 return D;
1043}
1044
1045// Returns true if Expression is loop invariant in LoopNest.
1046bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
1047 const Loop *LoopNest) const {
1048 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of
1049 // any loop as invariant, because we only consier expression evaluation at a
1050 // specific position (where the array access takes place), and not across the
1051 // entire function.
1052 if (!LoopNest)
1053 return true;
1054
1055 // If the expression is invariant in the outermost loop of the loop nest, it
1056 // is invariant anywhere in the loop nest.
1057 return SE->isLoopInvariant(S: Expression, L: LoopNest->getOutermostLoop());
1058}
1059
1060// Finds the set of loops from the LoopNest that
1061// have a level <= CommonLevels and are referred to by the SCEV Expression.
1062void DependenceInfo::collectCommonLoops(const SCEV *Expression,
1063 const Loop *LoopNest,
1064 SmallBitVector &Loops) const {
1065 while (LoopNest) {
1066 unsigned Level = LoopNest->getLoopDepth();
1067 if (Level <= CommonLevels && !SE->isLoopInvariant(S: Expression, L: LoopNest))
1068 Loops.set(Level);
1069 LoopNest = LoopNest->getParentLoop();
1070 }
1071}
1072
1073// Examine the scev and return true iff it's affine.
1074// Collect any loops mentioned in the set of "Loops".
1075bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
1076 SmallBitVector &Loops, bool IsSrc) {
1077 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
1078 if (!AddRec)
1079 return isLoopInvariant(Expression: Expr, LoopNest);
1080
1081 // The AddRec must depend on one of the containing loops. Otherwise,
1082 // mapSrcLoop and mapDstLoop return indices outside the intended range. This
1083 // can happen when a subscript in one loop references an IV from a sibling
1084 // loop that could not be replaced with a concrete exit value by
1085 // getSCEVAtScope.
1086 const Loop *L = LoopNest;
1087 while (L && AddRec->getLoop() != L)
1088 L = L->getParentLoop();
1089 if (!L)
1090 return false;
1091
1092 const SCEV *Start = AddRec->getStart();
1093 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
1094 if (!isLoopInvariant(Expression: Step, LoopNest))
1095 return false;
1096 if (IsSrc)
1097 Loops.set(mapSrcLoop(SrcLoop: AddRec->getLoop()));
1098 else
1099 Loops.set(mapDstLoop(DstLoop: AddRec->getLoop()));
1100 return checkSubscript(Expr: Start, LoopNest, Loops, IsSrc);
1101}
1102
1103// Examine the scev and return true iff it's linear.
1104// Collect any loops mentioned in the set of "Loops".
1105bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
1106 SmallBitVector &Loops) {
1107 return checkSubscript(Expr: Src, LoopNest, Loops, IsSrc: true);
1108}
1109
1110// Examine the scev and return true iff it's linear.
1111// Collect any loops mentioned in the set of "Loops".
1112bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
1113 SmallBitVector &Loops) {
1114 return checkSubscript(Expr: Dst, LoopNest, Loops, IsSrc: false);
1115}
1116
1117// Examines the subscript pair (the Src and Dst SCEVs)
1118// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
1119// Collects the associated loops in a set.
1120DependenceInfo::Subscript::ClassificationKind
1121DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
1122 const SCEV *Dst, const Loop *DstLoopNest,
1123 SmallBitVector &Loops) {
1124 SmallBitVector SrcLoops(MaxLevels + 1);
1125 SmallBitVector DstLoops(MaxLevels + 1);
1126 if (!checkSrcSubscript(Src, LoopNest: SrcLoopNest, Loops&: SrcLoops))
1127 return Subscript::NonLinear;
1128 if (!checkDstSubscript(Dst, LoopNest: DstLoopNest, Loops&: DstLoops))
1129 return Subscript::NonLinear;
1130 Loops = SrcLoops;
1131 Loops |= DstLoops;
1132 unsigned N = Loops.count();
1133 if (N == 0)
1134 return Subscript::ZIV;
1135 if (N == 1)
1136 return Subscript::SIV;
1137 if (N == 2 && SrcLoops.count() == 1 && DstLoops.count() == 1)
1138 return Subscript::RDIV;
1139 return Subscript::MIV;
1140}
1141
1142// All subscripts are all the same type.
1143// Loop bound may be smaller (e.g., a char).
1144// Should zero extend loop bound, since it's always >= 0.
1145// This routine collects upper bound and extends or truncates if needed.
1146// Truncating is safe when subscripts are known not to wrap. Cases without
1147// nowrap flags should have been rejected earlier.
1148// Return null if no bound available.
1149const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
1150 if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
1151 const SCEV *UB = SE->getBackedgeTakenCount(L);
1152 return SE->getTruncateOrZeroExtend(V: UB, Ty: T);
1153 }
1154 return nullptr;
1155}
1156
1157// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1158// If the cast fails, returns NULL.
1159const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
1160 Type *T) const {
1161 if (const SCEV *UB = collectUpperBound(L, T))
1162 return dyn_cast<SCEVConstant>(Val: UB);
1163 return nullptr;
1164}
1165
1166/// Returns \p A - \p B if it guaranteed not to signed wrap. Otherwise returns
1167/// nullptr. \p A and \p B must have the same integer type.
1168static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
1169 ScalarEvolution &SE) {
1170 if (SE.willNotOverflow(BinOp: Instruction::Sub, /*Signed=*/true, LHS: A, RHS: B))
1171 return SE.getMinusSCEV(LHS: A, RHS: B);
1172 return nullptr;
1173}
1174
1175/// Returns true iff \p Test is enabled.
1176static bool isDependenceTestEnabled(DependenceTestType Test) {
1177 if (EnableDependenceTest == DependenceTestType::All)
1178 return true;
1179 return EnableDependenceTest == Test;
1180}
1181
1182// testZIV -
1183// When we have a pair of subscripts of the form [c1] and [c2],
1184// where c1 and c2 are both loop invariant, we attack it using
1185// the ZIV test. Basically, we test by comparing the two values,
1186// but there are actually three possible results:
1187// 1) the values are equal, so there's a dependence
1188// 2) the values are different, so there's no dependence
1189// 3) the values might be equal, so we have to assume a dependence.
1190//
1191// Return true if dependence disproved.
1192bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
1193 FullDependence &Result) const {
1194 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1195 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1196 ++ZIVapplications;
1197 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: Src, RHS: Dst)) {
1198 LLVM_DEBUG(dbgs() << " provably dependent\n");
1199 return false; // provably dependent
1200 }
1201 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_NE, LHS: Src, RHS: Dst)) {
1202 LLVM_DEBUG(dbgs() << " provably independent\n");
1203 ++ZIVindependence;
1204 return true; // provably independent
1205 }
1206 LLVM_DEBUG(dbgs() << " possibly dependent\n");
1207 Result.Consistent = false;
1208 return false; // possibly dependent
1209}
1210
1211// strongSIVtest -
1212// From the paper, Practical Dependence Testing, Section 4.2.1
1213//
1214// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1215// where i is an induction variable, c1 and c2 are loop invariant,
1216// and a is a constant, we can solve it exactly using the Strong SIV test.
1217//
1218// Can prove independence. Failing that, can compute distance (and direction).
1219// In the presence of symbolic terms, we can sometimes make progress.
1220//
1221// If there's a dependence,
1222//
1223// c1 + a*i = c2 + a*i'
1224//
1225// The dependence distance is
1226//
1227// d = i' - i = (c1 - c2)/a
1228//
1229// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1230// loop's upper bound. If a dependence exists, the dependence direction is
1231// defined as
1232//
1233// { < if d > 0
1234// direction = { = if d = 0
1235// { > if d < 0
1236//
1237// Return true if dependence disproved.
1238bool DependenceInfo::strongSIVtest(const SCEVAddRecExpr *Src,
1239 const SCEVAddRecExpr *Dst, unsigned Level,
1240 FullDependence &Result,
1241 bool UnderRuntimeAssumptions) {
1242 if (!isDependenceTestEnabled(Test: DependenceTestType::StrongSIV))
1243 return false;
1244
1245 const SCEV *Coeff = Src->getStepRecurrence(SE&: *SE);
1246 assert(Coeff == Dst->getStepRecurrence(*SE) &&
1247 "Expecting same coefficient in Strong SIV test");
1248 const SCEV *SrcConst = Src->getStart();
1249 const SCEV *DstConst = Dst->getStart();
1250 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1251 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
1252 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1253 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
1254 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1255 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
1256 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1257 ++StrongSIVapplications;
1258 assert(0 < Level && Level <= CommonLevels && "level out of range");
1259 Level--;
1260
1261 // First try to prove independence based on the ranges of the two subscripts.
1262 ConstantRange SrcRange = SE->getSignedRange(S: Src);
1263 ConstantRange DstRange = SE->getSignedRange(S: Dst);
1264 if (SrcRange.intersectWith(CR: DstRange).isEmptySet()) {
1265 ++StrongSIVindependence;
1266 ++StrongSIVsuccesses;
1267 return true;
1268 }
1269
1270 const SCEV *Delta = minusSCEVNoSignedOverflow(A: SrcConst, B: DstConst, SE&: *SE);
1271 if (!Delta) {
1272 Result.Consistent = false;
1273 return false;
1274 }
1275 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
1276 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1277
1278 // Can we compute distance?
1279 if (isa<SCEVConstant>(Val: Delta) && isa<SCEVConstant>(Val: Coeff)) {
1280 APInt ConstDelta = cast<SCEVConstant>(Val: Delta)->getAPInt();
1281 APInt ConstCoeff = cast<SCEVConstant>(Val: Coeff)->getAPInt();
1282 APInt Distance = ConstDelta; // these need to be initialized
1283 APInt Remainder = ConstDelta;
1284 APInt::sdivrem(LHS: ConstDelta, RHS: ConstCoeff, Quotient&: Distance, Remainder);
1285 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1286 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1287 // Make sure Coeff divides Delta exactly
1288 if (Remainder != 0) {
1289 // Coeff doesn't divide Distance, no dependence
1290 ++StrongSIVindependence;
1291 ++StrongSIVsuccesses;
1292 return true;
1293 }
1294 Result.DV[Level].Distance = SE->getConstant(Val: Distance);
1295 if (Distance.sgt(RHS: 0))
1296 Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1297 else if (Distance.slt(RHS: 0))
1298 Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1299 else
1300 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1301 ++StrongSIVsuccesses;
1302 } else if (Delta->isZero()) {
1303 // Check if coefficient could be zero. If so, 0/0 is undefined and we
1304 // cannot conclude that only same-iteration dependencies exist.
1305 // When coeff=0, all iterations access the same location.
1306 if (SE->isKnownNonZero(S: Coeff)) {
1307 LLVM_DEBUG(
1308 dbgs() << "\t Coefficient proven non-zero by SCEV analysis\n");
1309 } else {
1310 // Cannot prove at compile time, would need runtime assumption.
1311 if (UnderRuntimeAssumptions) {
1312 const SCEVPredicate *Pred = SE->getComparePredicate(
1313 Pred: ICmpInst::ICMP_NE, LHS: Coeff, RHS: SE->getZero(Ty: Coeff->getType()));
1314 Result.Assumptions = Result.Assumptions.getUnionWith(N: Pred, SE&: *SE);
1315 LLVM_DEBUG(dbgs() << "\t Added runtime assumption: " << *Coeff
1316 << " != 0\n");
1317 } else {
1318 // Cannot add runtime assumptions, this test cannot handle this case.
1319 // Let more complex tests try.
1320 LLVM_DEBUG(dbgs() << "\t Would need runtime assumption " << *Coeff
1321 << " != 0, but not allowed. Failing this test.\n");
1322 return false;
1323 }
1324 }
1325 // Since 0/X == 0 (where X is known non-zero or assumed non-zero).
1326 Result.DV[Level].Distance = Delta;
1327 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1328 ++StrongSIVsuccesses;
1329 } else {
1330 if (Coeff->isOne()) {
1331 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
1332 Result.DV[Level].Distance = Delta; // since X/1 == X
1333 } else {
1334 Result.Consistent = false;
1335 }
1336
1337 // maybe we can get a useful direction
1338 bool DeltaMaybeZero = !SE->isKnownNonZero(S: Delta);
1339 bool DeltaMaybePositive = !SE->isKnownNonPositive(S: Delta);
1340 bool DeltaMaybeNegative = !SE->isKnownNonNegative(S: Delta);
1341 bool CoeffMaybePositive = !SE->isKnownNonPositive(S: Coeff);
1342 bool CoeffMaybeNegative = !SE->isKnownNonNegative(S: Coeff);
1343 // The double negatives above are confusing.
1344 // It helps to read !SE->isKnownNonZero(Delta)
1345 // as "Delta might be Zero"
1346 unsigned NewDirection = Dependence::DVEntry::NONE;
1347 if ((DeltaMaybePositive && CoeffMaybePositive) ||
1348 (DeltaMaybeNegative && CoeffMaybeNegative))
1349 NewDirection = Dependence::DVEntry::LT;
1350 if (DeltaMaybeZero)
1351 NewDirection |= Dependence::DVEntry::EQ;
1352 if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1353 (DeltaMaybePositive && CoeffMaybeNegative))
1354 NewDirection |= Dependence::DVEntry::GT;
1355 if (NewDirection < Result.DV[Level].Direction)
1356 ++StrongSIVsuccesses;
1357 Result.DV[Level].Direction &= NewDirection;
1358 }
1359 return false;
1360}
1361
1362// weakCrossingSIVtest -
1363// From the paper, Practical Dependence Testing, Section 4.2.2
1364//
1365// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1366// where i is an induction variable, c1 and c2 are loop invariant,
1367// and a is a constant, we can solve it exactly using the
1368// Weak-Crossing SIV test.
1369//
1370// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1371// the two lines, where i = i', yielding
1372//
1373// c1 + a*i = c2 - a*i
1374// 2a*i = c2 - c1
1375// i = (c2 - c1)/2a
1376//
1377// If i < 0, there is no dependence.
1378// If i > upperbound, there is no dependence.
1379// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1380// If i = upperbound, there's a dependence with distance = 0.
1381// If i is integral, there's a dependence (all directions).
1382// If the non-integer part = 1/2, there's a dependence (<> directions).
1383// Otherwise, there's no dependence.
1384//
1385// Can prove independence. Failing that,
1386// can sometimes refine the directions.
1387// Can determine iteration for splitting.
1388//
1389// Return true if dependence disproved.
1390bool DependenceInfo::weakCrossingSIVtest(const SCEV *Coeff,
1391 const SCEV *SrcConst,
1392 const SCEV *DstConst,
1393 const Loop *CurSrcLoop,
1394 const Loop *CurDstLoop, unsigned Level,
1395 FullDependence &Result) const {
1396 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakCrossingSIV))
1397 return false;
1398
1399 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1400 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
1401 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1402 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1403 ++WeakCrossingSIVapplications;
1404 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1405 Level--;
1406 Result.Consistent = false;
1407 const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
1408 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1409 if (Delta->isZero()) {
1410 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1411 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1412 ++WeakCrossingSIVsuccesses;
1413 if (!Result.DV[Level].Direction) {
1414 ++WeakCrossingSIVindependence;
1415 return true;
1416 }
1417 Result.DV[Level].Distance = Delta; // = 0
1418 return false;
1419 }
1420 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: Coeff);
1421 if (!ConstCoeff)
1422 return false;
1423
1424 if (SE->isKnownNegative(S: ConstCoeff)) {
1425 ConstCoeff = dyn_cast<SCEVConstant>(Val: SE->getNegativeSCEV(V: ConstCoeff));
1426 assert(ConstCoeff &&
1427 "dynamic cast of negative of ConstCoeff should yield constant");
1428 Delta = SE->getNegativeSCEV(V: Delta);
1429 }
1430 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1431
1432 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1433 if (!ConstDelta)
1434 return false;
1435
1436 // We're certain that ConstCoeff > 0; therefore,
1437 // if Delta < 0, then no dependence.
1438 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1439 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
1440 if (SE->isKnownNegative(S: Delta)) {
1441 // No dependence, Delta < 0
1442 ++WeakCrossingSIVindependence;
1443 ++WeakCrossingSIVsuccesses;
1444 return true;
1445 }
1446
1447 // We're certain that Delta > 0 and ConstCoeff > 0.
1448 // Check Delta/(2*ConstCoeff) against upper loop bound
1449 if (const SCEV *UpperBound =
1450 collectUpperBound(L: CurSrcLoop, T: Delta->getType())) {
1451 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1452 const SCEV *ConstantTwo = SE->getConstant(Ty: UpperBound->getType(), V: 2);
1453 const SCEV *ML =
1454 SE->getMulExpr(LHS: SE->getMulExpr(LHS: ConstCoeff, RHS: UpperBound), RHS: ConstantTwo);
1455 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n");
1456 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: Delta, RHS: ML)) {
1457 // Delta too big, no dependence
1458 ++WeakCrossingSIVindependence;
1459 ++WeakCrossingSIVsuccesses;
1460 return true;
1461 }
1462 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: Delta, RHS: ML)) {
1463 // i = i' = UB
1464 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1465 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1466 ++WeakCrossingSIVsuccesses;
1467 if (!Result.DV[Level].Direction) {
1468 ++WeakCrossingSIVindependence;
1469 return true;
1470 }
1471 Result.DV[Level].Distance = SE->getZero(Ty: Delta->getType());
1472 return false;
1473 }
1474 }
1475
1476 // check that Coeff divides Delta
1477 APInt APDelta = ConstDelta->getAPInt();
1478 APInt APCoeff = ConstCoeff->getAPInt();
1479 APInt Distance = APDelta; // these need to be initialzed
1480 APInt Remainder = APDelta;
1481 APInt::sdivrem(LHS: APDelta, RHS: APCoeff, Quotient&: Distance, Remainder);
1482 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1483 if (Remainder != 0) {
1484 // Coeff doesn't divide Delta, no dependence
1485 ++WeakCrossingSIVindependence;
1486 ++WeakCrossingSIVsuccesses;
1487 return true;
1488 }
1489 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1490
1491 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1492 APInt Two = APInt(Distance.getBitWidth(), 2, true);
1493 Remainder = Distance.srem(RHS: Two);
1494 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1495 if (Remainder != 0) {
1496 // Equal direction isn't possible
1497 Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ;
1498 ++WeakCrossingSIVsuccesses;
1499 }
1500 return false;
1501}
1502
1503// Kirch's algorithm, from
1504//
1505// Optimizing Supercompilers for Supercomputers
1506// Michael Wolfe
1507// MIT Press, 1989
1508//
1509// Program 2.1, page 29.
1510// Computes the GCD of AM and BM.
1511// Also finds a solution to the equation ax - by = gcd(a, b).
1512// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1513//
1514// We don't use OverflowSafeSignedAPInt here because it's known that this
1515// algorithm doesn't overflow.
1516static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1517 const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1518 APInt A0(Bits, 1, true), A1(Bits, 0, true);
1519 APInt B0(Bits, 0, true), B1(Bits, 1, true);
1520 APInt G0 = AM.abs();
1521 APInt G1 = BM.abs();
1522 APInt Q = G0; // these need to be initialized
1523 APInt R = G0;
1524 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1525 while (R != 0) {
1526 // clang-format off
1527 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1528 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1529 G0 = G1; G1 = R;
1530 // clang-format on
1531 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1532 }
1533 G = G1;
1534 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
1535 X = AM.slt(RHS: 0) ? -A1 : A1;
1536 Y = BM.slt(RHS: 0) ? B1 : -B1;
1537
1538 // make sure gcd divides Delta
1539 R = Delta.srem(RHS: G);
1540 if (R != 0)
1541 return true; // gcd doesn't divide Delta, no dependence
1542 Q = Delta.sdiv(RHS: G);
1543 return false;
1544}
1545
1546static OverflowSafeSignedAPInt
1547floorOfQuotient(const OverflowSafeSignedAPInt &OA,
1548 const OverflowSafeSignedAPInt &OB) {
1549 if (!OA || !OB)
1550 return OverflowSafeSignedAPInt();
1551
1552 APInt A = *OA;
1553 APInt B = *OB;
1554 APInt Q = A; // these need to be initialized
1555 APInt R = A;
1556 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1557 if (R == 0)
1558 return Q;
1559 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1560 return Q;
1561 return OverflowSafeSignedAPInt(Q) - 1;
1562}
1563
1564static OverflowSafeSignedAPInt
1565ceilingOfQuotient(const OverflowSafeSignedAPInt &OA,
1566 const OverflowSafeSignedAPInt &OB) {
1567 if (!OA || !OB)
1568 return OverflowSafeSignedAPInt();
1569
1570 APInt A = *OA;
1571 APInt B = *OB;
1572 APInt Q = A; // these need to be initialized
1573 APInt R = A;
1574 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1575 if (R == 0)
1576 return Q;
1577 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1578 return OverflowSafeSignedAPInt(Q) + 1;
1579 return Q;
1580}
1581
1582/// Given an affine expression of the form A*k + B, where k is an arbitrary
1583/// integer, infer the possible range of k based on the known range of the
1584/// affine expression. If we know A*k + B is non-negative, i.e.,
1585///
1586/// A*k + B >= 0
1587///
1588/// we can derive the following inequalities for k when A is positive:
1589///
1590/// k >= -B / A
1591///
1592/// Since k is an integer, it means k is greater than or equal to the
1593/// ceil(-B / A).
1594///
1595/// If the upper bound of the affine expression \p UB is passed, the following
1596/// inequality can be derived as well:
1597///
1598/// A*k + B <= UB
1599///
1600/// which leads to:
1601///
1602/// k <= (UB - B) / A
1603///
1604/// Again, as k is an integer, it means k is less than or equal to the
1605/// floor((UB - B) / A).
1606///
1607/// The similar logic applies when A is negative, but the inequalities sign flip
1608/// while working with them.
1609///
1610/// Preconditions: \p A is non-zero, and we know A*k + B is non-negative.
1611static std::pair<OverflowSafeSignedAPInt, OverflowSafeSignedAPInt>
1612inferDomainOfAffine(OverflowSafeSignedAPInt A, OverflowSafeSignedAPInt B,
1613 OverflowSafeSignedAPInt UB) {
1614 assert(A && B && "A and B must be available");
1615 assert(*A != 0 && "A must be non-zero");
1616 OverflowSafeSignedAPInt TL, TU;
1617 if (A->sgt(RHS: 0)) {
1618 TL = ceilingOfQuotient(OA: -B, OB: A);
1619 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1620
1621 // New bound check - modification to Banerjee's e3 check
1622 TU = floorOfQuotient(OA: UB - B, OB: A);
1623 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1624 } else {
1625 TU = floorOfQuotient(OA: -B, OB: A);
1626 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1627
1628 // New bound check - modification to Banerjee's e3 check
1629 TL = ceilingOfQuotient(OA: UB - B, OB: A);
1630 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1631 }
1632 return std::make_pair(x&: TL, y&: TU);
1633}
1634
1635// exactSIVtest -
1636// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1637// where i is an induction variable, c1 and c2 are loop invariant, and a1
1638// and a2 are constant, we can solve it exactly using an algorithm developed
1639// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1640//
1641// Dependence Analysis for Supercomputing
1642// Utpal Banerjee
1643// Kluwer Academic Publishers, 1988
1644//
1645// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1646// so use them if possible. They're also a bit better with symbolics and,
1647// in the case of the strong SIV test, can compute Distances.
1648//
1649// Return true if dependence disproved.
1650//
1651// This is a modified version of the original Banerjee algorithm. The original
1652// only tested whether Dst depends on Src. This algorithm extends that and
1653// returns all the dependencies that exist between Dst and Src.
1654bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1655 const SCEV *SrcConst, const SCEV *DstConst,
1656 const Loop *CurSrcLoop,
1657 const Loop *CurDstLoop, unsigned Level,
1658 FullDependence &Result) const {
1659 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactSIV))
1660 return false;
1661
1662 LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1663 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
1664 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
1665 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1666 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1667 ++ExactSIVapplications;
1668 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1669 Level--;
1670 Result.Consistent = false;
1671 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
1672 if (!Delta)
1673 return false;
1674 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1675 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1676 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1677 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1678 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1679 return false;
1680
1681 // find gcd
1682 APInt G, X, Y;
1683 APInt AM = ConstSrcCoeff->getAPInt();
1684 APInt BM = ConstDstCoeff->getAPInt();
1685 APInt CM = ConstDelta->getAPInt();
1686 unsigned Bits = AM.getBitWidth();
1687 if (findGCD(Bits, AM, BM, Delta: CM, G, X, Y)) {
1688 // gcd doesn't divide Delta, no dependence
1689 ++ExactSIVindependence;
1690 ++ExactSIVsuccesses;
1691 return true;
1692 }
1693
1694 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1695
1696 // since SCEV construction normalizes, LM = 0
1697 std::optional<APInt> UM;
1698 // UM is perhaps unavailable, let's check
1699 if (const SCEVConstant *CUB =
1700 collectConstantUpperBound(L: CurSrcLoop, T: Delta->getType())) {
1701 UM = CUB->getAPInt();
1702 LLVM_DEBUG(dbgs() << "\t UM = " << *UM << "\n");
1703 }
1704
1705 APInt TU(APInt::getSignedMaxValue(numBits: Bits));
1706 APInt TL(APInt::getSignedMinValue(numBits: Bits));
1707 APInt TC = CM.sdiv(RHS: G);
1708 APInt TX = X * TC;
1709 APInt TY = Y * TC;
1710 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
1711 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
1712 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
1713
1714 APInt TB = BM.sdiv(RHS: G);
1715 APInt TA = AM.sdiv(RHS: G);
1716
1717 // At this point, we have the following equations:
1718 //
1719 // TA*i0 - TB*i1 = TC
1720 //
1721 // Also, we know that the all pairs of (i0, i1) can be expressed as:
1722 //
1723 // (TX + k*TB, TY + k*TA)
1724 //
1725 // where k is an arbitrary integer.
1726 auto [TL0, TU0] = inferDomainOfAffine(A: TB, B: TX, UB: UM);
1727 auto [TL1, TU1] = inferDomainOfAffine(A: TA, B: TY, UB: UM);
1728
1729 auto CreateVec = [](const OverflowSafeSignedAPInt &V0,
1730 const OverflowSafeSignedAPInt &V1) {
1731 SmallVector<APInt, 2> Vec;
1732 if (V0)
1733 Vec.push_back(Elt: *V0);
1734 if (V1)
1735 Vec.push_back(Elt: *V1);
1736 return Vec;
1737 };
1738
1739 SmallVector<APInt, 2> TLVec = CreateVec(TL0, TL1);
1740 SmallVector<APInt, 2> TUVec = CreateVec(TU0, TU1);
1741
1742 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
1743 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
1744
1745 if (TLVec.empty() || TUVec.empty())
1746 return false;
1747 TL = APIntOps::smax(A: TLVec.front(), B: TLVec.back());
1748 TU = APIntOps::smin(A: TUVec.front(), B: TUVec.back());
1749 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
1750 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
1751
1752 if (TL.sgt(RHS: TU)) {
1753 ++ExactSIVindependence;
1754 ++ExactSIVsuccesses;
1755 return true;
1756 }
1757
1758 // explore directions
1759 unsigned NewDirection = Dependence::DVEntry::NONE;
1760 OverflowSafeSignedAPInt LowerDistance, UpperDistance;
1761 OverflowSafeSignedAPInt OTY(TY), OTX(TX), OTA(TA), OTB(TB), OTL(TL), OTU(TU);
1762 // NOTE: It's unclear whether these calculations can overflow. At the moment,
1763 // we conservatively assume they can.
1764 if (TA.sgt(RHS: TB)) {
1765 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1766 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1767 } else {
1768 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1769 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1770 }
1771
1772 if (!LowerDistance || !UpperDistance)
1773 return false;
1774
1775 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << *LowerDistance << "\n");
1776 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << *UpperDistance << "\n");
1777
1778 if (LowerDistance->sle(RHS: 0) && UpperDistance->sge(RHS: 0)) {
1779 NewDirection |= Dependence::DVEntry::EQ;
1780 ++ExactSIVsuccesses;
1781 }
1782 if (LowerDistance->slt(RHS: 0)) {
1783 NewDirection |= Dependence::DVEntry::GT;
1784 ++ExactSIVsuccesses;
1785 }
1786 if (UpperDistance->sgt(RHS: 0)) {
1787 NewDirection |= Dependence::DVEntry::LT;
1788 ++ExactSIVsuccesses;
1789 }
1790
1791 // finished
1792 Result.DV[Level].Direction &= NewDirection;
1793 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
1794 ++ExactSIVindependence;
1795 LLVM_DEBUG(dbgs() << "\t Result = ");
1796 LLVM_DEBUG(Result.dump(dbgs()));
1797 return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
1798}
1799
1800// Return true if the divisor evenly divides the dividend.
1801static bool isRemainderZero(const SCEVConstant *Dividend,
1802 const SCEVConstant *Divisor) {
1803 const APInt &ConstDividend = Dividend->getAPInt();
1804 const APInt &ConstDivisor = Divisor->getAPInt();
1805 return ConstDividend.srem(RHS: ConstDivisor) == 0;
1806}
1807
1808// weakZeroSrcSIVtest -
1809// From the paper, Practical Dependence Testing, Section 4.2.2
1810//
1811// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1812// where i is an induction variable, c1 and c2 are loop invariant,
1813// and a is a constant, we can solve it exactly using the
1814// Weak-Zero SIV test.
1815//
1816// Given
1817//
1818// c1 = c2 + a*i
1819//
1820// we get
1821//
1822// (c1 - c2)/a = i
1823//
1824// If i is not an integer, there's no dependence.
1825// If i < 0 or > UB, there's no dependence.
1826// If i = 0, the direction is >= and peeling the
1827// 1st iteration will break the dependence.
1828// If i = UB, the direction is <= and peeling the
1829// last iteration will break the dependence.
1830// Otherwise, the direction is *.
1831//
1832// Can prove independence. Failing that, we can sometimes refine
1833// the directions. Can sometimes show that first or last
1834// iteration carries all the dependences (so worth peeling).
1835//
1836// (see also weakZeroDstSIVtest)
1837//
1838// Return true if dependence disproved.
1839bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
1840 const SCEV *SrcConst,
1841 const SCEV *DstConst,
1842 const Loop *CurSrcLoop,
1843 const Loop *CurDstLoop, unsigned Level,
1844 FullDependence &Result) const {
1845 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
1846 return false;
1847
1848 // For the WeakSIV test, it's possible the loop isn't common to
1849 // the Src and Dst loops. If it isn't, then there's no need to
1850 // record a direction.
1851 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1852 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1853 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1854 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1855 ++WeakZeroSIVapplications;
1856 assert(0 < Level && Level <= MaxLevels && "Level out of range");
1857 Level--;
1858 Result.Consistent = false;
1859 const SCEV *Delta = SE->getMinusSCEV(LHS: SrcConst, RHS: DstConst);
1860 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1861 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: SrcConst, RHS: DstConst)) {
1862 if (Level < CommonLevels) {
1863 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1864 Result.DV[Level].PeelFirst = true;
1865 ++WeakZeroSIVsuccesses;
1866 }
1867 return false; // dependences caused by first iteration
1868 }
1869 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1870 if (!ConstCoeff)
1871 return false;
1872
1873 // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
1874 // TODO: Bail out if it's a signed minimum value.
1875 const SCEV *AbsCoeff = SE->isKnownNegative(S: ConstCoeff)
1876 ? SE->getNegativeSCEV(V: ConstCoeff)
1877 : ConstCoeff;
1878 const SCEV *NewDelta =
1879 SE->isKnownNegative(S: ConstCoeff) ? SE->getNegativeSCEV(V: Delta) : Delta;
1880
1881 // check that Delta/SrcCoeff < iteration count
1882 // really check NewDelta < count*AbsCoeff
1883 if (const SCEV *UpperBound =
1884 collectUpperBound(L: CurSrcLoop, T: Delta->getType())) {
1885 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1886 const SCEV *Product = SE->getMulExpr(LHS: AbsCoeff, RHS: UpperBound);
1887 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: NewDelta, RHS: Product)) {
1888 ++WeakZeroSIVindependence;
1889 ++WeakZeroSIVsuccesses;
1890 return true;
1891 }
1892 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: NewDelta, RHS: Product)) {
1893 // dependences caused by last iteration
1894 if (Level < CommonLevels) {
1895 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1896 Result.DV[Level].PeelLast = true;
1897 ++WeakZeroSIVsuccesses;
1898 }
1899 return false;
1900 }
1901 }
1902
1903 // check that Delta/SrcCoeff >= 0
1904 // really check that NewDelta >= 0
1905 if (SE->isKnownNegative(S: NewDelta)) {
1906 // No dependence, newDelta < 0
1907 ++WeakZeroSIVindependence;
1908 ++WeakZeroSIVsuccesses;
1909 return true;
1910 }
1911
1912 // if SrcCoeff doesn't divide Delta, then no dependence
1913 if (isa<SCEVConstant>(Val: Delta) &&
1914 !isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
1915 ++WeakZeroSIVindependence;
1916 ++WeakZeroSIVsuccesses;
1917 return true;
1918 }
1919 return false;
1920}
1921
1922// weakZeroDstSIVtest -
1923// From the paper, Practical Dependence Testing, Section 4.2.2
1924//
1925// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1926// where i is an induction variable, c1 and c2 are loop invariant,
1927// and a is a constant, we can solve it exactly using the
1928// Weak-Zero SIV test.
1929//
1930// Given
1931//
1932// c1 + a*i = c2
1933//
1934// we get
1935//
1936// i = (c2 - c1)/a
1937//
1938// If i is not an integer, there's no dependence.
1939// If i < 0 or > UB, there's no dependence.
1940// If i = 0, the direction is <= and peeling the
1941// 1st iteration will break the dependence.
1942// If i = UB, the direction is >= and peeling the
1943// last iteration will break the dependence.
1944// Otherwise, the direction is *.
1945//
1946// Can prove independence. Failing that, we can sometimes refine
1947// the directions. Can sometimes show that first or last
1948// iteration carries all the dependences (so worth peeling).
1949//
1950// (see also weakZeroSrcSIVtest)
1951//
1952// Return true if dependence disproved.
1953bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
1954 const SCEV *SrcConst,
1955 const SCEV *DstConst,
1956 const Loop *CurSrcLoop,
1957 const Loop *CurDstLoop, unsigned Level,
1958 FullDependence &Result) const {
1959 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
1960 return false;
1961
1962 // For the WeakSIV test, it's possible the loop isn't common to the
1963 // Src and Dst loops. If it isn't, then there's no need to record a direction.
1964 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1965 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
1966 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1967 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1968 ++WeakZeroSIVapplications;
1969 assert(0 < Level && Level <= SrcLevels && "Level out of range");
1970 Level--;
1971 Result.Consistent = false;
1972 const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
1973 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1974 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: DstConst, RHS: SrcConst)) {
1975 if (Level < CommonLevels) {
1976 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1977 Result.DV[Level].PeelFirst = true;
1978 ++WeakZeroSIVsuccesses;
1979 }
1980 return false; // dependences caused by first iteration
1981 }
1982 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1983 if (!ConstCoeff)
1984 return false;
1985
1986 // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
1987 // TODO: Bail out if it's a signed minimum value.
1988 const SCEV *AbsCoeff = SE->isKnownNegative(S: ConstCoeff)
1989 ? SE->getNegativeSCEV(V: ConstCoeff)
1990 : ConstCoeff;
1991 const SCEV *NewDelta =
1992 SE->isKnownNegative(S: ConstCoeff) ? SE->getNegativeSCEV(V: Delta) : Delta;
1993
1994 // check that Delta/SrcCoeff < iteration count
1995 // really check NewDelta < count*AbsCoeff
1996 if (const SCEV *UpperBound =
1997 collectUpperBound(L: CurSrcLoop, T: Delta->getType())) {
1998 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1999 const SCEV *Product = SE->getMulExpr(LHS: AbsCoeff, RHS: UpperBound);
2000 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: NewDelta, RHS: Product)) {
2001 ++WeakZeroSIVindependence;
2002 ++WeakZeroSIVsuccesses;
2003 return true;
2004 }
2005 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: NewDelta, RHS: Product)) {
2006 // dependences caused by last iteration
2007 if (Level < CommonLevels) {
2008 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
2009 Result.DV[Level].PeelLast = true;
2010 ++WeakZeroSIVsuccesses;
2011 }
2012 return false;
2013 }
2014 }
2015
2016 // check that Delta/SrcCoeff >= 0
2017 // really check that NewDelta >= 0
2018 if (SE->isKnownNegative(S: NewDelta)) {
2019 // No dependence, newDelta < 0
2020 ++WeakZeroSIVindependence;
2021 ++WeakZeroSIVsuccesses;
2022 return true;
2023 }
2024
2025 // if SrcCoeff doesn't divide Delta, then no dependence
2026 if (isa<SCEVConstant>(Val: Delta) &&
2027 !isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
2028 ++WeakZeroSIVindependence;
2029 ++WeakZeroSIVsuccesses;
2030 return true;
2031 }
2032 return false;
2033}
2034
2035// exactRDIVtest - Tests the RDIV subscript pair for dependence.
2036// Things of the form [c1 + a*i] and [c2 + b*j],
2037// where i and j are induction variable, c1 and c2 are loop invariant,
2038// and a and b are constants.
2039// Returns true if any possible dependence is disproved.
2040// Marks the result as inconsistent.
2041// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
2042bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
2043 const SCEV *SrcConst, const SCEV *DstConst,
2044 const Loop *SrcLoop, const Loop *DstLoop,
2045 FullDependence &Result) const {
2046 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactRDIV))
2047 return false;
2048
2049 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
2050 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
2051 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
2052 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
2053 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
2054 ++ExactRDIVapplications;
2055 Result.Consistent = false;
2056 const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
2057 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
2058 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
2059 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
2060 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
2061 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
2062 return false;
2063
2064 // find gcd
2065 APInt G, X, Y;
2066 APInt AM = ConstSrcCoeff->getAPInt();
2067 APInt BM = ConstDstCoeff->getAPInt();
2068 APInt CM = ConstDelta->getAPInt();
2069 unsigned Bits = AM.getBitWidth();
2070 if (findGCD(Bits, AM, BM, Delta: CM, G, X, Y)) {
2071 // gcd doesn't divide Delta, no dependence
2072 ++ExactRDIVindependence;
2073 return true;
2074 }
2075
2076 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
2077
2078 // since SCEV construction seems to normalize, LM = 0
2079 std::optional<APInt> SrcUM;
2080 // SrcUM is perhaps unavailable, let's check
2081 if (const SCEVConstant *UpperBound =
2082 collectConstantUpperBound(L: SrcLoop, T: Delta->getType())) {
2083 SrcUM = UpperBound->getAPInt();
2084 LLVM_DEBUG(dbgs() << "\t SrcUM = " << *SrcUM << "\n");
2085 }
2086
2087 std::optional<APInt> DstUM;
2088 // UM is perhaps unavailable, let's check
2089 if (const SCEVConstant *UpperBound =
2090 collectConstantUpperBound(L: DstLoop, T: Delta->getType())) {
2091 DstUM = UpperBound->getAPInt();
2092 LLVM_DEBUG(dbgs() << "\t DstUM = " << *DstUM << "\n");
2093 }
2094
2095 APInt TU(APInt::getSignedMaxValue(numBits: Bits));
2096 APInt TL(APInt::getSignedMinValue(numBits: Bits));
2097 APInt TC = CM.sdiv(RHS: G);
2098 APInt TX = X * TC;
2099 APInt TY = Y * TC;
2100 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
2101 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
2102 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
2103
2104 APInt TB = BM.sdiv(RHS: G);
2105 APInt TA = AM.sdiv(RHS: G);
2106
2107 // At this point, we have the following equations:
2108 //
2109 // TA*i - TB*j = TC
2110 //
2111 // Also, we know that the all pairs of (i, j) can be expressed as:
2112 //
2113 // (TX + k*TB, TY + k*TA)
2114 //
2115 // where k is an arbitrary integer.
2116 auto [TL0, TU0] = inferDomainOfAffine(A: TB, B: TX, UB: SrcUM);
2117 auto [TL1, TU1] = inferDomainOfAffine(A: TA, B: TY, UB: DstUM);
2118
2119 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
2120 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
2121
2122 auto CreateVec = [](const OverflowSafeSignedAPInt &V0,
2123 const OverflowSafeSignedAPInt &V1) {
2124 SmallVector<APInt, 2> Vec;
2125 if (V0)
2126 Vec.push_back(Elt: *V0);
2127 if (V1)
2128 Vec.push_back(Elt: *V1);
2129 return Vec;
2130 };
2131
2132 SmallVector<APInt, 2> TLVec = CreateVec(TL0, TL1);
2133 SmallVector<APInt, 2> TUVec = CreateVec(TU0, TU1);
2134 if (TLVec.empty() || TUVec.empty())
2135 return false;
2136
2137 TL = APIntOps::smax(A: TLVec.front(), B: TLVec.back());
2138 TU = APIntOps::smin(A: TUVec.front(), B: TUVec.back());
2139 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
2140 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
2141
2142 if (TL.sgt(RHS: TU))
2143 ++ExactRDIVindependence;
2144 return TL.sgt(RHS: TU);
2145}
2146
2147// symbolicRDIVtest -
2148// In Section 4.5 of the Practical Dependence Testing paper,the authors
2149// introduce a special case of Banerjee's Inequalities (also called the
2150// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
2151// particularly cases with symbolics. Since it's only able to disprove
2152// dependence (not compute distances or directions), we'll use it as a
2153// fall back for the other tests.
2154//
2155// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2156// where i and j are induction variables and c1 and c2 are loop invariants,
2157// we can use the symbolic tests to disprove some dependences, serving as a
2158// backup for the RDIV test. Note that i and j can be the same variable,
2159// letting this test serve as a backup for the various SIV tests.
2160//
2161// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
2162// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
2163// loop bounds for the i and j loops, respectively. So, ...
2164//
2165// c1 + a1*i = c2 + a2*j
2166// a1*i - a2*j = c2 - c1
2167//
2168// To test for a dependence, we compute c2 - c1 and make sure it's in the
2169// range of the maximum and minimum possible values of a1*i - a2*j.
2170// Considering the signs of a1 and a2, we have 4 possible cases:
2171//
2172// 1) If a1 >= 0 and a2 >= 0, then
2173// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
2174// -a2*N2 <= c2 - c1 <= a1*N1
2175//
2176// 2) If a1 >= 0 and a2 <= 0, then
2177// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
2178// 0 <= c2 - c1 <= a1*N1 - a2*N2
2179//
2180// 3) If a1 <= 0 and a2 >= 0, then
2181// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
2182// a1*N1 - a2*N2 <= c2 - c1 <= 0
2183//
2184// 4) If a1 <= 0 and a2 <= 0, then
2185// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
2186// a1*N1 <= c2 - c1 <= -a2*N2
2187//
2188// return true if dependence disproved
2189bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
2190 const SCEV *C1, const SCEV *C2,
2191 const Loop *Loop1,
2192 const Loop *Loop2) const {
2193 if (!isDependenceTestEnabled(Test: DependenceTestType::SymbolicRDIV))
2194 return false;
2195
2196 ++SymbolicRDIVapplications;
2197 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
2198 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);
2199 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
2200 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
2201 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
2202 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
2203 const SCEV *N1 = collectUpperBound(L: Loop1, T: A1->getType());
2204 const SCEV *N2 = collectUpperBound(L: Loop2, T: A1->getType());
2205 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
2206 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
2207 const SCEV *C2_C1 = SE->getMinusSCEV(LHS: C2, RHS: C1);
2208 const SCEV *C1_C2 = SE->getMinusSCEV(LHS: C1, RHS: C2);
2209 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
2210 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
2211 if (SE->isKnownNonNegative(S: A1)) {
2212 if (SE->isKnownNonNegative(S: A2)) {
2213 // A1 >= 0 && A2 >= 0
2214 if (N1) {
2215 // make sure that c2 - c1 <= a1*N1
2216 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2217 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2218 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: C2_C1, RHS: A1N1)) {
2219 ++SymbolicRDIVindependence;
2220 return true;
2221 }
2222 }
2223 if (N2) {
2224 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
2225 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2226 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2227 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SLT, LHS: A2N2, RHS: C1_C2)) {
2228 ++SymbolicRDIVindependence;
2229 return true;
2230 }
2231 }
2232 } else if (SE->isKnownNonPositive(S: A2)) {
2233 // a1 >= 0 && a2 <= 0
2234 if (N1 && N2) {
2235 // make sure that c2 - c1 <= a1*N1 - a2*N2
2236 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2237 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2238 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(LHS: A1N1, RHS: A2N2);
2239 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2240 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: C2_C1, RHS: A1N1_A2N2)) {
2241 ++SymbolicRDIVindependence;
2242 return true;
2243 }
2244 }
2245 // make sure that 0 <= c2 - c1
2246 if (SE->isKnownNegative(S: C2_C1)) {
2247 ++SymbolicRDIVindependence;
2248 return true;
2249 }
2250 }
2251 } else if (SE->isKnownNonPositive(S: A1)) {
2252 if (SE->isKnownNonNegative(S: A2)) {
2253 // a1 <= 0 && a2 >= 0
2254 if (N1 && N2) {
2255 // make sure that a1*N1 - a2*N2 <= c2 - c1
2256 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2257 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2258 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(LHS: A1N1, RHS: A2N2);
2259 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2260 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: A1N1_A2N2, RHS: C2_C1)) {
2261 ++SymbolicRDIVindependence;
2262 return true;
2263 }
2264 }
2265 // make sure that c2 - c1 <= 0
2266 if (SE->isKnownPositive(S: C2_C1)) {
2267 ++SymbolicRDIVindependence;
2268 return true;
2269 }
2270 } else if (SE->isKnownNonPositive(S: A2)) {
2271 // a1 <= 0 && a2 <= 0
2272 if (N1) {
2273 // make sure that a1*N1 <= c2 - c1
2274 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2275 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2276 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: A1N1, RHS: C2_C1)) {
2277 ++SymbolicRDIVindependence;
2278 return true;
2279 }
2280 }
2281 if (N2) {
2282 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2283 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2284 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2285 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SLT, LHS: C1_C2, RHS: A2N2)) {
2286 ++SymbolicRDIVindependence;
2287 return true;
2288 }
2289 }
2290 }
2291 }
2292 return false;
2293}
2294
2295// testSIV -
2296// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2297// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2298// a2 are constant, we attack it with an SIV test. While they can all be
2299// solved with the Exact SIV test, it's worthwhile to use simpler tests when
2300// they apply; they're cheaper and sometimes more precise.
2301//
2302// Return true if dependence disproved.
2303bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
2304 FullDependence &Result,
2305 bool UnderRuntimeAssumptions) {
2306 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2307 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2308 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
2309 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
2310 if (SrcAddRec && DstAddRec) {
2311 const SCEV *SrcConst = SrcAddRec->getStart();
2312 const SCEV *DstConst = DstAddRec->getStart();
2313 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
2314 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
2315 const Loop *CurSrcLoop = SrcAddRec->getLoop();
2316 const Loop *CurDstLoop = DstAddRec->getLoop();
2317 assert(haveSameSD(CurSrcLoop, CurDstLoop) &&
2318 "Loops in the SIV test should have the same iteration space and "
2319 "depth");
2320 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
2321 bool disproven;
2322 if (SrcCoeff == DstCoeff)
2323 disproven = strongSIVtest(Src: SrcAddRec, Dst: DstAddRec, Level, Result,
2324 UnderRuntimeAssumptions);
2325 else if (SrcCoeff == SE->getNegativeSCEV(V: DstCoeff))
2326 disproven = weakCrossingSIVtest(Coeff: SrcCoeff, SrcConst, DstConst, CurSrcLoop,
2327 CurDstLoop, Level, Result);
2328 else
2329 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst,
2330 CurSrcLoop, CurDstLoop, Level, Result);
2331 return disproven || gcdMIVtest(Src, Dst, Result) ||
2332 symbolicRDIVtest(A1: SrcCoeff, A2: DstCoeff, C1: SrcConst, C2: DstConst, Loop1: CurSrcLoop,
2333 Loop2: CurDstLoop);
2334 }
2335 if (SrcAddRec) {
2336 const SCEV *SrcConst = SrcAddRec->getStart();
2337 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
2338 const SCEV *DstConst = Dst;
2339 const Loop *CurSrcLoop = SrcAddRec->getLoop();
2340 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
2341 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
2342 CurDstLoop: CurSrcLoop, Level, Result) ||
2343 gcdMIVtest(Src, Dst, Result);
2344 }
2345 if (DstAddRec) {
2346 const SCEV *DstConst = DstAddRec->getStart();
2347 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
2348 const SCEV *SrcConst = Src;
2349 const Loop *CurDstLoop = DstAddRec->getLoop();
2350 Level = mapDstLoop(DstLoop: CurDstLoop);
2351 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, CurSrcLoop: CurDstLoop,
2352 CurDstLoop, Level, Result) ||
2353 gcdMIVtest(Src, Dst, Result);
2354 }
2355 llvm_unreachable("SIV test expected at least one AddRec");
2356 return false;
2357}
2358
2359// testRDIV -
2360// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2361// where i and j are induction variables, c1 and c2 are loop invariant,
2362// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2363// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2364// It doesn't make sense to talk about distance or direction in this case,
2365// so there's no point in making special versions of the Strong SIV test or
2366// the Weak-crossing SIV test.
2367//
2368// Return true if dependence disproved.
2369bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
2370 FullDependence &Result) const {
2371 const SCEV *SrcConst, *DstConst;
2372 const SCEV *SrcCoeff, *DstCoeff;
2373 const Loop *SrcLoop, *DstLoop;
2374
2375 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2376 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2377 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
2378 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
2379 if (SrcAddRec && DstAddRec) {
2380 SrcConst = SrcAddRec->getStart();
2381 SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
2382 SrcLoop = SrcAddRec->getLoop();
2383 DstConst = DstAddRec->getStart();
2384 DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
2385 DstLoop = DstAddRec->getLoop();
2386 } else
2387 llvm_unreachable("RDIV expected at least one AddRec");
2388 return exactRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, SrcLoop, DstLoop,
2389 Result) ||
2390 gcdMIVtest(Src, Dst, Result) ||
2391 symbolicRDIVtest(A1: SrcCoeff, A2: DstCoeff, C1: SrcConst, C2: DstConst, Loop1: SrcLoop,
2392 Loop2: DstLoop);
2393}
2394
2395// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2396// Return true if dependence disproved.
2397// Can sometimes refine direction vectors.
2398bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
2399 const SmallBitVector &Loops,
2400 FullDependence &Result) const {
2401 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2402 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2403 Result.Consistent = false;
2404 return gcdMIVtest(Src, Dst, Result) ||
2405 banerjeeMIVtest(Src, Dst, Loops, Result);
2406}
2407
2408/// Given a SCEVMulExpr, returns its first operand if its first operand is a
2409/// constant and the product doesn't overflow in a signed sense. Otherwise,
2410/// returns std::nullopt. For example, given (10 * X * Y)<nsw>, it returns 10.
2411/// Notably, if it doesn't have nsw, the multiplication may overflow, and if
2412/// so, it may not a multiple of 10.
2413static std::optional<APInt> getConstantCoefficient(const SCEV *Expr) {
2414 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Expr))
2415 return Constant->getAPInt();
2416 if (const auto *Product = dyn_cast<SCEVMulExpr>(Val: Expr))
2417 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Product->getOperand(i: 0)))
2418 if (Product->hasNoSignedWrap())
2419 return Constant->getAPInt();
2420 return std::nullopt;
2421}
2422
2423bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr,
2424 const Loop *CurLoop,
2425 const SCEV *&CurLoopCoeff,
2426 APInt &RunningGCD) const {
2427 // If RunningGCD is already 1, exit early.
2428 // TODO: It might be better to continue the recursion to find CurLoopCoeff.
2429 if (RunningGCD == 1)
2430 return true;
2431
2432 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
2433 if (!AddRec) {
2434 assert(isLoopInvariant(Expr, CurLoop) &&
2435 "Expected loop invariant expression");
2436 return true;
2437 }
2438
2439 assert(AddRec->isAffine() && "Unexpected Expr");
2440 const SCEV *Start = AddRec->getStart();
2441 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
2442 if (AddRec->getLoop() == CurLoop) {
2443 CurLoopCoeff = Step;
2444 } else {
2445 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Step);
2446
2447 // If the coefficient is the product of a constant and other stuff, we can
2448 // use the constant in the GCD computation.
2449 if (!ConstCoeff)
2450 return false;
2451
2452 // TODO: What happens if ConstCoeff is the "most negative" signed number
2453 // (e.g. -128 for 8 bit wide APInt)?
2454 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2455 }
2456
2457 return accumulateCoefficientsGCD(Expr: Start, CurLoop, CurLoopCoeff, RunningGCD);
2458}
2459
2460//===----------------------------------------------------------------------===//
2461// gcdMIVtest -
2462// Tests an MIV subscript pair for dependence.
2463// Returns true if any possible dependence is disproved.
2464// Marks the result as inconsistent.
2465// Can sometimes disprove the equal direction for 1 or more loops,
2466// as discussed in Michael Wolfe's book,
2467// High Performance Compilers for Parallel Computing, page 235.
2468//
2469// We spend some effort (code!) to handle cases like
2470// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2471// but M and N are just loop-invariant variables.
2472// This should help us handle linearized subscripts;
2473// also makes this test a useful backup to the various SIV tests.
2474//
2475// It occurs to me that the presence of loop-invariant variables
2476// changes the nature of the test from "greatest common divisor"
2477// to "a common divisor".
2478bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
2479 FullDependence &Result) const {
2480 if (!isDependenceTestEnabled(Test: DependenceTestType::GCDMIV))
2481 return false;
2482
2483 LLVM_DEBUG(dbgs() << "starting gcd\n");
2484 ++GCDapplications;
2485 unsigned BitWidth = SE->getTypeSizeInBits(Ty: Src->getType());
2486 APInt RunningGCD = APInt::getZero(numBits: BitWidth);
2487
2488 // Examine Src coefficients.
2489 // Compute running GCD and record source constant.
2490 // Because we're looking for the constant at the end of the chain,
2491 // we can't quit the loop just because the GCD == 1.
2492 const SCEV *Coefficients = Src;
2493 while (const SCEVAddRecExpr *AddRec =
2494 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2495 const SCEV *Coeff = AddRec->getStepRecurrence(SE&: *SE);
2496 // If the coefficient is the product of a constant and other stuff,
2497 // we can use the constant in the GCD computation.
2498 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Coeff);
2499 if (!ConstCoeff)
2500 return false;
2501 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2502 Coefficients = AddRec->getStart();
2503 }
2504 const SCEV *SrcConst = Coefficients;
2505
2506 // Examine Dst coefficients.
2507 // Compute running GCD and record destination constant.
2508 // Because we're looking for the constant at the end of the chain,
2509 // we can't quit the loop just because the GCD == 1.
2510 Coefficients = Dst;
2511 while (const SCEVAddRecExpr *AddRec =
2512 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2513 const SCEV *Coeff = AddRec->getStepRecurrence(SE&: *SE);
2514 // If the coefficient is the product of a constant and other stuff,
2515 // we can use the constant in the GCD computation.
2516 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Coeff);
2517 if (!ConstCoeff)
2518 return false;
2519 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2520 Coefficients = AddRec->getStart();
2521 }
2522 const SCEV *DstConst = Coefficients;
2523
2524 APInt ExtraGCD = APInt::getZero(numBits: BitWidth);
2525 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
2526 if (!Delta)
2527 return false;
2528 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
2529 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Val: Delta);
2530 if (!Constant)
2531 return false;
2532 APInt ConstDelta = cast<SCEVConstant>(Val: Constant)->getAPInt();
2533 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
2534 if (ConstDelta == 0)
2535 return false;
2536 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
2537 APInt Remainder = ConstDelta.srem(RHS: RunningGCD);
2538 if (Remainder != 0) {
2539 ++GCDindependence;
2540 return true;
2541 }
2542
2543 // Try to disprove equal directions.
2544 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2545 // the code above can't disprove the dependence because the GCD = 1.
2546 // So we consider what happen if i = i' and what happens if j = j'.
2547 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2548 // which is infeasible, so we can disallow the = direction for the i level.
2549 // Setting j = j' doesn't help matters, so we end up with a direction vector
2550 // of [<>, *]
2551 //
2552 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2553 // we need to remember that the constant part is 5 and the RunningGCD should
2554 // be initialized to ExtraGCD = 30.
2555 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
2556
2557 bool Improved = false;
2558 Coefficients = Src;
2559 while (const SCEVAddRecExpr *AddRec =
2560 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2561 Coefficients = AddRec->getStart();
2562 const Loop *CurLoop = AddRec->getLoop();
2563 RunningGCD = ExtraGCD;
2564 const SCEV *SrcCoeff = AddRec->getStepRecurrence(SE&: *SE);
2565 const SCEV *DstCoeff = SE->getMinusSCEV(LHS: SrcCoeff, RHS: SrcCoeff);
2566
2567 if (!accumulateCoefficientsGCD(Expr: Src, CurLoop, CurLoopCoeff&: SrcCoeff, RunningGCD) ||
2568 !accumulateCoefficientsGCD(Expr: Dst, CurLoop, CurLoopCoeff&: DstCoeff, RunningGCD))
2569 return false;
2570
2571 Delta = SE->getMinusSCEV(LHS: SrcCoeff, RHS: DstCoeff);
2572 // If the coefficient is the product of a constant and other stuff,
2573 // we can use the constant in the GCD computation.
2574 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Delta);
2575 if (!ConstCoeff)
2576 // The difference of the two coefficients might not be a product
2577 // or constant, in which case we give up on this direction.
2578 continue;
2579 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2580 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2581 if (RunningGCD != 0) {
2582 Remainder = ConstDelta.srem(RHS: RunningGCD);
2583 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2584 if (Remainder != 0) {
2585 unsigned Level = mapSrcLoop(SrcLoop: CurLoop);
2586 Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ;
2587 Improved = true;
2588 }
2589 }
2590 }
2591 if (Improved)
2592 ++GCDsuccesses;
2593 LLVM_DEBUG(dbgs() << "all done\n");
2594 return false;
2595}
2596
2597//===----------------------------------------------------------------------===//
2598// banerjeeMIVtest -
2599// Use Banerjee's Inequalities to test an MIV subscript pair.
2600// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2601// Generally follows the discussion in Section 2.5.2 of
2602//
2603// Optimizing Supercompilers for Supercomputers
2604// Michael Wolfe
2605//
2606// The inequalities given on page 25 are simplified in that loops are
2607// normalized so that the lower bound is always 0 and the stride is always 1.
2608// For example, Wolfe gives
2609//
2610// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2611//
2612// where A_k is the coefficient of the kth index in the source subscript,
2613// B_k is the coefficient of the kth index in the destination subscript,
2614// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2615// index, and N_k is the stride of the kth index. Since all loops are normalized
2616// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2617// equation to
2618//
2619// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2620// = (A^-_k - B_k)^- (U_k - 1) - B_k
2621//
2622// Similar simplifications are possible for the other equations.
2623//
2624// When we can't determine the number of iterations for a loop,
2625// we use NULL as an indicator for the worst case, infinity.
2626// When computing the upper bound, NULL denotes +inf;
2627// for the lower bound, NULL denotes -inf.
2628//
2629// Return true if dependence disproved.
2630bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
2631 const SmallBitVector &Loops,
2632 FullDependence &Result) const {
2633 if (!isDependenceTestEnabled(Test: DependenceTestType::BanerjeeMIV))
2634 return false;
2635
2636 LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2637 ++BanerjeeApplications;
2638 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
2639 const SCEV *A0;
2640 CoefficientInfo *A = collectCoeffInfo(Subscript: Src, SrcFlag: true, Constant&: A0);
2641 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');
2642 const SCEV *B0;
2643 CoefficientInfo *B = collectCoeffInfo(Subscript: Dst, SrcFlag: false, Constant&: B0);
2644 BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2645 const SCEV *Delta = SE->getMinusSCEV(LHS: B0, RHS: A0);
2646 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2647
2648 // Compute bounds for all the * directions.
2649 LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2650 for (unsigned K = 1; K <= MaxLevels; ++K) {
2651 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
2652 Bound[K].Direction = Dependence::DVEntry::ALL;
2653 Bound[K].DirSet = Dependence::DVEntry::NONE;
2654 findBoundsALL(A, B, Bound, K);
2655#ifndef NDEBUG
2656 LLVM_DEBUG(dbgs() << "\t " << K << '\t');
2657 if (Bound[K].Lower[Dependence::DVEntry::ALL])
2658 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2659 else
2660 LLVM_DEBUG(dbgs() << "-inf\t");
2661 if (Bound[K].Upper[Dependence::DVEntry::ALL])
2662 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2663 else
2664 LLVM_DEBUG(dbgs() << "+inf\n");
2665#endif
2666 }
2667
2668 // Test the *, *, *, ... case.
2669 bool Disproved = false;
2670 if (testBounds(DirKind: Dependence::DVEntry::ALL, Level: 0, Bound, Delta)) {
2671 // Explore the direction vector hierarchy.
2672 unsigned DepthExpanded = 0;
2673 unsigned NewDeps =
2674 exploreDirections(Level: 1, A, B, Bound, Loops, DepthExpanded, Delta);
2675 if (NewDeps > 0) {
2676 bool Improved = false;
2677 for (unsigned K = 1; K <= CommonLevels; ++K) {
2678 if (Loops[K]) {
2679 unsigned Old = Result.DV[K - 1].Direction;
2680 Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2681 Improved |= Old != Result.DV[K - 1].Direction;
2682 if (!Result.DV[K - 1].Direction) {
2683 Improved = false;
2684 Disproved = true;
2685 break;
2686 }
2687 }
2688 }
2689 if (Improved)
2690 ++BanerjeeSuccesses;
2691 } else {
2692 ++BanerjeeIndependence;
2693 Disproved = true;
2694 }
2695 } else {
2696 ++BanerjeeIndependence;
2697 Disproved = true;
2698 }
2699 delete[] Bound;
2700 delete[] A;
2701 delete[] B;
2702 return Disproved;
2703}
2704
2705// Hierarchically expands the direction vector
2706// search space, combining the directions of discovered dependences
2707// in the DirSet field of Bound. Returns the number of distinct
2708// dependences discovered. If the dependence is disproved,
2709// it will return 0.
2710unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2711 CoefficientInfo *B, BoundInfo *Bound,
2712 const SmallBitVector &Loops,
2713 unsigned &DepthExpanded,
2714 const SCEV *Delta) const {
2715 // This algorithm has worst case complexity of O(3^n), where 'n' is the number
2716 // of common loop levels. To avoid excessive compile-time, pessimize all the
2717 // results and immediately return when the number of common levels is beyond
2718 // the given threshold.
2719 if (CommonLevels > MIVMaxLevelThreshold) {
2720 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2721 "direction exploration is terminated.\n");
2722 for (unsigned K = 1; K <= CommonLevels; ++K)
2723 if (Loops[K])
2724 Bound[K].DirSet = Dependence::DVEntry::ALL;
2725 return 1;
2726 }
2727
2728 if (Level > CommonLevels) {
2729 // record result
2730 LLVM_DEBUG(dbgs() << "\t[");
2731 for (unsigned K = 1; K <= CommonLevels; ++K) {
2732 if (Loops[K]) {
2733 Bound[K].DirSet |= Bound[K].Direction;
2734#ifndef NDEBUG
2735 switch (Bound[K].Direction) {
2736 case Dependence::DVEntry::LT:
2737 LLVM_DEBUG(dbgs() << " <");
2738 break;
2739 case Dependence::DVEntry::EQ:
2740 LLVM_DEBUG(dbgs() << " =");
2741 break;
2742 case Dependence::DVEntry::GT:
2743 LLVM_DEBUG(dbgs() << " >");
2744 break;
2745 case Dependence::DVEntry::ALL:
2746 LLVM_DEBUG(dbgs() << " *");
2747 break;
2748 default:
2749 llvm_unreachable("unexpected Bound[K].Direction");
2750 }
2751#endif
2752 }
2753 }
2754 LLVM_DEBUG(dbgs() << " ]\n");
2755 return 1;
2756 }
2757 if (Loops[Level]) {
2758 if (Level > DepthExpanded) {
2759 DepthExpanded = Level;
2760 // compute bounds for <, =, > at current level
2761 findBoundsLT(A, B, Bound, K: Level);
2762 findBoundsGT(A, B, Bound, K: Level);
2763 findBoundsEQ(A, B, Bound, K: Level);
2764#ifndef NDEBUG
2765 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2766 LLVM_DEBUG(dbgs() << "\t <\t");
2767 if (Bound[Level].Lower[Dependence::DVEntry::LT])
2768 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2769 << '\t');
2770 else
2771 LLVM_DEBUG(dbgs() << "-inf\t");
2772 if (Bound[Level].Upper[Dependence::DVEntry::LT])
2773 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2774 << '\n');
2775 else
2776 LLVM_DEBUG(dbgs() << "+inf\n");
2777 LLVM_DEBUG(dbgs() << "\t =\t");
2778 if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2779 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2780 << '\t');
2781 else
2782 LLVM_DEBUG(dbgs() << "-inf\t");
2783 if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2784 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2785 << '\n');
2786 else
2787 LLVM_DEBUG(dbgs() << "+inf\n");
2788 LLVM_DEBUG(dbgs() << "\t >\t");
2789 if (Bound[Level].Lower[Dependence::DVEntry::GT])
2790 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2791 << '\t');
2792 else
2793 LLVM_DEBUG(dbgs() << "-inf\t");
2794 if (Bound[Level].Upper[Dependence::DVEntry::GT])
2795 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2796 << '\n');
2797 else
2798 LLVM_DEBUG(dbgs() << "+inf\n");
2799#endif
2800 }
2801
2802 unsigned NewDeps = 0;
2803
2804 // test bounds for <, *, *, ...
2805 if (testBounds(DirKind: Dependence::DVEntry::LT, Level, Bound, Delta))
2806 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2807 Delta);
2808
2809 // Test bounds for =, *, *, ...
2810 if (testBounds(DirKind: Dependence::DVEntry::EQ, Level, Bound, Delta))
2811 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2812 Delta);
2813
2814 // test bounds for >, *, *, ...
2815 if (testBounds(DirKind: Dependence::DVEntry::GT, Level, Bound, Delta))
2816 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2817 Delta);
2818
2819 Bound[Level].Direction = Dependence::DVEntry::ALL;
2820 return NewDeps;
2821 } else
2822 return exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2823 Delta);
2824}
2825
2826// Returns true iff the current bounds are plausible.
2827bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2828 BoundInfo *Bound, const SCEV *Delta) const {
2829 Bound[Level].Direction = DirKind;
2830 if (const SCEV *LowerBound = getLowerBound(Bound))
2831 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: LowerBound, RHS: Delta))
2832 return false;
2833 if (const SCEV *UpperBound = getUpperBound(Bound))
2834 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: Delta, RHS: UpperBound))
2835 return false;
2836 return true;
2837}
2838
2839// Computes the upper and lower bounds for level K
2840// using the * direction. Records them in Bound.
2841// Wolfe gives the equations
2842//
2843// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2844// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2845//
2846// Since we normalize loops, we can simplify these equations to
2847//
2848// LB^*_k = (A^-_k - B^+_k)U_k
2849// UB^*_k = (A^+_k - B^-_k)U_k
2850//
2851// We must be careful to handle the case where the upper bound is unknown.
2852// Note that the lower bound is always <= 0
2853// and the upper bound is always >= 0.
2854void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
2855 BoundInfo *Bound, unsigned K) const {
2856 Bound[K].Lower[Dependence::DVEntry::ALL] =
2857 nullptr; // Default value = -infinity.
2858 Bound[K].Upper[Dependence::DVEntry::ALL] =
2859 nullptr; // Default value = +infinity.
2860 if (Bound[K].Iterations) {
2861 Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getMulExpr(
2862 LHS: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].PosPart), RHS: Bound[K].Iterations);
2863 Bound[K].Upper[Dependence::DVEntry::ALL] = SE->getMulExpr(
2864 LHS: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].NegPart), RHS: Bound[K].Iterations);
2865 } else {
2866 // If the difference is 0, we won't need to know the number of iterations.
2867 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].NegPart, RHS: B[K].PosPart))
2868 Bound[K].Lower[Dependence::DVEntry::ALL] =
2869 SE->getZero(Ty: A[K].Coeff->getType());
2870 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].PosPart, RHS: B[K].NegPart))
2871 Bound[K].Upper[Dependence::DVEntry::ALL] =
2872 SE->getZero(Ty: A[K].Coeff->getType());
2873 }
2874}
2875
2876// Computes the upper and lower bounds for level K
2877// using the = direction. Records them in Bound.
2878// Wolfe gives the equations
2879//
2880// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2881// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2882//
2883// Since we normalize loops, we can simplify these equations to
2884//
2885// LB^=_k = (A_k - B_k)^- U_k
2886// UB^=_k = (A_k - B_k)^+ U_k
2887//
2888// We must be careful to handle the case where the upper bound is unknown.
2889// Note that the lower bound is always <= 0
2890// and the upper bound is always >= 0.
2891void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
2892 BoundInfo *Bound, unsigned K) const {
2893 Bound[K].Lower[Dependence::DVEntry::EQ] =
2894 nullptr; // Default value = -infinity.
2895 Bound[K].Upper[Dependence::DVEntry::EQ] =
2896 nullptr; // Default value = +infinity.
2897 if (Bound[K].Iterations) {
2898 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2899 const SCEV *NegativePart = getNegativePart(X: Delta);
2900 Bound[K].Lower[Dependence::DVEntry::EQ] =
2901 SE->getMulExpr(LHS: NegativePart, RHS: Bound[K].Iterations);
2902 const SCEV *PositivePart = getPositivePart(X: Delta);
2903 Bound[K].Upper[Dependence::DVEntry::EQ] =
2904 SE->getMulExpr(LHS: PositivePart, RHS: Bound[K].Iterations);
2905 } else {
2906 // If the positive/negative part of the difference is 0,
2907 // we won't need to know the number of iterations.
2908 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2909 const SCEV *NegativePart = getNegativePart(X: Delta);
2910 if (NegativePart->isZero())
2911 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2912 const SCEV *PositivePart = getPositivePart(X: Delta);
2913 if (PositivePart->isZero())
2914 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2915 }
2916}
2917
2918// Computes the upper and lower bounds for level K
2919// using the < direction. Records them in Bound.
2920// Wolfe gives the equations
2921//
2922// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2923// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2924//
2925// Since we normalize loops, we can simplify these equations to
2926//
2927// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2928// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2929//
2930// We must be careful to handle the case where the upper bound is unknown.
2931void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
2932 BoundInfo *Bound, unsigned K) const {
2933 Bound[K].Lower[Dependence::DVEntry::LT] =
2934 nullptr; // Default value = -infinity.
2935 Bound[K].Upper[Dependence::DVEntry::LT] =
2936 nullptr; // Default value = +infinity.
2937 if (Bound[K].Iterations) {
2938 const SCEV *Iter_1 = SE->getMinusSCEV(
2939 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2940 const SCEV *NegPart =
2941 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2942 Bound[K].Lower[Dependence::DVEntry::LT] =
2943 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: B[K].Coeff);
2944 const SCEV *PosPart =
2945 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2946 Bound[K].Upper[Dependence::DVEntry::LT] =
2947 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: B[K].Coeff);
2948 } else {
2949 // If the positive/negative part of the difference is 0,
2950 // we won't need to know the number of iterations.
2951 const SCEV *NegPart =
2952 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2953 if (NegPart->isZero())
2954 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2955 const SCEV *PosPart =
2956 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2957 if (PosPart->isZero())
2958 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2959 }
2960}
2961
2962// Computes the upper and lower bounds for level K
2963// using the > direction. Records them in Bound.
2964// Wolfe gives the equations
2965//
2966// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2967// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2968//
2969// Since we normalize loops, we can simplify these equations to
2970//
2971// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2972// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2973//
2974// We must be careful to handle the case where the upper bound is unknown.
2975void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
2976 BoundInfo *Bound, unsigned K) const {
2977 Bound[K].Lower[Dependence::DVEntry::GT] =
2978 nullptr; // Default value = -infinity.
2979 Bound[K].Upper[Dependence::DVEntry::GT] =
2980 nullptr; // Default value = +infinity.
2981 if (Bound[K].Iterations) {
2982 const SCEV *Iter_1 = SE->getMinusSCEV(
2983 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2984 const SCEV *NegPart =
2985 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2986 Bound[K].Lower[Dependence::DVEntry::GT] =
2987 SE->getAddExpr(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: A[K].Coeff);
2988 const SCEV *PosPart =
2989 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
2990 Bound[K].Upper[Dependence::DVEntry::GT] =
2991 SE->getAddExpr(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: A[K].Coeff);
2992 } else {
2993 // If the positive/negative part of the difference is 0,
2994 // we won't need to know the number of iterations.
2995 const SCEV *NegPart =
2996 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2997 if (NegPart->isZero())
2998 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2999 const SCEV *PosPart =
3000 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
3001 if (PosPart->isZero())
3002 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
3003 }
3004}
3005
3006// X^+ = max(X, 0)
3007const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
3008 return SE->getSMaxExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
3009}
3010
3011// X^- = min(X, 0)
3012const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
3013 return SE->getSMinExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
3014}
3015
3016// Walks through the subscript,
3017// collecting each coefficient, the associated loop bounds,
3018// and recording its positive and negative parts for later use.
3019DependenceInfo::CoefficientInfo *
3020DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
3021 const SCEV *&Constant) const {
3022 const SCEV *Zero = SE->getZero(Ty: Subscript->getType());
3023 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
3024 for (unsigned K = 1; K <= MaxLevels; ++K) {
3025 CI[K].Coeff = Zero;
3026 CI[K].PosPart = Zero;
3027 CI[K].NegPart = Zero;
3028 CI[K].Iterations = nullptr;
3029 }
3030 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Subscript)) {
3031 const Loop *L = AddRec->getLoop();
3032 unsigned K = SrcFlag ? mapSrcLoop(SrcLoop: L) : mapDstLoop(DstLoop: L);
3033 CI[K].Coeff = AddRec->getStepRecurrence(SE&: *SE);
3034 CI[K].PosPart = getPositivePart(X: CI[K].Coeff);
3035 CI[K].NegPart = getNegativePart(X: CI[K].Coeff);
3036 CI[K].Iterations = collectUpperBound(L, T: Subscript->getType());
3037 Subscript = AddRec->getStart();
3038 }
3039 Constant = Subscript;
3040#ifndef NDEBUG
3041 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
3042 for (unsigned K = 1; K <= MaxLevels; ++K) {
3043 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
3044 LLVM_DEBUG(dbgs() << "\tPos Part = ");
3045 LLVM_DEBUG(dbgs() << *CI[K].PosPart);
3046 LLVM_DEBUG(dbgs() << "\tNeg Part = ");
3047 LLVM_DEBUG(dbgs() << *CI[K].NegPart);
3048 LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
3049 if (CI[K].Iterations)
3050 LLVM_DEBUG(dbgs() << *CI[K].Iterations);
3051 else
3052 LLVM_DEBUG(dbgs() << "+inf");
3053 LLVM_DEBUG(dbgs() << '\n');
3054 }
3055 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
3056#endif
3057 return CI;
3058}
3059
3060// Looks through all the bounds info and
3061// computes the lower bound given the current direction settings
3062// at each level. If the lower bound for any level is -inf,
3063// the result is -inf.
3064const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
3065 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
3066 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3067 if (Bound[K].Lower[Bound[K].Direction])
3068 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Lower[Bound[K].Direction]);
3069 else
3070 Sum = nullptr;
3071 }
3072 return Sum;
3073}
3074
3075// Looks through all the bounds info and
3076// computes the upper bound given the current direction settings
3077// at each level. If the upper bound at any level is +inf,
3078// the result is +inf.
3079const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
3080 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
3081 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3082 if (Bound[K].Upper[Bound[K].Direction])
3083 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Upper[Bound[K].Direction]);
3084 else
3085 Sum = nullptr;
3086 }
3087 return Sum;
3088}
3089
3090/// Check if we can delinearize the subscripts. If the SCEVs representing the
3091/// source and destination array references are recurrences on a nested loop,
3092/// this function flattens the nested recurrences into separate recurrences
3093/// for each loop level.
3094bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
3095 SmallVectorImpl<Subscript> &Pair) {
3096 assert(isLoadOrStore(Src) && "instruction is not load or store");
3097 assert(isLoadOrStore(Dst) && "instruction is not load or store");
3098 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3099 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3100 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
3101 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
3102 const SCEV *SrcAccessFn = SE->getSCEVAtScope(V: SrcPtr, L: SrcLoop);
3103 const SCEV *DstAccessFn = SE->getSCEVAtScope(V: DstPtr, L: DstLoop);
3104 const SCEVUnknown *SrcBase =
3105 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
3106 const SCEVUnknown *DstBase =
3107 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
3108
3109 if (!SrcBase || !DstBase || SrcBase != DstBase)
3110 return false;
3111
3112 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
3113
3114 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
3115 SrcSubscripts, DstSubscripts) &&
3116 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
3117 SrcSubscripts, DstSubscripts))
3118 return false;
3119
3120 assert(isLoopInvariant(SrcBase, SrcLoop) &&
3121 isLoopInvariant(DstBase, DstLoop) &&
3122 "Expected SrcBase and DstBase to be loop invariant");
3123
3124 int Size = SrcSubscripts.size();
3125 LLVM_DEBUG({
3126 dbgs() << "\nSrcSubscripts: ";
3127 for (int I = 0; I < Size; I++)
3128 dbgs() << *SrcSubscripts[I];
3129 dbgs() << "\nDstSubscripts: ";
3130 for (int I = 0; I < Size; I++)
3131 dbgs() << *DstSubscripts[I];
3132 });
3133
3134 // The delinearization transforms a single-subscript MIV dependence test into
3135 // a multi-subscript SIV dependence test that is easier to compute. So we
3136 // resize Pair to contain as many pairs of subscripts as the delinearization
3137 // has found, and then initialize the pairs following the delinearization.
3138 Pair.resize(N: Size);
3139 SCEVMonotonicityChecker MonChecker(SE);
3140 const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
3141 for (int I = 0; I < Size; ++I) {
3142 Pair[I].Src = SrcSubscripts[I];
3143 Pair[I].Dst = DstSubscripts[I];
3144
3145 assert(Pair[I].Src->getType() == Pair[I].Dst->getType() &&
3146 "Unexpected different types for the subscripts");
3147
3148 if (EnableMonotonicityCheck) {
3149 if (MonChecker.checkMonotonicity(Expr: Pair[I].Src, OutermostLoop).isUnknown())
3150 return false;
3151 if (MonChecker.checkMonotonicity(Expr: Pair[I].Dst, OutermostLoop).isUnknown())
3152 return false;
3153 }
3154 }
3155
3156 return true;
3157}
3158
3159/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
3160/// arrays accessed are fixed-size arrays. Return true if delinearization was
3161/// successful.
3162bool DependenceInfo::tryDelinearizeFixedSize(
3163 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3164 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3165 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3166 LLVM_DEBUG({
3167 const SCEVUnknown *SrcBase =
3168 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3169 const SCEVUnknown *DstBase =
3170 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3171 assert(SrcBase && DstBase && SrcBase == DstBase &&
3172 "expected src and dst scev unknowns to be equal");
3173 });
3174
3175 const SCEV *ElemSize = SE->getElementSize(Inst: Src);
3176 assert(ElemSize == SE->getElementSize(Dst) && "Different element sizes");
3177 SmallVector<const SCEV *, 4> SrcSizes, DstSizes;
3178 if (!delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: SrcAccessFn),
3179 Subscripts&: SrcSubscripts, Sizes&: SrcSizes, ElementSize: ElemSize) ||
3180 !delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: DstAccessFn),
3181 Subscripts&: DstSubscripts, Sizes&: DstSizes, ElementSize: ElemSize))
3182 return false;
3183
3184 // Check that the two size arrays are non-empty and equal in length and
3185 // value. SCEV expressions are uniqued, so we can compare pointers.
3186 if (SrcSizes.size() != DstSizes.size() ||
3187 !std::equal(first1: SrcSizes.begin(), last1: SrcSizes.end(), first2: DstSizes.begin())) {
3188 SrcSubscripts.clear();
3189 DstSubscripts.clear();
3190 return false;
3191 }
3192
3193 assert(SrcSubscripts.size() == DstSubscripts.size() &&
3194 "Expected equal number of entries in the list of SrcSubscripts and "
3195 "DstSubscripts.");
3196
3197 // In general we cannot safely assume that the subscripts recovered from GEPs
3198 // are in the range of values defined for their corresponding array
3199 // dimensions. For example some C language usage/interpretation make it
3200 // impossible to verify this at compile-time. As such we can only delinearize
3201 // iff the subscripts are positive and are less than the range of the
3202 // dimension.
3203 if (!DisableDelinearizationChecks) {
3204 if (!validateDelinearizationResult(SE&: *SE, Sizes: SrcSizes, Subscripts: SrcSubscripts) ||
3205 !validateDelinearizationResult(SE&: *SE, Sizes: DstSizes, Subscripts: DstSubscripts)) {
3206 SrcSubscripts.clear();
3207 DstSubscripts.clear();
3208 return false;
3209 }
3210 }
3211 LLVM_DEBUG({
3212 dbgs() << "Delinearized subscripts of fixed-size array\n"
3213 << "SrcGEP:" << *getLoadStorePointerOperand(Src) << "\n"
3214 << "DstGEP:" << *getLoadStorePointerOperand(Dst) << "\n";
3215 });
3216 return true;
3217}
3218
3219bool DependenceInfo::tryDelinearizeParametricSize(
3220 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3221 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3222 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3223
3224 const SCEVUnknown *SrcBase =
3225 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
3226 const SCEVUnknown *DstBase =
3227 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
3228 assert(SrcBase && DstBase && SrcBase == DstBase &&
3229 "expected src and dst scev unknowns to be equal");
3230
3231 const SCEV *ElementSize = SE->getElementSize(Inst: Src);
3232 if (ElementSize != SE->getElementSize(Inst: Dst))
3233 return false;
3234
3235 const SCEV *SrcSCEV = SE->getMinusSCEV(LHS: SrcAccessFn, RHS: SrcBase);
3236 const SCEV *DstSCEV = SE->getMinusSCEV(LHS: DstAccessFn, RHS: DstBase);
3237
3238 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(Val: SrcSCEV);
3239 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(Val: DstSCEV);
3240 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
3241 return false;
3242
3243 // First step: collect parametric terms in both array references.
3244 SmallVector<const SCEV *, 4> Terms;
3245 collectParametricTerms(SE&: *SE, Expr: SrcAR, Terms);
3246 collectParametricTerms(SE&: *SE, Expr: DstAR, Terms);
3247
3248 // Second step: find subscript sizes.
3249 SmallVector<const SCEV *, 4> Sizes;
3250 findArrayDimensions(SE&: *SE, Terms, Sizes, ElementSize);
3251
3252 // Third step: compute the access functions for each subscript.
3253 computeAccessFunctions(SE&: *SE, Expr: SrcAR, Subscripts&: SrcSubscripts, Sizes);
3254 computeAccessFunctions(SE&: *SE, Expr: DstAR, Subscripts&: DstSubscripts, Sizes);
3255
3256 // Fail when there is only a subscript: that's a linearized access function.
3257 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
3258 SrcSubscripts.size() != DstSubscripts.size())
3259 return false;
3260
3261 // Statically check that the array bounds are in-range. The first subscript we
3262 // don't have a size for and it cannot overflow into another subscript, so is
3263 // always safe. The others need to be 0 <= subscript[i] < bound, for both src
3264 // and dst.
3265 // FIXME: It may be better to record these sizes and add them as constraints
3266 // to the dependency checks.
3267 if (!DisableDelinearizationChecks)
3268 if (!validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: SrcSubscripts) ||
3269 !validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: DstSubscripts))
3270 return false;
3271
3272 return true;
3273}
3274
3275//===----------------------------------------------------------------------===//
3276
3277#ifndef NDEBUG
3278// For debugging purposes, dump a small bit vector to dbgs().
3279static void dumpSmallBitVector(SmallBitVector &BV) {
3280 dbgs() << "{";
3281 for (unsigned VI : BV.set_bits()) {
3282 dbgs() << VI;
3283 if (BV.find_next(VI) >= 0)
3284 dbgs() << ' ';
3285 }
3286 dbgs() << "}\n";
3287}
3288#endif
3289
3290bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
3291 FunctionAnalysisManager::Invalidator &Inv) {
3292 // Check if the analysis itself has been invalidated.
3293 auto PAC = PA.getChecker<DependenceAnalysis>();
3294 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3295 return true;
3296
3297 // Check transitive dependencies.
3298 return Inv.invalidate<AAManager>(IR&: F, PA) ||
3299 Inv.invalidate<ScalarEvolutionAnalysis>(IR&: F, PA) ||
3300 Inv.invalidate<LoopAnalysis>(IR&: F, PA);
3301}
3302
3303// depends -
3304// Returns NULL if there is no dependence.
3305// Otherwise, return a Dependence with as many details as possible.
3306// Corresponds to Section 3.1 in the paper
3307//
3308// Practical Dependence Testing
3309// Goff, Kennedy, Tseng
3310// PLDI 1991
3311//
3312std::unique_ptr<Dependence>
3313DependenceInfo::depends(Instruction *Src, Instruction *Dst,
3314 bool UnderRuntimeAssumptions) {
3315 SmallVector<const SCEVPredicate *, 4> Assume;
3316 bool PossiblyLoopIndependent = true;
3317 if (Src == Dst)
3318 PossiblyLoopIndependent = false;
3319
3320 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
3321 // if both instructions don't reference memory, there's no dependence
3322 return nullptr;
3323
3324 if (!isLoadOrStore(I: Src) || !isLoadOrStore(I: Dst)) {
3325 // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3326 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
3327 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3328 args: SCEVUnionPredicate(Assume, *SE));
3329 }
3330
3331 const MemoryLocation &DstLoc = MemoryLocation::get(Inst: Dst);
3332 const MemoryLocation &SrcLoc = MemoryLocation::get(Inst: Src);
3333
3334 switch (underlyingObjectsAlias(AA, DL: F->getDataLayout(), LocA: DstLoc, LocB: SrcLoc)) {
3335 case AliasResult::MayAlias:
3336 case AliasResult::PartialAlias:
3337 // cannot analyse objects if we don't understand their aliasing.
3338 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
3339 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3340 args: SCEVUnionPredicate(Assume, *SE));
3341 case AliasResult::NoAlias:
3342 // If the objects noalias, they are distinct, accesses are independent.
3343 LLVM_DEBUG(dbgs() << "no alias\n");
3344 return nullptr;
3345 case AliasResult::MustAlias:
3346 break; // The underlying objects alias; test accesses for dependence.
3347 }
3348
3349 if (DstLoc.Size != SrcLoc.Size || !DstLoc.Size.isPrecise() ||
3350 !SrcLoc.Size.isPrecise()) {
3351 // The dependence test gets confused if the size of the memory accesses
3352 // differ.
3353 LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
3354 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3355 args: SCEVUnionPredicate(Assume, *SE));
3356 }
3357
3358 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3359 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3360 const SCEV *SrcSCEV = SE->getSCEV(V: SrcPtr);
3361 const SCEV *DstSCEV = SE->getSCEV(V: DstPtr);
3362 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
3363 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
3364 const SCEV *SrcBase = SE->getPointerBase(V: SrcSCEV);
3365 const SCEV *DstBase = SE->getPointerBase(V: DstSCEV);
3366 if (SrcBase != DstBase) {
3367 // If two pointers have different bases, trying to analyze indexes won't
3368 // work; we can't compare them to each other. This can happen, for example,
3369 // if one is produced by an LCSSA PHI node.
3370 //
3371 // We check this upfront so we don't crash in cases where getMinusSCEV()
3372 // returns a SCEVCouldNotCompute.
3373 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
3374 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3375 args: SCEVUnionPredicate(Assume, *SE));
3376 }
3377
3378 // Even if the base pointers are the same, they may not be loop-invariant. It
3379 // could lead to incorrect results, as we're analyzing loop-carried
3380 // dependencies. Src and Dst can be in different loops, so we need to check
3381 // the base pointer is invariant in both loops.
3382 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
3383 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
3384 if (!isLoopInvariant(Expression: SrcBase, LoopNest: SrcLoop) ||
3385 !isLoopInvariant(Expression: DstBase, LoopNest: DstLoop)) {
3386 LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
3387 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3388 args: SCEVUnionPredicate(Assume, *SE));
3389 }
3390
3391 uint64_t EltSize = SrcLoc.Size.toRaw();
3392 const SCEV *SrcEv = SE->getMinusSCEV(LHS: SrcSCEV, RHS: SrcBase);
3393 const SCEV *DstEv = SE->getMinusSCEV(LHS: DstSCEV, RHS: DstBase);
3394
3395 // Check that memory access offsets are multiples of element sizes.
3396 if (!SE->isKnownMultipleOf(S: SrcEv, M: EltSize, Assumptions&: Assume) ||
3397 !SE->isKnownMultipleOf(S: DstEv, M: EltSize, Assumptions&: Assume)) {
3398 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
3399 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3400 args: SCEVUnionPredicate(Assume, *SE));
3401 }
3402
3403 // Runtime assumptions needed but not allowed.
3404 if (!Assume.empty() && !UnderRuntimeAssumptions)
3405 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3406 args: SCEVUnionPredicate(Assume, *SE));
3407
3408 unsigned Pairs = 1;
3409 SmallVector<Subscript, 2> Pair(Pairs);
3410 Pair[0].Src = SrcEv;
3411 Pair[0].Dst = DstEv;
3412
3413 SCEVMonotonicityChecker MonChecker(SE);
3414 const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
3415 if (EnableMonotonicityCheck)
3416 if (MonChecker.checkMonotonicity(Expr: Pair[0].Src, OutermostLoop).isUnknown() ||
3417 MonChecker.checkMonotonicity(Expr: Pair[0].Dst, OutermostLoop).isUnknown())
3418 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3419 args: SCEVUnionPredicate(Assume, *SE));
3420
3421 if (Delinearize) {
3422 if (tryDelinearize(Src, Dst, Pair)) {
3423 LLVM_DEBUG(dbgs() << " delinearized\n");
3424 Pairs = Pair.size();
3425 }
3426 }
3427
3428 // Establish loop nesting levels considering SameSD loops as common
3429 establishNestingLevels(Src, Dst);
3430
3431 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
3432 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
3433 LLVM_DEBUG(dbgs() << " SameSD nesting levels = " << SameSDLevels << "\n");
3434
3435 // Modify common levels to consider the SameSD levels in the tests
3436 CommonLevels += SameSDLevels;
3437 MaxLevels -= SameSDLevels;
3438 if (SameSDLevels > 0) {
3439 // Not all tests are handled yet over SameSD loops
3440 // Revoke if there are any tests other than ZIV, SIV or RDIV
3441 for (unsigned P = 0; P < Pairs; ++P) {
3442 SmallBitVector Loops;
3443 Subscript::ClassificationKind TestClass =
3444 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()),
3445 Dst: Pair[P].Dst, DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops);
3446
3447 if (TestClass != Subscript::ZIV && TestClass != Subscript::SIV &&
3448 TestClass != Subscript::RDIV) {
3449 // Revert the levels to not consider the SameSD levels
3450 CommonLevels -= SameSDLevels;
3451 MaxLevels += SameSDLevels;
3452 SameSDLevels = 0;
3453 break;
3454 }
3455 }
3456 }
3457
3458 if (SameSDLevels > 0)
3459 SameSDLoopsCount++;
3460
3461 FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
3462 PossiblyLoopIndependent, CommonLevels);
3463 ++TotalArrayPairs;
3464
3465 for (unsigned P = 0; P < Pairs; ++P) {
3466 assert(Pair[P].Src->getType()->isIntegerTy() && "Src must be an integer");
3467 assert(Pair[P].Dst->getType()->isIntegerTy() && "Dst must be an integer");
3468 Pair[P].Loops.resize(N: MaxLevels + 1);
3469 Pair[P].GroupLoops.resize(N: MaxLevels + 1);
3470 Pair[P].Group.resize(N: Pairs);
3471 Pair[P].Classification =
3472 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()), Dst: Pair[P].Dst,
3473 DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops&: Pair[P].Loops);
3474 Pair[P].GroupLoops = Pair[P].Loops;
3475 Pair[P].Group.set(P);
3476 LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
3477 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3478 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3479 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3480 LLVM_DEBUG(dbgs() << "\tloops = ");
3481 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
3482 }
3483
3484 // Test each subscript individually
3485 for (unsigned SI = 0; SI < Pairs; ++SI) {
3486 LLVM_DEBUG(dbgs() << "testing subscript " << SI);
3487 switch (Pair[SI].Classification) {
3488 case Subscript::NonLinear:
3489 // ignore these, but collect loops for later
3490 ++NonlinearSubscriptPairs;
3491 collectCommonLoops(Expression: Pair[SI].Src, LoopNest: LI->getLoopFor(BB: Src->getParent()),
3492 Loops&: Pair[SI].Loops);
3493 collectCommonLoops(Expression: Pair[SI].Dst, LoopNest: LI->getLoopFor(BB: Dst->getParent()),
3494 Loops&: Pair[SI].Loops);
3495 Result.Consistent = false;
3496 break;
3497 case Subscript::ZIV:
3498 LLVM_DEBUG(dbgs() << ", ZIV\n");
3499 if (testZIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
3500 return nullptr;
3501 break;
3502 case Subscript::SIV: {
3503 LLVM_DEBUG(dbgs() << ", SIV\n");
3504 unsigned Level;
3505 if (testSIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Level, Result,
3506 UnderRuntimeAssumptions))
3507 return nullptr;
3508 break;
3509 }
3510 case Subscript::RDIV:
3511 LLVM_DEBUG(dbgs() << ", RDIV\n");
3512 if (testRDIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
3513 return nullptr;
3514 break;
3515 case Subscript::MIV:
3516 LLVM_DEBUG(dbgs() << ", MIV\n");
3517 if (testMIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Loops: Pair[SI].Loops, Result))
3518 return nullptr;
3519 break;
3520 }
3521 }
3522
3523 // Make sure the Scalar flags are set correctly.
3524 SmallBitVector CompleteLoops(MaxLevels + 1);
3525 for (unsigned SI = 0; SI < Pairs; ++SI)
3526 CompleteLoops |= Pair[SI].Loops;
3527 for (unsigned II = 1; II <= CommonLevels; ++II)
3528 if (CompleteLoops[II])
3529 Result.DV[II - 1].Scalar = false;
3530
3531 // Set the distance to zero if the direction is EQ.
3532 // TODO: Ideally, the distance should be set to 0 immediately simultaneously
3533 // with the corresponding direction being set to EQ.
3534 for (unsigned II = 1; II <= Result.getLevels(); ++II) {
3535 if (Result.getDirection(Level: II) == Dependence::DVEntry::EQ) {
3536 if (Result.DV[II - 1].Distance == nullptr)
3537 Result.DV[II - 1].Distance = SE->getZero(Ty: SrcSCEV->getType());
3538 else
3539 assert(Result.DV[II - 1].Distance->isZero() &&
3540 "Inconsistency between distance and direction");
3541 }
3542
3543#ifndef NDEBUG
3544 // Check that the converse (i.e., if the distance is zero, then the
3545 // direction is EQ) holds.
3546 const SCEV *Distance = Result.getDistance(II);
3547 if (Distance && Distance->isZero())
3548 assert(Result.getDirection(II) == Dependence::DVEntry::EQ &&
3549 "Distance is zero, but direction is not EQ");
3550#endif
3551 }
3552
3553 if (SameSDLevels > 0) {
3554 // Extracting SameSD levels from the common levels
3555 // Reverting CommonLevels and MaxLevels to their original values
3556 assert(CommonLevels >= SameSDLevels);
3557 CommonLevels -= SameSDLevels;
3558 MaxLevels += SameSDLevels;
3559 std::unique_ptr<FullDependence::DVEntry[]> DV, DVSameSD;
3560 DV = std::make_unique<FullDependence::DVEntry[]>(num: CommonLevels);
3561 DVSameSD = std::make_unique<FullDependence::DVEntry[]>(num: SameSDLevels);
3562 for (unsigned Level = 0; Level < CommonLevels; ++Level)
3563 DV[Level] = Result.DV[Level];
3564 for (unsigned Level = 0; Level < SameSDLevels; ++Level)
3565 DVSameSD[Level] = Result.DV[CommonLevels + Level];
3566 Result.DV = std::move(DV);
3567 Result.DVSameSD = std::move(DVSameSD);
3568 Result.Levels = CommonLevels;
3569 Result.SameSDLevels = SameSDLevels;
3570 // Result is not consistent if it considers SameSD levels
3571 Result.Consistent = false;
3572 }
3573
3574 if (PossiblyLoopIndependent) {
3575 // Make sure the LoopIndependent flag is set correctly.
3576 // All directions must include equal, otherwise no
3577 // loop-independent dependence is possible.
3578 for (unsigned II = 1; II <= CommonLevels; ++II) {
3579 if (!(Result.getDirection(Level: II) & Dependence::DVEntry::EQ)) {
3580 Result.LoopIndependent = false;
3581 break;
3582 }
3583 }
3584 } else {
3585 // On the other hand, if all directions are equal and there's no
3586 // loop-independent dependence possible, then no dependence exists.
3587 // However, if there are runtime assumptions, we must return the result.
3588 bool AllEqual = true;
3589 for (unsigned II = 1; II <= CommonLevels; ++II) {
3590 if (Result.getDirection(Level: II) != Dependence::DVEntry::EQ) {
3591 AllEqual = false;
3592 break;
3593 }
3594 }
3595 if (AllEqual && Result.Assumptions.getPredicates().empty())
3596 return nullptr;
3597 }
3598
3599 return std::make_unique<FullDependence>(args: std::move(Result));
3600}
3601