1//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DependenceAnalysis is an LLVM pass that analyses dependences between memory
10// accesses. Currently, it is an (incomplete) implementation of the approach
11// described in
12//
13// Practical Dependence Testing
14// Goff, Kennedy, Tseng
15// PLDI 1991
16//
17// There's a single entry point that analyzes the dependence between a pair
18// of memory references in a function, returning either NULL, for no dependence,
19// or a more-or-less detailed description of the dependence between them.
20//
21// Since Clang linearizes some array subscripts, the dependence
22// analysis is using SCEV->delinearize to recover the representation of multiple
23// subscripts, and thus avoid the more expensive and less precise MIV tests. The
24// delinearization is controlled by the flag -da-delinearize.
25//
26// We should pay some careful attention to the possibility of integer overflow
27// in the implementation of the various tests. This could happen with Add,
28// Subtract, or Multiply, with both APInt's and SCEV's.
29//
30// Some non-linear subscript pairs can be handled by the GCD test
31// (and perhaps other tests).
32// Should explore how often these things occur.
33//
34// Finally, it seems like certain test cases expose weaknesses in the SCEV
35// simplification, especially in the handling of sign and zero extensions.
36// It could be useful to spend time exploring these.
37//
38// Please note that this is work in progress and the interface is subject to
39// change.
40//
41//===----------------------------------------------------------------------===//
42// //
43// In memory of Ken Kennedy, 1945 - 2007 //
44// //
45//===----------------------------------------------------------------------===//
46
47#include "llvm/Analysis/DependenceAnalysis.h"
48#include "llvm/ADT/Statistic.h"
49#include "llvm/Analysis/AliasAnalysis.h"
50#include "llvm/Analysis/Delinearization.h"
51#include "llvm/Analysis/LoopInfo.h"
52#include "llvm/Analysis/ScalarEvolution.h"
53#include "llvm/Analysis/ScalarEvolutionExpressions.h"
54#include "llvm/Analysis/ValueTracking.h"
55#include "llvm/IR/InstIterator.h"
56#include "llvm/IR/Module.h"
57#include "llvm/InitializePasses.h"
58#include "llvm/Support/CommandLine.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/raw_ostream.h"
62
63using namespace llvm;
64
65#define DEBUG_TYPE "da"
66
67//===----------------------------------------------------------------------===//
68// statistics
69
70STATISTIC(TotalArrayPairs, "Array pairs tested");
71STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
72STATISTIC(ZIVapplications, "ZIV applications");
73STATISTIC(ZIVindependence, "ZIV independence");
74STATISTIC(StrongSIVapplications, "Strong SIV applications");
75STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
76STATISTIC(StrongSIVindependence, "Strong SIV independence");
77STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
78STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
79STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
80STATISTIC(ExactSIVapplications, "Exact SIV applications");
81STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
82STATISTIC(ExactSIVindependence, "Exact SIV independence");
83STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
84STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
85STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
86STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
87STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
88STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
89STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
90STATISTIC(GCDapplications, "GCD applications");
91STATISTIC(GCDsuccesses, "GCD successes");
92STATISTIC(GCDindependence, "GCD independence");
93STATISTIC(BanerjeeApplications, "Banerjee applications");
94STATISTIC(BanerjeeIndependence, "Banerjee independence");
95STATISTIC(BanerjeeSuccesses, "Banerjee successes");
96STATISTIC(SameSDLoopsCount, "Loops with Same iteration Space and Depth");
97
98static cl::opt<bool>
99 Delinearize("da-delinearize", cl::init(Val: true), cl::Hidden,
100 cl::desc("Try to delinearize array references."));
101static cl::opt<bool> DisableDelinearizationChecks(
102 "da-disable-delinearization-checks", cl::Hidden,
103 cl::desc(
104 "Disable checks that try to statically verify validity of "
105 "delinearized subscripts. Enabling this option may result in incorrect "
106 "dependence vectors for languages that allow the subscript of one "
107 "dimension to underflow or overflow into another dimension."));
108
109static cl::opt<unsigned> MIVMaxLevelThreshold(
110 "da-miv-max-level-threshold", cl::init(Val: 7), cl::Hidden,
111 cl::desc("Maximum depth allowed for the recursive algorithm used to "
112 "explore MIV direction vectors."));
113
114namespace {
115
116/// Types of dependence test routines.
117enum class DependenceTestType {
118 All,
119 StrongSIV,
120 WeakCrossingSIV,
121 ExactSIV,
122 WeakZeroSIV,
123 ExactRDIV,
124 SymbolicRDIV,
125 GCDMIV,
126 BanerjeeMIV,
127};
128
129} // anonymous namespace
130
131static cl::opt<DependenceTestType> EnableDependenceTest(
132 "da-enable-dependence-test", cl::init(Val: DependenceTestType::All),
133 cl::ReallyHidden,
134 cl::desc("Run only specified dependence test routine and disable others. "
135 "The purpose is mainly to exclude the influence of other "
136 "dependence test routines in regression tests. If set to All, all "
137 "dependence test routines are enabled."),
138 cl::values(clEnumValN(DependenceTestType::All, "all",
139 "Enable all dependence test routines."),
140 clEnumValN(DependenceTestType::StrongSIV, "strong-siv",
141 "Enable only Strong SIV test."),
142 clEnumValN(DependenceTestType::WeakCrossingSIV,
143 "weak-crossing-siv",
144 "Enable only Weak-Crossing SIV test."),
145 clEnumValN(DependenceTestType::ExactSIV, "exact-siv",
146 "Enable only Exact SIV test."),
147 clEnumValN(DependenceTestType::WeakZeroSIV, "weak-zero-siv",
148 "Enable only Weak-Zero SIV test."),
149 clEnumValN(DependenceTestType::ExactRDIV, "exact-rdiv",
150 "Enable only Exact RDIV test."),
151 clEnumValN(DependenceTestType::SymbolicRDIV, "symbolic-rdiv",
152 "Enable only Symbolic RDIV test."),
153 clEnumValN(DependenceTestType::GCDMIV, "gcd-miv",
154 "Enable only GCD MIV test."),
155 clEnumValN(DependenceTestType::BanerjeeMIV, "banerjee-miv",
156 "Enable only Banerjee MIV test.")));
157
158// TODO: This flag is disabled by default because it is still under development.
159// Enable it or delete this flag when the feature is ready.
160static cl::opt<bool> EnableMonotonicityCheck(
161 "da-enable-monotonicity-check", cl::init(Val: false), cl::Hidden,
162 cl::desc("Check if the subscripts are monotonic. If it's not, dependence "
163 "is reported as unknown."));
164
165static cl::opt<bool> DumpMonotonicityReport(
166 "da-dump-monotonicity-report", cl::init(Val: false), cl::Hidden,
167 cl::desc(
168 "When printing analysis, dump the results of monotonicity checks."));
169
170//===----------------------------------------------------------------------===//
171// basics
172
173DependenceAnalysis::Result
174DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
175 auto &AA = FAM.getResult<AAManager>(IR&: F);
176 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
177 auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
178 return DependenceInfo(&F, &AA, &SE, &LI);
179}
180
181AnalysisKey DependenceAnalysis::Key;
182
183INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
184 "Dependence Analysis", true, true)
185INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
186INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
187INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
188INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
189 true, true)
190
191char DependenceAnalysisWrapperPass::ID = 0;
192
193DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
194 : FunctionPass(ID) {}
195
196FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
197 return new DependenceAnalysisWrapperPass();
198}
199
200bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
201 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
202 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
203 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
204 info.reset(p: new DependenceInfo(&F, &AA, &SE, &LI));
205 return false;
206}
207
208DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
209
210void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
211
212void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
213 AU.setPreservesAll();
214 AU.addRequiredTransitive<AAResultsWrapperPass>();
215 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
216 AU.addRequiredTransitive<LoopInfoWrapperPass>();
217}
218
219namespace {
220
221/// The property of monotonicity of a SCEV. To define the monotonicity, assume
222/// a SCEV defined within N-nested loops. Let i_k denote the iteration number
223/// of the k-th loop. Then we can regard the SCEV as an N-ary function:
224///
225/// F(i_1, i_2, ..., i_N)
226///
227/// The domain of i_k is the closed range [0, BTC_k], where BTC_k is the
228/// backedge-taken count of the k-th loop.
229///
230/// A function F is said to be "monotonically increasing with respect to the
231/// k-th loop" if x <= y implies the following condition:
232///
233/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) <=
234/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
235///
236/// where i_1, ..., i_{k-1}, i_{k+1}, ..., i_N, x, and y are elements of their
237/// respective domains.
238///
239/// Likewise F is "monotonically decreasing with respect to the k-th loop"
240/// if x <= y implies
241///
242/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) >=
243/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
244///
245/// A function F that is monotonically increasing or decreasing with respect to
246/// the k-th loop is simply called "monotonic with respect to k-th loop".
247///
248/// A function F is said to be "multivariate monotonic" when it is monotonic
249/// with respect to all of the N loops.
250///
251/// Since integer comparison can be either signed or unsigned, we need to
252/// distinguish monotonicity in the signed sense from that in the unsigned
253/// sense. Note that the inequality "x <= y" merely indicates loop progression
254/// and is not affected by the difference between signed and unsigned order.
255///
256/// Currently we only consider monotonicity in a signed sense.
257enum class SCEVMonotonicityType {
258 /// We don't know anything about the monotonicity of the SCEV.
259 Unknown,
260
261 /// The SCEV is loop-invariant with respect to the outermost loop. In other
262 /// words, the function F corresponding to the SCEV is a constant function.
263 Invariant,
264
265 /// The function F corresponding to the SCEV is multivariate monotonic in a
266 /// signed sense. Note that the multivariate monotonic function may also be a
267 /// constant function. The order employed in the definition of monotonicity
268 /// is not strict order.
269 MultivariateSignedMonotonic,
270};
271
272struct SCEVMonotonicity {
273 SCEVMonotonicity(SCEVMonotonicityType Type,
274 const SCEV *FailurePoint = nullptr);
275
276 SCEVMonotonicityType getType() const { return Type; }
277
278 const SCEV *getFailurePoint() const { return FailurePoint; }
279
280 bool isUnknown() const { return Type == SCEVMonotonicityType::Unknown; }
281
282 void print(raw_ostream &OS, unsigned Depth) const;
283
284private:
285 SCEVMonotonicityType Type;
286
287 /// The subexpression that caused Unknown. Mainly for debugging purpose.
288 const SCEV *FailurePoint;
289};
290
291/// Check the monotonicity of a SCEV. Since dependence tests (SIV, MIV, etc.)
292/// assume that subscript expressions are (multivariate) monotonic, we need to
293/// verify this property before applying those tests. Violating this assumption
294/// may cause them to produce incorrect results.
295struct SCEVMonotonicityChecker
296 : public SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity> {
297
298 SCEVMonotonicityChecker(ScalarEvolution *SE) : SE(SE) {}
299
300 /// Check the monotonicity of \p Expr. \p Expr must be integer type. If \p
301 /// OutermostLoop is not null, \p Expr must be defined in \p OutermostLoop or
302 /// one of its nested loops.
303 SCEVMonotonicity checkMonotonicity(const SCEV *Expr,
304 const Loop *OutermostLoop);
305
306private:
307 ScalarEvolution *SE;
308
309 /// The outermost loop that DA is analyzing.
310 const Loop *OutermostLoop;
311
312 /// A helper to classify \p Expr as either Invariant or Unknown.
313 SCEVMonotonicity invariantOrUnknown(const SCEV *Expr);
314
315 /// Return true if \p Expr is loop-invariant with respect to the outermost
316 /// loop.
317 bool isLoopInvariant(const SCEV *Expr) const;
318
319 /// A helper to create an Unknown SCEVMonotonicity.
320 SCEVMonotonicity createUnknown(const SCEV *FailurePoint) {
321 return SCEVMonotonicity(SCEVMonotonicityType::Unknown, FailurePoint);
322 }
323
324 SCEVMonotonicity visitAddRecExpr(const SCEVAddRecExpr *Expr);
325
326 SCEVMonotonicity visitConstant(const SCEVConstant *) {
327 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
328 }
329 SCEVMonotonicity visitVScale(const SCEVVScale *) {
330 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
331 }
332
333 // TODO: Handle more cases.
334 SCEVMonotonicity visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
335 return invariantOrUnknown(Expr);
336 }
337 SCEVMonotonicity visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
338 return invariantOrUnknown(Expr);
339 }
340 SCEVMonotonicity visitAddExpr(const SCEVAddExpr *Expr) {
341 return invariantOrUnknown(Expr);
342 }
343 SCEVMonotonicity visitMulExpr(const SCEVMulExpr *Expr) {
344 return invariantOrUnknown(Expr);
345 }
346 SCEVMonotonicity visitPtrToAddrExpr(const SCEVPtrToAddrExpr *Expr) {
347 return invariantOrUnknown(Expr);
348 }
349 SCEVMonotonicity visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) {
350 return invariantOrUnknown(Expr);
351 }
352 SCEVMonotonicity visitTruncateExpr(const SCEVTruncateExpr *Expr) {
353 return invariantOrUnknown(Expr);
354 }
355 SCEVMonotonicity visitUDivExpr(const SCEVUDivExpr *Expr) {
356 return invariantOrUnknown(Expr);
357 }
358 SCEVMonotonicity visitSMaxExpr(const SCEVSMaxExpr *Expr) {
359 return invariantOrUnknown(Expr);
360 }
361 SCEVMonotonicity visitUMaxExpr(const SCEVUMaxExpr *Expr) {
362 return invariantOrUnknown(Expr);
363 }
364 SCEVMonotonicity visitSMinExpr(const SCEVSMinExpr *Expr) {
365 return invariantOrUnknown(Expr);
366 }
367 SCEVMonotonicity visitUMinExpr(const SCEVUMinExpr *Expr) {
368 return invariantOrUnknown(Expr);
369 }
370 SCEVMonotonicity visitSequentialUMinExpr(const SCEVSequentialUMinExpr *Expr) {
371 return invariantOrUnknown(Expr);
372 }
373 SCEVMonotonicity visitUnknown(const SCEVUnknown *Expr) {
374 return invariantOrUnknown(Expr);
375 }
376 SCEVMonotonicity visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
377 return invariantOrUnknown(Expr);
378 }
379
380 friend struct SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity>;
381};
382
383/// A wrapper class for std::optional<APInt> that provides arithmetic operators
384/// with overflow checking in a signed sense. This allows us to omit inserting
385/// an overflow check at every arithmetic operation, which simplifies the code
386/// if the operations are chained like `a + b + c + ...`.
387///
388/// If an calculation overflows, the result becomes "invalid" which is
389/// internally represented by std::nullopt. If any operand of an arithmetic
390/// operation is "invalid", the result will also be "invalid".
391struct OverflowSafeSignedAPInt {
392 OverflowSafeSignedAPInt() : Value(std::nullopt) {}
393 OverflowSafeSignedAPInt(const APInt &V) : Value(V) {}
394 OverflowSafeSignedAPInt(const std::optional<APInt> &V) : Value(V) {}
395
396 OverflowSafeSignedAPInt operator+(const OverflowSafeSignedAPInt &RHS) const {
397 if (!Value || !RHS.Value)
398 return OverflowSafeSignedAPInt();
399 bool Overflow;
400 APInt Result = Value->sadd_ov(RHS: *RHS.Value, Overflow);
401 if (Overflow)
402 return OverflowSafeSignedAPInt();
403 return OverflowSafeSignedAPInt(Result);
404 }
405
406 OverflowSafeSignedAPInt operator+(int RHS) const {
407 if (!Value)
408 return OverflowSafeSignedAPInt();
409 return *this + fromInt(V: RHS);
410 }
411
412 OverflowSafeSignedAPInt operator-(const OverflowSafeSignedAPInt &RHS) const {
413 if (!Value || !RHS.Value)
414 return OverflowSafeSignedAPInt();
415 bool Overflow;
416 APInt Result = Value->ssub_ov(RHS: *RHS.Value, Overflow);
417 if (Overflow)
418 return OverflowSafeSignedAPInt();
419 return OverflowSafeSignedAPInt(Result);
420 }
421
422 OverflowSafeSignedAPInt operator-(int RHS) const {
423 if (!Value)
424 return OverflowSafeSignedAPInt();
425 return *this - fromInt(V: RHS);
426 }
427
428 OverflowSafeSignedAPInt operator*(const OverflowSafeSignedAPInt &RHS) const {
429 if (!Value || !RHS.Value)
430 return OverflowSafeSignedAPInt();
431 bool Overflow;
432 APInt Result = Value->smul_ov(RHS: *RHS.Value, Overflow);
433 if (Overflow)
434 return OverflowSafeSignedAPInt();
435 return OverflowSafeSignedAPInt(Result);
436 }
437
438 OverflowSafeSignedAPInt operator-() const {
439 if (!Value)
440 return OverflowSafeSignedAPInt();
441 if (Value->isMinSignedValue())
442 return OverflowSafeSignedAPInt();
443 return OverflowSafeSignedAPInt(-*Value);
444 }
445
446 operator bool() const { return Value.has_value(); }
447
448 bool operator!() const { return !Value.has_value(); }
449
450 const APInt &operator*() const {
451 assert(Value && "Value is not available.");
452 return *Value;
453 }
454
455 const APInt *operator->() const {
456 assert(Value && "Value is not available.");
457 return &*Value;
458 }
459
460private:
461 /// Underlying value. std::nullopt means "unknown". An arithmetic operation on
462 /// "unknown" always produces "unknown".
463 std::optional<APInt> Value;
464
465 OverflowSafeSignedAPInt fromInt(uint64_t V) const {
466 assert(Value && "Value is not available.");
467 return OverflowSafeSignedAPInt(
468 APInt(Value->getBitWidth(), V, /*isSigned=*/true));
469 }
470};
471
472} // anonymous namespace
473
474// Used to test the dependence analyzer.
475// Looks through the function, noting instructions that may access memory.
476// Calls depends() on every possible pair and prints out the result.
477// Ignores all other instructions.
478static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
479 ScalarEvolution &SE, LoopInfo &LI,
480 bool NormalizeResults) {
481 auto *F = DA->getFunction();
482
483 if (DumpMonotonicityReport) {
484 SCEVMonotonicityChecker Checker(&SE);
485 OS << "Monotonicity check:\n";
486 for (Instruction &Inst : instructions(F)) {
487 if (!isa<LoadInst>(Val: Inst) && !isa<StoreInst>(Val: Inst))
488 continue;
489 Value *Ptr = getLoadStorePointerOperand(V: &Inst);
490 const Loop *L = LI.getLoopFor(BB: Inst.getParent());
491 const Loop *OutermostLoop = L ? L->getOutermostLoop() : nullptr;
492 const SCEV *PtrSCEV = SE.getSCEVAtScope(V: Ptr, L);
493 const SCEV *AccessFn = SE.removePointerBase(S: PtrSCEV);
494 SCEVMonotonicity Mon = Checker.checkMonotonicity(Expr: AccessFn, OutermostLoop);
495 OS.indent(NumSpaces: 2) << "Inst: " << Inst << "\n";
496 OS.indent(NumSpaces: 4) << "Expr: " << *AccessFn << "\n";
497 Mon.print(OS, Depth: 4);
498 }
499 OS << "\n";
500 }
501
502 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
503 ++SrcI) {
504 if (SrcI->mayReadOrWriteMemory()) {
505 for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE;
506 ++DstI) {
507 if (DstI->mayReadOrWriteMemory()) {
508 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
509 OS << " da analyze - ";
510 if (auto D = DA->depends(Src: &*SrcI, Dst: &*DstI,
511 /*UnderRuntimeAssumptions=*/true)) {
512
513#ifndef NDEBUG
514 // Verify that the distance being zero is equivalent to the
515 // direction being EQ.
516 for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
517 const SCEV *Distance = D->getDistance(Level);
518 bool IsDistanceZero = Distance && Distance->isZero();
519 bool IsDirectionEQ =
520 D->getDirection(Level) == Dependence::DVEntry::EQ;
521 assert(IsDistanceZero == IsDirectionEQ &&
522 "Inconsistent distance and direction.");
523 }
524#endif
525
526 // Normalize negative direction vectors if required by clients.
527 if (NormalizeResults && D->normalize(SE: &SE))
528 OS << "normalized - ";
529 D->dump(OS);
530 } else
531 OS << "none!\n";
532 }
533 }
534 }
535 }
536}
537
538void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
539 const Module *) const {
540 dumpExampleDependence(
541 OS, DA: info.get(), SE&: getAnalysis<ScalarEvolutionWrapperPass>().getSE(),
542 LI&: getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), NormalizeResults: false);
543}
544
545PreservedAnalyses
546DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
547 OS << "Printing analysis 'Dependence Analysis' for function '" << F.getName()
548 << "':\n";
549 dumpExampleDependence(OS, DA: &FAM.getResult<DependenceAnalysis>(IR&: F),
550 SE&: FAM.getResult<ScalarEvolutionAnalysis>(IR&: F),
551 LI&: FAM.getResult<LoopAnalysis>(IR&: F), NormalizeResults);
552 return PreservedAnalyses::all();
553}
554
555//===----------------------------------------------------------------------===//
556// Dependence methods
557
558// Returns true if this is an input dependence.
559bool Dependence::isInput() const {
560 return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
561}
562
563// Returns true if this is an output dependence.
564bool Dependence::isOutput() const {
565 return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
566}
567
568// Returns true if this is an flow (aka true) dependence.
569bool Dependence::isFlow() const {
570 return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
571}
572
573// Returns true if this is an anti dependence.
574bool Dependence::isAnti() const {
575 return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
576}
577
578// Returns true if a particular level is scalar; that is,
579// if no subscript in the source or destination mention the induction
580// variable associated with the loop at this level.
581// Leave this out of line, so it will serve as a virtual method anchor
582bool Dependence::isScalar(unsigned level, bool IsSameSD) const { return false; }
583
584//===----------------------------------------------------------------------===//
585// FullDependence methods
586
587FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
588 const SCEVUnionPredicate &Assumes,
589 bool PossiblyLoopIndependent,
590 unsigned CommonLevels)
591 : Dependence(Source, Destination, Assumes), Levels(CommonLevels),
592 LoopIndependent(PossiblyLoopIndependent) {
593 SameSDLevels = 0;
594 if (CommonLevels)
595 DV = std::make_unique<DVEntry[]>(num: CommonLevels);
596}
597
598// FIXME: in some cases the meaning of a negative direction vector
599// may not be straightforward, e.g.,
600// for (int i = 0; i < 32; ++i) {
601// Src: A[i] = ...;
602// Dst: use(A[31 - i]);
603// }
604// The dependency is
605// flow { Src[i] -> Dst[31 - i] : when i >= 16 } and
606// anti { Dst[i] -> Src[31 - i] : when i < 16 },
607// -- hence a [<>].
608// As long as a dependence result contains '>' ('<>', '<=>', "*"), it
609// means that a reversed/normalized dependence needs to be considered
610// as well. Nevertheless, current isDirectionNegative() only returns
611// true with a '>' or '>=' dependency for ease of canonicalizing the
612// dependency vector, since the reverse of '<>', '<=>' and "*" is itself.
613bool FullDependence::isDirectionNegative() const {
614 for (unsigned Level = 1; Level <= Levels; ++Level) {
615 unsigned char Direction = DV[Level - 1].Direction;
616 if (Direction == Dependence::DVEntry::EQ)
617 continue;
618 if (Direction == Dependence::DVEntry::GT ||
619 Direction == Dependence::DVEntry::GE)
620 return true;
621 return false;
622 }
623 return false;
624}
625
626bool FullDependence::normalize(ScalarEvolution *SE) {
627 if (!isDirectionNegative())
628 return false;
629
630 LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
631 dump(dbgs()););
632 std::swap(a&: Src, b&: Dst);
633 for (unsigned Level = 1; Level <= Levels; ++Level) {
634 unsigned char Direction = DV[Level - 1].Direction;
635 // Reverse the direction vector, this means LT becomes GT
636 // and GT becomes LT.
637 unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
638 if (Direction & Dependence::DVEntry::LT)
639 RevDirection |= Dependence::DVEntry::GT;
640 if (Direction & Dependence::DVEntry::GT)
641 RevDirection |= Dependence::DVEntry::LT;
642 DV[Level - 1].Direction = RevDirection;
643 // Reverse the dependence distance as well.
644 if (DV[Level - 1].Distance != nullptr)
645 DV[Level - 1].Distance = SE->getNegativeSCEV(V: DV[Level - 1].Distance);
646 }
647
648 LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
649 dump(dbgs()););
650 return true;
651}
652
653// The rest are simple getters that hide the implementation.
654
655// getDirection - Returns the direction associated with a particular common or
656// SameSD level.
657unsigned FullDependence::getDirection(unsigned Level, bool IsSameSD) const {
658 return getDVEntry(Level, IsSameSD).Direction;
659}
660
661// Returns the distance (or NULL) associated with a particular common or
662// SameSD level.
663const SCEV *FullDependence::getDistance(unsigned Level, bool IsSameSD) const {
664 return getDVEntry(Level, IsSameSD).Distance;
665}
666
667// Returns true if a particular regular or SameSD level is scalar; that is,
668// if no subscript in the source or destination mention the induction variable
669// associated with the loop at this level.
670bool FullDependence::isScalar(unsigned Level, bool IsSameSD) const {
671 return getDVEntry(Level, IsSameSD).Scalar;
672}
673
674// inSameSDLoops - Returns true if this level is an SameSD level, i.e.,
675// performed across two separate loop nests that have the Same iteration space
676// and Depth.
677bool FullDependence::inSameSDLoops(unsigned Level) const {
678 assert(0 < Level && Level <= static_cast<unsigned>(Levels) + SameSDLevels &&
679 "Level out of range");
680 return Level > Levels;
681}
682
683//===----------------------------------------------------------------------===//
684// SCEVMonotonicity
685
686SCEVMonotonicity::SCEVMonotonicity(SCEVMonotonicityType Type,
687 const SCEV *FailurePoint)
688 : Type(Type), FailurePoint(FailurePoint) {
689 assert(
690 ((Type == SCEVMonotonicityType::Unknown) == (FailurePoint != nullptr)) &&
691 "FailurePoint must be provided iff Type is Unknown");
692}
693
694void SCEVMonotonicity::print(raw_ostream &OS, unsigned Depth) const {
695 OS.indent(NumSpaces: Depth) << "Monotonicity: ";
696 switch (Type) {
697 case SCEVMonotonicityType::Unknown:
698 assert(FailurePoint && "FailurePoint must be provided for Unknown");
699 OS << "Unknown\n";
700 OS.indent(NumSpaces: Depth) << "Reason: " << *FailurePoint << "\n";
701 break;
702 case SCEVMonotonicityType::Invariant:
703 OS << "Invariant\n";
704 break;
705 case SCEVMonotonicityType::MultivariateSignedMonotonic:
706 OS << "MultivariateSignedMonotonic\n";
707 break;
708 }
709}
710
711bool SCEVMonotonicityChecker::isLoopInvariant(const SCEV *Expr) const {
712 return !OutermostLoop || SE->isLoopInvariant(S: Expr, L: OutermostLoop);
713}
714
715SCEVMonotonicity SCEVMonotonicityChecker::invariantOrUnknown(const SCEV *Expr) {
716 if (isLoopInvariant(Expr))
717 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
718 return createUnknown(FailurePoint: Expr);
719}
720
721SCEVMonotonicity
722SCEVMonotonicityChecker::checkMonotonicity(const SCEV *Expr,
723 const Loop *OutermostLoop) {
724 assert((!OutermostLoop || OutermostLoop->isOutermost()) &&
725 "OutermostLoop must be outermost");
726 assert(Expr->getType()->isIntegerTy() && "Expr must be integer type");
727 this->OutermostLoop = OutermostLoop;
728 return visit(S: Expr);
729}
730
731/// We only care about an affine AddRec at the moment. For an affine AddRec,
732/// the monotonicity can be inferred from its nowrap property. For example, let
733/// X and Y be loop-invariant, and assume Y is non-negative. An AddRec
734/// {X,+.Y}<nsw> implies:
735///
736/// X <=s (X + Y) <=s ((X + Y) + Y) <=s ...
737///
738/// Thus, we can conclude that the AddRec is monotonically increasing with
739/// respect to the associated loop in a signed sense. The similar reasoning
740/// applies when Y is non-positive, leading to a monotonically decreasing
741/// AddRec.
742SCEVMonotonicity
743SCEVMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
744 if (!Expr->isAffine() || !Expr->hasNoSignedWrap())
745 return createUnknown(FailurePoint: Expr);
746
747 const SCEV *Start = Expr->getStart();
748 const SCEV *Step = Expr->getStepRecurrence(SE&: *SE);
749
750 SCEVMonotonicity StartMon = visit(S: Start);
751 if (StartMon.isUnknown())
752 return StartMon;
753
754 if (!isLoopInvariant(Expr: Step))
755 return createUnknown(FailurePoint: Expr);
756
757 return SCEVMonotonicity(SCEVMonotonicityType::MultivariateSignedMonotonic);
758}
759
760//===----------------------------------------------------------------------===//
761// DependenceInfo methods
762
763// For debugging purposes. Dumps a dependence to OS.
764void Dependence::dump(raw_ostream &OS) const {
765 if (isConfused())
766 OS << "confused";
767 else {
768 if (isFlow())
769 OS << "flow";
770 else if (isOutput())
771 OS << "output";
772 else if (isAnti())
773 OS << "anti";
774 else if (isInput())
775 OS << "input";
776 dumpImp(OS);
777 unsigned SameSDLevels = getSameSDLevels();
778 if (SameSDLevels > 0) {
779 OS << " / assuming " << SameSDLevels << " loop level(s) fused: ";
780 dumpImp(OS, IsSameSD: true);
781 }
782 }
783 OS << "!\n";
784
785 SCEVUnionPredicate Assumptions = getRuntimeAssumptions();
786 if (!Assumptions.isAlwaysTrue()) {
787 OS << " Runtime Assumptions:\n";
788 Assumptions.print(OS, Depth: 2);
789 }
790}
791
792// For debugging purposes. Dumps a dependence to OS with or without considering
793// the SameSD levels.
794void Dependence::dumpImp(raw_ostream &OS, bool IsSameSD) const {
795 unsigned Levels = getLevels();
796 unsigned SameSDLevels = getSameSDLevels();
797 bool OnSameSD = false;
798 unsigned LevelNum = Levels;
799 if (IsSameSD)
800 LevelNum += SameSDLevels;
801 OS << " [";
802 for (unsigned II = 1; II <= LevelNum; ++II) {
803 if (!OnSameSD && inSameSDLoops(Level: II))
804 OnSameSD = true;
805 const SCEV *Distance = getDistance(Level: II, SameSD: OnSameSD);
806 if (Distance)
807 OS << *Distance;
808 else if (isScalar(level: II, IsSameSD: OnSameSD))
809 OS << "S";
810 else {
811 unsigned Direction = getDirection(Level: II, SameSD: OnSameSD);
812 if (Direction == DVEntry::ALL)
813 OS << "*";
814 else {
815 if (Direction & DVEntry::LT)
816 OS << "<";
817 if (Direction & DVEntry::EQ)
818 OS << "=";
819 if (Direction & DVEntry::GT)
820 OS << ">";
821 }
822 }
823 if (II < LevelNum)
824 OS << " ";
825 }
826 if (isLoopIndependent())
827 OS << "|<";
828 OS << "]";
829}
830
831// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
832// underlaying objects. If LocA and LocB are known to not alias (for any reason:
833// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
834// Otherwise the underlying objects are checked to see if they point to
835// different identifiable objects.
836static AliasResult underlyingObjectsAlias(AAResults *AA, const DataLayout &DL,
837 const MemoryLocation &LocA,
838 const MemoryLocation &LocB) {
839 // Check the original locations (minus size) for noalias, which can happen for
840 // tbaa, incompatible underlying object locations, etc.
841 MemoryLocation LocAS =
842 MemoryLocation::getBeforeOrAfter(Ptr: LocA.Ptr, AATags: LocA.AATags);
843 MemoryLocation LocBS =
844 MemoryLocation::getBeforeOrAfter(Ptr: LocB.Ptr, AATags: LocB.AATags);
845 BatchAAResults BAA(*AA);
846 BAA.enableCrossIterationMode();
847
848 if (BAA.isNoAlias(LocA: LocAS, LocB: LocBS))
849 return AliasResult::NoAlias;
850
851 // Check the underlying objects are the same
852 const Value *AObj = getUnderlyingObject(V: LocA.Ptr);
853 const Value *BObj = getUnderlyingObject(V: LocB.Ptr);
854
855 // If the underlying objects are the same, they must alias
856 if (AObj == BObj)
857 return AliasResult::MustAlias;
858
859 // We may have hit the recursion limit for underlying objects, or have
860 // underlying objects where we don't know they will alias.
861 if (!isIdentifiedObject(V: AObj) || !isIdentifiedObject(V: BObj))
862 return AliasResult::MayAlias;
863
864 // Otherwise we know the objects are different and both identified objects so
865 // must not alias.
866 return AliasResult::NoAlias;
867}
868
869// Returns true if the load or store can be analyzed. Atomic and volatile
870// operations have properties which this analysis does not understand.
871static bool isLoadOrStore(const Instruction *I) {
872 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
873 return LI->isUnordered();
874 else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
875 return SI->isUnordered();
876 return false;
877}
878
879// Returns true if two loops have the Same iteration Space and Depth. To be
880// more specific, two loops have SameSD if they are in the same nesting
881// depth and have the same backedge count. SameSD stands for Same iteration
882// Space and Depth.
883bool DependenceInfo::haveSameSD(const Loop *SrcLoop,
884 const Loop *DstLoop) const {
885 if (SrcLoop == DstLoop)
886 return true;
887
888 if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
889 return false;
890
891 if (!SrcLoop || !SrcLoop->getLoopLatch() || !DstLoop ||
892 !DstLoop->getLoopLatch())
893 return false;
894
895 const SCEV *SrcUB = nullptr, *DstUP = nullptr;
896 if (SE->hasLoopInvariantBackedgeTakenCount(L: SrcLoop))
897 SrcUB = SE->getBackedgeTakenCount(L: SrcLoop);
898 if (SE->hasLoopInvariantBackedgeTakenCount(L: DstLoop))
899 DstUP = SE->getBackedgeTakenCount(L: DstLoop);
900
901 if (SrcUB != nullptr && DstUP != nullptr) {
902 Type *WiderType = SE->getWiderType(Ty1: SrcUB->getType(), Ty2: DstUP->getType());
903 SrcUB = SE->getNoopOrZeroExtend(V: SrcUB, Ty: WiderType);
904 DstUP = SE->getNoopOrZeroExtend(V: DstUP, Ty: WiderType);
905
906 if (SE->isKnownPredicate(Pred: ICmpInst::ICMP_EQ, LHS: SrcUB, RHS: DstUP))
907 return true;
908 }
909
910 return false;
911}
912
913// Examines the loop nesting of the Src and Dst
914// instructions and establishes their shared loops. Sets the variables
915// CommonLevels, SrcLevels, and MaxLevels.
916// The source and destination instructions needn't be contained in the same
917// loop. The routine establishNestingLevels finds the level of most deeply
918// nested loop that contains them both, CommonLevels. An instruction that's
919// not contained in a loop is at level = 0. MaxLevels is equal to the level
920// of the source plus the level of the destination, minus CommonLevels.
921// This lets us allocate vectors MaxLevels in length, with room for every
922// distinct loop referenced in both the source and destination subscripts.
923// The variable SrcLevels is the nesting depth of the source instruction.
924// It's used to help calculate distinct loops referenced by the destination.
925// Here's the map from loops to levels:
926// 0 - unused
927// 1 - outermost common loop
928// ... - other common loops
929// CommonLevels - innermost common loop
930// ... - loops containing Src but not Dst
931// SrcLevels - innermost loop containing Src but not Dst
932// ... - loops containing Dst but not Src
933// MaxLevels - innermost loops containing Dst but not Src
934// Consider the follow code fragment:
935// for (a = ...) {
936// for (b = ...) {
937// for (c = ...) {
938// for (d = ...) {
939// A[] = ...;
940// }
941// }
942// for (e = ...) {
943// for (f = ...) {
944// for (g = ...) {
945// ... = A[];
946// }
947// }
948// }
949// }
950// }
951// If we're looking at the possibility of a dependence between the store
952// to A (the Src) and the load from A (the Dst), we'll note that they
953// have 2 loops in common, so CommonLevels will equal 2 and the direction
954// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
955// A map from loop names to loop numbers would look like
956// a - 1
957// b - 2 = CommonLevels
958// c - 3
959// d - 4 = SrcLevels
960// e - 5
961// f - 6
962// g - 7 = MaxLevels
963// SameSDLevels counts the number of levels after common levels that are
964// not common but have the same iteration space and depth. Internally this
965// is checked using haveSameSD. Assume that in this code fragment, levels c and
966// e have the same iteration space and depth, but levels d and f does not. Then
967// SameSDLevels is set to 1. In that case the level numbers for the previous
968// code look like
969// a - 1
970// b - 2
971// c,e - 3 = CommonLevels
972// d - 4 = SrcLevels
973// f - 5
974// g - 6 = MaxLevels
975void DependenceInfo::establishNestingLevels(const Instruction *Src,
976 const Instruction *Dst) {
977 const BasicBlock *SrcBlock = Src->getParent();
978 const BasicBlock *DstBlock = Dst->getParent();
979 unsigned SrcLevel = LI->getLoopDepth(BB: SrcBlock);
980 unsigned DstLevel = LI->getLoopDepth(BB: DstBlock);
981 const Loop *SrcLoop = LI->getLoopFor(BB: SrcBlock);
982 const Loop *DstLoop = LI->getLoopFor(BB: DstBlock);
983 SrcLevels = SrcLevel;
984 MaxLevels = SrcLevel + DstLevel;
985 SameSDLevels = 0;
986 while (SrcLevel > DstLevel) {
987 SrcLoop = SrcLoop->getParentLoop();
988 SrcLevel--;
989 }
990 while (DstLevel > SrcLevel) {
991 DstLoop = DstLoop->getParentLoop();
992 DstLevel--;
993 }
994
995 // find the first common level and count the SameSD levels leading to it
996 while (SrcLoop != DstLoop) {
997 SameSDLevels++;
998 if (!haveSameSD(SrcLoop, DstLoop))
999 SameSDLevels = 0;
1000 SrcLoop = SrcLoop->getParentLoop();
1001 DstLoop = DstLoop->getParentLoop();
1002 SrcLevel--;
1003 }
1004 CommonLevels = SrcLevel;
1005 MaxLevels -= CommonLevels;
1006}
1007
1008// Given one of the loops containing the source, return
1009// its level index in our numbering scheme.
1010unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
1011 return SrcLoop->getLoopDepth();
1012}
1013
1014// Given one of the loops containing the destination,
1015// return its level index in our numbering scheme.
1016unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
1017 unsigned D = DstLoop->getLoopDepth();
1018 if (D > CommonLevels)
1019 // This tries to make sure that we assign unique numbers to src and dst when
1020 // the memory accesses reside in different loops that have the same depth.
1021 return D - CommonLevels + SrcLevels;
1022 else
1023 return D;
1024}
1025
1026// Returns true if Expression is loop invariant in LoopNest.
1027bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
1028 const Loop *LoopNest) const {
1029 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of
1030 // any loop as invariant, because we only consier expression evaluation at a
1031 // specific position (where the array access takes place), and not across the
1032 // entire function.
1033 if (!LoopNest)
1034 return true;
1035
1036 // If the expression is invariant in the outermost loop of the loop nest, it
1037 // is invariant anywhere in the loop nest.
1038 return SE->isLoopInvariant(S: Expression, L: LoopNest->getOutermostLoop());
1039}
1040
1041// Finds the set of loops from the LoopNest that
1042// have a level <= CommonLevels and are referred to by the SCEV Expression.
1043void DependenceInfo::collectCommonLoops(const SCEV *Expression,
1044 const Loop *LoopNest,
1045 SmallBitVector &Loops) const {
1046 while (LoopNest) {
1047 unsigned Level = LoopNest->getLoopDepth();
1048 if (Level <= CommonLevels && !SE->isLoopInvariant(S: Expression, L: LoopNest))
1049 Loops.set(Level);
1050 LoopNest = LoopNest->getParentLoop();
1051 }
1052}
1053
1054// Examine the scev and return true iff it's affine.
1055// Collect any loops mentioned in the set of "Loops".
1056bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
1057 SmallBitVector &Loops, bool IsSrc) {
1058 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
1059 if (!AddRec)
1060 return isLoopInvariant(Expression: Expr, LoopNest);
1061
1062 // The AddRec must depend on one of the containing loops. Otherwise,
1063 // mapSrcLoop and mapDstLoop return indices outside the intended range. This
1064 // can happen when a subscript in one loop references an IV from a sibling
1065 // loop that could not be replaced with a concrete exit value by
1066 // getSCEVAtScope.
1067 const Loop *L = LoopNest;
1068 while (L && AddRec->getLoop() != L)
1069 L = L->getParentLoop();
1070 if (!L)
1071 return false;
1072
1073 const SCEV *Start = AddRec->getStart();
1074 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
1075 if (!isLoopInvariant(Expression: Step, LoopNest))
1076 return false;
1077 if (IsSrc)
1078 Loops.set(mapSrcLoop(SrcLoop: AddRec->getLoop()));
1079 else
1080 Loops.set(mapDstLoop(DstLoop: AddRec->getLoop()));
1081 return checkSubscript(Expr: Start, LoopNest, Loops, IsSrc);
1082}
1083
1084// Examine the scev and return true iff it's linear.
1085// Collect any loops mentioned in the set of "Loops".
1086bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
1087 SmallBitVector &Loops) {
1088 return checkSubscript(Expr: Src, LoopNest, Loops, IsSrc: true);
1089}
1090
1091// Examine the scev and return true iff it's linear.
1092// Collect any loops mentioned in the set of "Loops".
1093bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
1094 SmallBitVector &Loops) {
1095 return checkSubscript(Expr: Dst, LoopNest, Loops, IsSrc: false);
1096}
1097
1098// Examines the subscript pair (the Src and Dst SCEVs)
1099// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
1100// Collects the associated loops in a set.
1101DependenceInfo::Subscript::ClassificationKind
1102DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
1103 const SCEV *Dst, const Loop *DstLoopNest,
1104 SmallBitVector &Loops) {
1105 SmallBitVector SrcLoops(MaxLevels + 1);
1106 SmallBitVector DstLoops(MaxLevels + 1);
1107 if (!checkSrcSubscript(Src, LoopNest: SrcLoopNest, Loops&: SrcLoops))
1108 return Subscript::NonLinear;
1109 if (!checkDstSubscript(Dst, LoopNest: DstLoopNest, Loops&: DstLoops))
1110 return Subscript::NonLinear;
1111 Loops = SrcLoops;
1112 Loops |= DstLoops;
1113 unsigned N = Loops.count();
1114 if (N == 0)
1115 return Subscript::ZIV;
1116 if (N == 1)
1117 return Subscript::SIV;
1118 if (N == 2 && SrcLoops.count() == 1 && DstLoops.count() == 1)
1119 return Subscript::RDIV;
1120 return Subscript::MIV;
1121}
1122
1123// All subscripts are all the same type.
1124// Loop bound may be smaller (e.g., a char).
1125// Should zero extend loop bound, since it's always >= 0.
1126// This routine collects upper bound and extends or truncates if needed.
1127// Truncating is safe when subscripts are known not to wrap. Cases without
1128// nowrap flags should have been rejected earlier.
1129// Return null if no bound available.
1130const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
1131 if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
1132 const SCEV *UB = SE->getBackedgeTakenCount(L);
1133 return SE->getTruncateOrZeroExtend(V: UB, Ty: T);
1134 }
1135 return nullptr;
1136}
1137
1138// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1139// If the cast fails, returns NULL.
1140const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
1141 Type *T) const {
1142 if (const SCEV *UB = collectUpperBound(L, T))
1143 return dyn_cast<SCEVConstant>(Val: UB);
1144 return nullptr;
1145}
1146
1147/// Returns \p A - \p B if it guaranteed not to signed wrap. Otherwise returns
1148/// nullptr. \p A and \p B must have the same integer type.
1149static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
1150 ScalarEvolution &SE) {
1151 if (SE.willNotOverflow(BinOp: Instruction::Sub, /*Signed=*/true, LHS: A, RHS: B))
1152 return SE.getMinusSCEV(LHS: A, RHS: B);
1153 return nullptr;
1154}
1155
1156/// Returns true iff \p Test is enabled.
1157static bool isDependenceTestEnabled(DependenceTestType Test) {
1158 if (EnableDependenceTest == DependenceTestType::All)
1159 return true;
1160 return EnableDependenceTest == Test;
1161}
1162
1163// testZIV -
1164// When we have a pair of subscripts of the form [c1] and [c2],
1165// where c1 and c2 are both loop invariant, we attack it using
1166// the ZIV test. Basically, we test by comparing the two values,
1167// but there are actually three possible results:
1168// 1) the values are equal, so there's a dependence
1169// 2) the values are different, so there's no dependence
1170// 3) the values might be equal, so we have to assume a dependence.
1171//
1172// Return true if dependence disproved.
1173bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
1174 FullDependence &Result) const {
1175 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1176 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1177 ++ZIVapplications;
1178 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: Src, RHS: Dst)) {
1179 LLVM_DEBUG(dbgs() << " provably dependent\n");
1180 return false; // provably dependent
1181 }
1182 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_NE, LHS: Src, RHS: Dst)) {
1183 LLVM_DEBUG(dbgs() << " provably independent\n");
1184 ++ZIVindependence;
1185 return true; // provably independent
1186 }
1187 LLVM_DEBUG(dbgs() << " possibly dependent\n");
1188 return false; // possibly dependent
1189}
1190
1191// strongSIVtest -
1192// From the paper, Practical Dependence Testing, Section 4.2.1
1193//
1194// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1195// where i is an induction variable, c1 and c2 are loop invariant,
1196// and a is a constant, we can solve it exactly using the Strong SIV test.
1197//
1198// Can prove independence. Failing that, can compute distance (and direction).
1199// In the presence of symbolic terms, we can sometimes make progress.
1200//
1201// If there's a dependence,
1202//
1203// c1 + a*i = c2 + a*i'
1204//
1205// The dependence distance is
1206//
1207// d = i' - i = (c1 - c2)/a
1208//
1209// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1210// loop's upper bound. If a dependence exists, the dependence direction is
1211// defined as
1212//
1213// { < if d > 0
1214// direction = { = if d = 0
1215// { > if d < 0
1216//
1217// Return true if dependence disproved.
1218bool DependenceInfo::strongSIVtest(const SCEVAddRecExpr *Src,
1219 const SCEVAddRecExpr *Dst, unsigned Level,
1220 FullDependence &Result,
1221 bool UnderRuntimeAssumptions) {
1222 if (!isDependenceTestEnabled(Test: DependenceTestType::StrongSIV))
1223 return false;
1224
1225 if (!Src->hasNoSignedWrap() || !Dst->hasNoSignedWrap())
1226 return false;
1227
1228 const SCEV *Coeff = Src->getStepRecurrence(SE&: *SE);
1229 assert(Coeff == Dst->getStepRecurrence(*SE) &&
1230 "Expecting same coefficient in Strong SIV test");
1231 const SCEV *SrcConst = Src->getStart();
1232 const SCEV *DstConst = Dst->getStart();
1233 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1234 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
1235 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1236 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
1237 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1238 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
1239 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1240 ++StrongSIVapplications;
1241 assert(0 < Level && Level <= CommonLevels && "level out of range");
1242 Level--;
1243
1244 // First try to prove independence based on the ranges of the two subscripts.
1245 ConstantRange SrcRange = SE->getSignedRange(S: Src);
1246 ConstantRange DstRange = SE->getSignedRange(S: Dst);
1247 if (SrcRange.intersectWith(CR: DstRange).isEmptySet()) {
1248 ++StrongSIVindependence;
1249 ++StrongSIVsuccesses;
1250 return true;
1251 }
1252
1253 const SCEV *Delta = minusSCEVNoSignedOverflow(A: SrcConst, B: DstConst, SE&: *SE);
1254 if (!Delta)
1255 return false;
1256 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
1257 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1258
1259 // Can we compute distance?
1260 if (isa<SCEVConstant>(Val: Delta) && isa<SCEVConstant>(Val: Coeff)) {
1261 APInt ConstDelta = cast<SCEVConstant>(Val: Delta)->getAPInt();
1262 APInt ConstCoeff = cast<SCEVConstant>(Val: Coeff)->getAPInt();
1263 APInt Distance = ConstDelta; // these need to be initialized
1264 APInt Remainder = ConstDelta;
1265 APInt::sdivrem(LHS: ConstDelta, RHS: ConstCoeff, Quotient&: Distance, Remainder);
1266 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1267 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1268 // Make sure Coeff divides Delta exactly
1269 if (Remainder != 0) {
1270 // Coeff doesn't divide Distance, no dependence
1271 ++StrongSIVindependence;
1272 ++StrongSIVsuccesses;
1273 return true;
1274 }
1275 Result.DV[Level].Distance = SE->getConstant(Val: Distance);
1276 if (Distance.sgt(RHS: 0))
1277 Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1278 else if (Distance.slt(RHS: 0))
1279 Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1280 else
1281 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1282 ++StrongSIVsuccesses;
1283 } else if (Delta->isZero()) {
1284 // Check if coefficient could be zero. If so, 0/0 is undefined and we
1285 // cannot conclude that only same-iteration dependencies exist.
1286 // When coeff=0, all iterations access the same location.
1287 if (SE->isKnownNonZero(S: Coeff)) {
1288 LLVM_DEBUG(
1289 dbgs() << "\t Coefficient proven non-zero by SCEV analysis\n");
1290 } else {
1291 // Cannot prove at compile time, would need runtime assumption.
1292 if (UnderRuntimeAssumptions) {
1293 const SCEVPredicate *Pred = SE->getComparePredicate(
1294 Pred: ICmpInst::ICMP_NE, LHS: Coeff, RHS: SE->getZero(Ty: Coeff->getType()));
1295 Result.Assumptions = Result.Assumptions.getUnionWith(N: Pred, SE&: *SE);
1296 LLVM_DEBUG(dbgs() << "\t Added runtime assumption: " << *Coeff
1297 << " != 0\n");
1298 } else {
1299 // Cannot add runtime assumptions, this test cannot handle this case.
1300 // Let more complex tests try.
1301 LLVM_DEBUG(dbgs() << "\t Would need runtime assumption " << *Coeff
1302 << " != 0, but not allowed. Failing this test.\n");
1303 return false;
1304 }
1305 }
1306 // Since 0/X == 0 (where X is known non-zero or assumed non-zero).
1307 Result.DV[Level].Distance = Delta;
1308 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1309 ++StrongSIVsuccesses;
1310 } else {
1311 if (Coeff->isOne()) {
1312 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
1313 Result.DV[Level].Distance = Delta; // since X/1 == X
1314 }
1315
1316 // maybe we can get a useful direction
1317 bool DeltaMaybeZero = !SE->isKnownNonZero(S: Delta);
1318 bool DeltaMaybePositive = !SE->isKnownNonPositive(S: Delta);
1319 bool DeltaMaybeNegative = !SE->isKnownNonNegative(S: Delta);
1320 bool CoeffMaybePositive = !SE->isKnownNonPositive(S: Coeff);
1321 bool CoeffMaybeNegative = !SE->isKnownNonNegative(S: Coeff);
1322 // The double negatives above are confusing.
1323 // It helps to read !SE->isKnownNonZero(Delta)
1324 // as "Delta might be Zero"
1325 unsigned NewDirection = Dependence::DVEntry::NONE;
1326 if ((DeltaMaybePositive && CoeffMaybePositive) ||
1327 (DeltaMaybeNegative && CoeffMaybeNegative))
1328 NewDirection = Dependence::DVEntry::LT;
1329 if (DeltaMaybeZero)
1330 NewDirection |= Dependence::DVEntry::EQ;
1331 if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1332 (DeltaMaybePositive && CoeffMaybeNegative))
1333 NewDirection |= Dependence::DVEntry::GT;
1334 if (NewDirection < Result.DV[Level].Direction)
1335 ++StrongSIVsuccesses;
1336 Result.DV[Level].Direction &= NewDirection;
1337 }
1338 return false;
1339}
1340
1341// weakCrossingSIVtest -
1342// From the paper, Practical Dependence Testing, Section 4.2.2
1343//
1344// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1345// where i is an induction variable, c1 and c2 are loop invariant,
1346// and a is a constant, we can solve it exactly using the
1347// Weak-Crossing SIV test.
1348//
1349// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1350// the two lines, where i = i', yielding
1351//
1352// c1 + a*i = c2 - a*i
1353// 2a*i = c2 - c1
1354// i = (c2 - c1)/2a
1355//
1356// If i < 0, there is no dependence.
1357// If i > upperbound, there is no dependence.
1358// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1359// If i = upperbound, there's a dependence with distance = 0.
1360// If i is integral, there's a dependence (all directions).
1361// If the non-integer part = 1/2, there's a dependence (<> directions).
1362// Otherwise, there's no dependence.
1363//
1364// Can prove independence. Failing that,
1365// can sometimes refine the directions.
1366// Can determine iteration for splitting.
1367//
1368// Return true if dependence disproved.
1369bool DependenceInfo::weakCrossingSIVtest(const SCEV *Coeff,
1370 const SCEV *SrcConst,
1371 const SCEV *DstConst,
1372 const Loop *CurSrcLoop,
1373 const Loop *CurDstLoop, unsigned Level,
1374 FullDependence &Result) const {
1375 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakCrossingSIV))
1376 return false;
1377
1378 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1379 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
1380 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1381 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1382 ++WeakCrossingSIVapplications;
1383 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1384 Level--;
1385 const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
1386 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1387 if (Delta->isZero()) {
1388 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1389 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1390 ++WeakCrossingSIVsuccesses;
1391 if (!Result.DV[Level].Direction) {
1392 ++WeakCrossingSIVindependence;
1393 return true;
1394 }
1395 Result.DV[Level].Distance = Delta; // = 0
1396 return false;
1397 }
1398 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: Coeff);
1399 if (!ConstCoeff)
1400 return false;
1401
1402 if (SE->isKnownNegative(S: ConstCoeff)) {
1403 ConstCoeff = dyn_cast<SCEVConstant>(Val: SE->getNegativeSCEV(V: ConstCoeff));
1404 assert(ConstCoeff &&
1405 "dynamic cast of negative of ConstCoeff should yield constant");
1406 Delta = SE->getNegativeSCEV(V: Delta);
1407 }
1408 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1409
1410 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1411 if (!ConstDelta)
1412 return false;
1413
1414 // We're certain that ConstCoeff > 0; therefore,
1415 // if Delta < 0, then no dependence.
1416 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1417 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
1418 if (SE->isKnownNegative(S: Delta)) {
1419 // No dependence, Delta < 0
1420 ++WeakCrossingSIVindependence;
1421 ++WeakCrossingSIVsuccesses;
1422 return true;
1423 }
1424
1425 // We're certain that Delta > 0 and ConstCoeff > 0.
1426 // Check Delta/(2*ConstCoeff) against upper loop bound
1427 if (const SCEV *UpperBound =
1428 collectUpperBound(L: CurSrcLoop, T: Delta->getType())) {
1429 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1430 const SCEV *ConstantTwo = SE->getConstant(Ty: UpperBound->getType(), V: 2);
1431 const SCEV *ML =
1432 SE->getMulExpr(LHS: SE->getMulExpr(LHS: ConstCoeff, RHS: UpperBound), RHS: ConstantTwo);
1433 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n");
1434 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: Delta, RHS: ML)) {
1435 // Delta too big, no dependence
1436 ++WeakCrossingSIVindependence;
1437 ++WeakCrossingSIVsuccesses;
1438 return true;
1439 }
1440 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: Delta, RHS: ML)) {
1441 // i = i' = UB
1442 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1443 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1444 ++WeakCrossingSIVsuccesses;
1445 if (!Result.DV[Level].Direction) {
1446 ++WeakCrossingSIVindependence;
1447 return true;
1448 }
1449 Result.DV[Level].Distance = SE->getZero(Ty: Delta->getType());
1450 return false;
1451 }
1452 }
1453
1454 // check that Coeff divides Delta
1455 APInt APDelta = ConstDelta->getAPInt();
1456 APInt APCoeff = ConstCoeff->getAPInt();
1457 APInt Distance = APDelta; // these need to be initialzed
1458 APInt Remainder = APDelta;
1459 APInt::sdivrem(LHS: APDelta, RHS: APCoeff, Quotient&: Distance, Remainder);
1460 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1461 if (Remainder != 0) {
1462 // Coeff doesn't divide Delta, no dependence
1463 ++WeakCrossingSIVindependence;
1464 ++WeakCrossingSIVsuccesses;
1465 return true;
1466 }
1467 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1468
1469 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1470 APInt Two = APInt(Distance.getBitWidth(), 2, true);
1471 Remainder = Distance.srem(RHS: Two);
1472 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1473 if (Remainder != 0) {
1474 // Equal direction isn't possible
1475 Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ;
1476 ++WeakCrossingSIVsuccesses;
1477 }
1478 return false;
1479}
1480
1481// Kirch's algorithm, from
1482//
1483// Optimizing Supercompilers for Supercomputers
1484// Michael Wolfe
1485// MIT Press, 1989
1486//
1487// Program 2.1, page 29.
1488// Computes the GCD of AM and BM.
1489// Also finds a solution to the equation ax - by = gcd(a, b).
1490// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1491//
1492// We don't use OverflowSafeSignedAPInt here because it's known that this
1493// algorithm doesn't overflow.
1494static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1495 const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1496 APInt A0(Bits, 1, true), A1(Bits, 0, true);
1497 APInt B0(Bits, 0, true), B1(Bits, 1, true);
1498 APInt G0 = AM.abs();
1499 APInt G1 = BM.abs();
1500 APInt Q = G0; // these need to be initialized
1501 APInt R = G0;
1502 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1503 while (R != 0) {
1504 // clang-format off
1505 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1506 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1507 G0 = G1; G1 = R;
1508 // clang-format on
1509 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1510 }
1511 G = G1;
1512 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
1513 X = AM.slt(RHS: 0) ? -A1 : A1;
1514 Y = BM.slt(RHS: 0) ? B1 : -B1;
1515
1516 // make sure gcd divides Delta
1517 R = Delta.srem(RHS: G);
1518 if (R != 0)
1519 return true; // gcd doesn't divide Delta, no dependence
1520 Q = Delta.sdiv(RHS: G);
1521 return false;
1522}
1523
1524static OverflowSafeSignedAPInt
1525floorOfQuotient(const OverflowSafeSignedAPInt &OA,
1526 const OverflowSafeSignedAPInt &OB) {
1527 if (!OA || !OB)
1528 return OverflowSafeSignedAPInt();
1529
1530 APInt A = *OA;
1531 APInt B = *OB;
1532 APInt Q = A; // these need to be initialized
1533 APInt R = A;
1534 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1535 if (R == 0)
1536 return Q;
1537 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1538 return Q;
1539 return OverflowSafeSignedAPInt(Q) - 1;
1540}
1541
1542static OverflowSafeSignedAPInt
1543ceilingOfQuotient(const OverflowSafeSignedAPInt &OA,
1544 const OverflowSafeSignedAPInt &OB) {
1545 if (!OA || !OB)
1546 return OverflowSafeSignedAPInt();
1547
1548 APInt A = *OA;
1549 APInt B = *OB;
1550 APInt Q = A; // these need to be initialized
1551 APInt R = A;
1552 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1553 if (R == 0)
1554 return Q;
1555 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1556 return OverflowSafeSignedAPInt(Q) + 1;
1557 return Q;
1558}
1559
1560/// Given an affine expression of the form A*k + B, where k is an arbitrary
1561/// integer, infer the possible range of k based on the known range of the
1562/// affine expression. If we know A*k + B is non-negative, i.e.,
1563///
1564/// A*k + B >= 0
1565///
1566/// we can derive the following inequalities for k when A is positive:
1567///
1568/// k >= -B / A
1569///
1570/// Since k is an integer, it means k is greater than or equal to the
1571/// ceil(-B / A).
1572///
1573/// If the upper bound of the affine expression \p UB is passed, the following
1574/// inequality can be derived as well:
1575///
1576/// A*k + B <= UB
1577///
1578/// which leads to:
1579///
1580/// k <= (UB - B) / A
1581///
1582/// Again, as k is an integer, it means k is less than or equal to the
1583/// floor((UB - B) / A).
1584///
1585/// The similar logic applies when A is negative, but the inequalities sign flip
1586/// while working with them.
1587///
1588/// Preconditions: \p A is non-zero, and we know A*k + B is non-negative.
1589static std::pair<OverflowSafeSignedAPInt, OverflowSafeSignedAPInt>
1590inferDomainOfAffine(OverflowSafeSignedAPInt A, OverflowSafeSignedAPInt B,
1591 OverflowSafeSignedAPInt UB) {
1592 assert(A && B && "A and B must be available");
1593 assert(*A != 0 && "A must be non-zero");
1594 OverflowSafeSignedAPInt TL, TU;
1595 if (A->sgt(RHS: 0)) {
1596 TL = ceilingOfQuotient(OA: -B, OB: A);
1597 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1598
1599 // New bound check - modification to Banerjee's e3 check
1600 TU = floorOfQuotient(OA: UB - B, OB: A);
1601 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1602 } else {
1603 TU = floorOfQuotient(OA: -B, OB: A);
1604 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1605
1606 // New bound check - modification to Banerjee's e3 check
1607 TL = ceilingOfQuotient(OA: UB - B, OB: A);
1608 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1609 }
1610 return std::make_pair(x&: TL, y&: TU);
1611}
1612
1613// exactSIVtest -
1614// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1615// where i is an induction variable, c1 and c2 are loop invariant, and a1
1616// and a2 are constant, we can solve it exactly using an algorithm developed
1617// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1618//
1619// Dependence Analysis for Supercomputing
1620// Utpal Banerjee
1621// Kluwer Academic Publishers, 1988
1622//
1623// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1624// so use them if possible. They're also a bit better with symbolics and,
1625// in the case of the strong SIV test, can compute Distances.
1626//
1627// Return true if dependence disproved.
1628//
1629// This is a modified version of the original Banerjee algorithm. The original
1630// only tested whether Dst depends on Src. This algorithm extends that and
1631// returns all the dependencies that exist between Dst and Src.
1632bool DependenceInfo::exactSIVtest(const SCEVAddRecExpr *Src,
1633 const SCEVAddRecExpr *Dst, unsigned Level,
1634 FullDependence &Result) const {
1635 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactSIV))
1636 return false;
1637
1638 const SCEV *SrcCoeff = Src->getStepRecurrence(SE&: *SE);
1639 const SCEV *SrcConst = Src->getStart();
1640 const SCEV *DstCoeff = Dst->getStepRecurrence(SE&: *SE);
1641 const SCEV *DstConst = Dst->getStart();
1642 LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1643 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
1644 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
1645 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1646 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1647 ++ExactSIVapplications;
1648 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1649 Level--;
1650 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
1651 if (!Delta)
1652 return false;
1653 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1654 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1655 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1656 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1657 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1658 return false;
1659
1660 // find gcd
1661 APInt G, X, Y;
1662 APInt AM = ConstSrcCoeff->getAPInt();
1663 APInt BM = ConstDstCoeff->getAPInt();
1664 APInt CM = ConstDelta->getAPInt();
1665 unsigned Bits = AM.getBitWidth();
1666 if (findGCD(Bits, AM, BM, Delta: CM, G, X, Y)) {
1667 // gcd doesn't divide Delta, no dependence
1668 ++ExactSIVindependence;
1669 ++ExactSIVsuccesses;
1670 return true;
1671 }
1672
1673 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1674
1675 // since SCEV construction normalizes, LM = 0
1676 std::optional<APInt> UM;
1677 // UM is perhaps unavailable, let's check
1678 if (const SCEVConstant *CUB =
1679 collectConstantUpperBound(L: Src->getLoop(), T: Delta->getType())) {
1680 UM = CUB->getAPInt();
1681 LLVM_DEBUG(dbgs() << "\t UM = " << *UM << "\n");
1682 }
1683
1684 APInt TU(APInt::getSignedMaxValue(numBits: Bits));
1685 APInt TL(APInt::getSignedMinValue(numBits: Bits));
1686 APInt TC = CM.sdiv(RHS: G);
1687 APInt TX = X * TC;
1688 APInt TY = Y * TC;
1689 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
1690 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
1691 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
1692
1693 APInt TB = BM.sdiv(RHS: G);
1694 APInt TA = AM.sdiv(RHS: G);
1695
1696 // At this point, we have the following equations:
1697 //
1698 // TA*i0 - TB*i1 = TC
1699 //
1700 // Also, we know that the all pairs of (i0, i1) can be expressed as:
1701 //
1702 // (TX + k*TB, TY + k*TA)
1703 //
1704 // where k is an arbitrary integer.
1705 auto [TL0, TU0] = inferDomainOfAffine(A: TB, B: TX, UB: UM);
1706 auto [TL1, TU1] = inferDomainOfAffine(A: TA, B: TY, UB: UM);
1707
1708 auto GetMaxOrMin = [](const OverflowSafeSignedAPInt &V0,
1709 const OverflowSafeSignedAPInt &V1,
1710 bool IsMin) -> std::optional<APInt> {
1711 if (V0 && V1)
1712 return IsMin ? APIntOps::smin(A: *V0, B: *V1) : APIntOps::smax(A: *V0, B: *V1);
1713 if (V0)
1714 return *V0;
1715 if (V1)
1716 return *V1;
1717 return std::nullopt;
1718 };
1719
1720 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
1721 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
1722
1723 std::optional<APInt> OptTL = GetMaxOrMin(TL0, TL1, false);
1724 std::optional<APInt> OptTU = GetMaxOrMin(TU0, TU1, true);
1725 if (!OptTL || !OptTU)
1726 return false;
1727
1728 TL = std::move(*OptTL);
1729 TU = std::move(*OptTU);
1730 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
1731 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
1732
1733 if (TL.sgt(RHS: TU)) {
1734 ++ExactSIVindependence;
1735 ++ExactSIVsuccesses;
1736 return true;
1737 }
1738
1739 // explore directions
1740 unsigned NewDirection = Dependence::DVEntry::NONE;
1741 OverflowSafeSignedAPInt LowerDistance, UpperDistance;
1742 OverflowSafeSignedAPInt OTY(TY), OTX(TX), OTA(TA), OTB(TB), OTL(TL), OTU(TU);
1743 // NOTE: It's unclear whether these calculations can overflow. At the moment,
1744 // we conservatively assume they can.
1745 if (TA.sgt(RHS: TB)) {
1746 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1747 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1748 } else {
1749 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1750 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1751 }
1752
1753 if (!LowerDistance || !UpperDistance)
1754 return false;
1755
1756 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << *LowerDistance << "\n");
1757 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << *UpperDistance << "\n");
1758
1759 if (LowerDistance->sle(RHS: 0) && UpperDistance->sge(RHS: 0)) {
1760 NewDirection |= Dependence::DVEntry::EQ;
1761 ++ExactSIVsuccesses;
1762 }
1763 if (LowerDistance->slt(RHS: 0)) {
1764 NewDirection |= Dependence::DVEntry::GT;
1765 ++ExactSIVsuccesses;
1766 }
1767 if (UpperDistance->sgt(RHS: 0)) {
1768 NewDirection |= Dependence::DVEntry::LT;
1769 ++ExactSIVsuccesses;
1770 }
1771
1772 // finished
1773 Result.DV[Level].Direction &= NewDirection;
1774 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
1775 ++ExactSIVindependence;
1776 LLVM_DEBUG(dbgs() << "\t Result = ");
1777 LLVM_DEBUG(Result.dump(dbgs()));
1778 return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
1779}
1780
1781// Return true if the divisor evenly divides the dividend.
1782static bool isRemainderZero(const SCEVConstant *Dividend,
1783 const SCEVConstant *Divisor) {
1784 const APInt &ConstDividend = Dividend->getAPInt();
1785 const APInt &ConstDivisor = Divisor->getAPInt();
1786 return ConstDividend.srem(RHS: ConstDivisor) == 0;
1787}
1788
1789// weakZeroSrcSIVtest -
1790// From the paper, Practical Dependence Testing, Section 4.2.2
1791//
1792// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1793// where i is an induction variable, c1 and c2 are loop invariant,
1794// and a is a constant, we can solve it exactly using the
1795// Weak-Zero SIV test.
1796//
1797// Given
1798//
1799// c1 = c2 + a*i
1800//
1801// we get
1802//
1803// (c1 - c2)/a = i
1804//
1805// If i is not an integer, there's no dependence.
1806// If i < 0 or > UB, there's no dependence.
1807// If i = 0, the direction is >=.
1808// If i = UB, the direction is <=.
1809// Otherwise, the direction is *.
1810//
1811// Can prove independence. Failing that, we can sometimes refine
1812// the directions. Can sometimes show that first or last
1813// iteration carries all the dependences (so worth peeling).
1814//
1815// (see also weakZeroDstSIVtest)
1816//
1817// Return true if dependence disproved.
1818bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *SrcConst,
1819 const SCEVAddRecExpr *Dst,
1820 unsigned Level,
1821 FullDependence &Result) const {
1822 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
1823 return false;
1824
1825 // For the WeakSIV test, it's possible the loop isn't common to
1826 // the Src and Dst loops. If it isn't, then there's no need to
1827 // record a direction.
1828 const SCEV *DstCoeff = Dst->getStepRecurrence(SE&: *SE);
1829 const SCEV *DstConst = Dst->getStart();
1830 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1831 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1832 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1833 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1834 ++WeakZeroSIVapplications;
1835 assert(0 < Level && Level <= MaxLevels && "Level out of range");
1836 Level--;
1837 if (SrcConst == DstConst && SE->isKnownNonZero(S: DstCoeff)) {
1838 if (Level < CommonLevels) {
1839 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1840 ++WeakZeroSIVsuccesses;
1841 }
1842 return false; // dependences caused by first iteration
1843 }
1844 const SCEV *Delta = minusSCEVNoSignedOverflow(A: SrcConst, B: DstConst, SE&: *SE);
1845 if (!Delta)
1846 return false;
1847 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1848 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1849 if (!ConstCoeff)
1850 return false;
1851
1852 // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
1853 // TODO: Bail out if it's a signed minimum value.
1854 const SCEV *AbsCoeff = SE->isKnownNegative(S: ConstCoeff)
1855 ? SE->getNegativeSCEV(V: ConstCoeff)
1856 : ConstCoeff;
1857 const SCEV *NewDelta =
1858 SE->isKnownNegative(S: ConstCoeff) ? SE->getNegativeSCEV(V: Delta) : Delta;
1859
1860 // check that Delta/SrcCoeff < iteration count
1861 // really check NewDelta < count*AbsCoeff
1862 if (const SCEV *UpperBound =
1863 collectUpperBound(L: Dst->getLoop(), T: Delta->getType())) {
1864 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1865 const SCEV *Product = SE->getMulExpr(LHS: AbsCoeff, RHS: UpperBound);
1866 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: NewDelta, RHS: Product)) {
1867 ++WeakZeroSIVindependence;
1868 ++WeakZeroSIVsuccesses;
1869 return true;
1870 }
1871 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: NewDelta, RHS: Product)) {
1872 // dependences caused by last iteration
1873 if (Level < CommonLevels) {
1874 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1875 ++WeakZeroSIVsuccesses;
1876 }
1877 return false;
1878 }
1879 }
1880
1881 // check that Delta/SrcCoeff >= 0
1882 // really check that NewDelta >= 0
1883 if (SE->isKnownNegative(S: NewDelta)) {
1884 // No dependence, newDelta < 0
1885 ++WeakZeroSIVindependence;
1886 ++WeakZeroSIVsuccesses;
1887 return true;
1888 }
1889
1890 // if SrcCoeff doesn't divide Delta, then no dependence
1891 if (isa<SCEVConstant>(Val: Delta) &&
1892 !isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
1893 ++WeakZeroSIVindependence;
1894 ++WeakZeroSIVsuccesses;
1895 return true;
1896 }
1897 return false;
1898}
1899
1900// weakZeroDstSIVtest -
1901// From the paper, Practical Dependence Testing, Section 4.2.2
1902//
1903// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1904// where i is an induction variable, c1 and c2 are loop invariant,
1905// and a is a constant, we can solve it exactly using the
1906// Weak-Zero SIV test.
1907//
1908// Given
1909//
1910// c1 + a*i = c2
1911//
1912// we get
1913//
1914// i = (c2 - c1)/a
1915//
1916// If i is not an integer, there's no dependence.
1917// If i < 0 or > UB, there's no dependence.
1918// If i = 0, the direction is <=.
1919// If i = UB, the direction is >=.
1920// Otherwise, the direction is *.
1921//
1922// Can prove independence. Failing that, we can sometimes refine
1923// the directions. Can sometimes show that first or last
1924// iteration carries all the dependences (so worth peeling).
1925//
1926// (see also weakZeroSrcSIVtest)
1927//
1928// Return true if dependence disproved.
1929bool DependenceInfo::weakZeroDstSIVtest(const SCEVAddRecExpr *Src,
1930 const SCEV *DstConst, unsigned Level,
1931 FullDependence &Result) const {
1932 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
1933 return false;
1934
1935 // For the WeakSIV test, it's possible the loop isn't common to the
1936 // Src and Dst loops. If it isn't, then there's no need to record a direction.
1937 const SCEV *SrcCoeff = Src->getStepRecurrence(SE&: *SE);
1938 const SCEV *SrcConst = Src->getStart();
1939 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1940 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
1941 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1942 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1943 ++WeakZeroSIVapplications;
1944 assert(0 < Level && Level <= SrcLevels && "Level out of range");
1945 Level--;
1946 if (DstConst == SrcConst && SE->isKnownNonZero(S: SrcCoeff)) {
1947 if (Level < CommonLevels) {
1948 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1949 ++WeakZeroSIVsuccesses;
1950 }
1951 return false; // dependences caused by first iteration
1952 }
1953 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
1954 if (!Delta)
1955 return false;
1956 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1957 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1958 if (!ConstCoeff)
1959 return false;
1960
1961 // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
1962 // TODO: Bail out if it's a signed minimum value.
1963 const SCEV *AbsCoeff = SE->isKnownNegative(S: ConstCoeff)
1964 ? SE->getNegativeSCEV(V: ConstCoeff)
1965 : ConstCoeff;
1966 const SCEV *NewDelta =
1967 SE->isKnownNegative(S: ConstCoeff) ? SE->getNegativeSCEV(V: Delta) : Delta;
1968
1969 // check that Delta/SrcCoeff < iteration count
1970 // really check NewDelta < count*AbsCoeff
1971 if (const SCEV *UpperBound =
1972 collectUpperBound(L: Src->getLoop(), T: Delta->getType())) {
1973 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1974 const SCEV *Product = SE->getMulExpr(LHS: AbsCoeff, RHS: UpperBound);
1975 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: NewDelta, RHS: Product)) {
1976 ++WeakZeroSIVindependence;
1977 ++WeakZeroSIVsuccesses;
1978 return true;
1979 }
1980 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: NewDelta, RHS: Product)) {
1981 // dependences caused by last iteration
1982 if (Level < CommonLevels) {
1983 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1984 ++WeakZeroSIVsuccesses;
1985 }
1986 return false;
1987 }
1988 }
1989
1990 // check that Delta/SrcCoeff >= 0
1991 // really check that NewDelta >= 0
1992 if (SE->isKnownNegative(S: NewDelta)) {
1993 // No dependence, newDelta < 0
1994 ++WeakZeroSIVindependence;
1995 ++WeakZeroSIVsuccesses;
1996 return true;
1997 }
1998
1999 // if SrcCoeff doesn't divide Delta, then no dependence
2000 if (isa<SCEVConstant>(Val: Delta) &&
2001 !isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
2002 ++WeakZeroSIVindependence;
2003 ++WeakZeroSIVsuccesses;
2004 return true;
2005 }
2006 return false;
2007}
2008
2009// exactRDIVtest - Tests the RDIV subscript pair for dependence.
2010// Things of the form [c1 + a*i] and [c2 + b*j],
2011// where i and j are induction variable, c1 and c2 are loop invariant,
2012// and a and b are constants.
2013// Returns true if any possible dependence is disproved.
2014// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
2015bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
2016 const SCEV *SrcConst, const SCEV *DstConst,
2017 const Loop *SrcLoop, const Loop *DstLoop,
2018 FullDependence &Result) const {
2019 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactRDIV))
2020 return false;
2021
2022 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
2023 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
2024 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
2025 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
2026 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
2027 ++ExactRDIVapplications;
2028 const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
2029 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
2030 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
2031 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
2032 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
2033 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
2034 return false;
2035
2036 // find gcd
2037 APInt G, X, Y;
2038 APInt AM = ConstSrcCoeff->getAPInt();
2039 APInt BM = ConstDstCoeff->getAPInt();
2040 APInt CM = ConstDelta->getAPInt();
2041 unsigned Bits = AM.getBitWidth();
2042 if (findGCD(Bits, AM, BM, Delta: CM, G, X, Y)) {
2043 // gcd doesn't divide Delta, no dependence
2044 ++ExactRDIVindependence;
2045 return true;
2046 }
2047
2048 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
2049
2050 // since SCEV construction seems to normalize, LM = 0
2051 std::optional<APInt> SrcUM;
2052 // SrcUM is perhaps unavailable, let's check
2053 if (const SCEVConstant *UpperBound =
2054 collectConstantUpperBound(L: SrcLoop, T: Delta->getType())) {
2055 SrcUM = UpperBound->getAPInt();
2056 LLVM_DEBUG(dbgs() << "\t SrcUM = " << *SrcUM << "\n");
2057 }
2058
2059 std::optional<APInt> DstUM;
2060 // UM is perhaps unavailable, let's check
2061 if (const SCEVConstant *UpperBound =
2062 collectConstantUpperBound(L: DstLoop, T: Delta->getType())) {
2063 DstUM = UpperBound->getAPInt();
2064 LLVM_DEBUG(dbgs() << "\t DstUM = " << *DstUM << "\n");
2065 }
2066
2067 APInt TU(APInt::getSignedMaxValue(numBits: Bits));
2068 APInt TL(APInt::getSignedMinValue(numBits: Bits));
2069 APInt TC = CM.sdiv(RHS: G);
2070 APInt TX = X * TC;
2071 APInt TY = Y * TC;
2072 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
2073 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
2074 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
2075
2076 APInt TB = BM.sdiv(RHS: G);
2077 APInt TA = AM.sdiv(RHS: G);
2078
2079 // At this point, we have the following equations:
2080 //
2081 // TA*i - TB*j = TC
2082 //
2083 // Also, we know that the all pairs of (i, j) can be expressed as:
2084 //
2085 // (TX + k*TB, TY + k*TA)
2086 //
2087 // where k is an arbitrary integer.
2088 auto [TL0, TU0] = inferDomainOfAffine(A: TB, B: TX, UB: SrcUM);
2089 auto [TL1, TU1] = inferDomainOfAffine(A: TA, B: TY, UB: DstUM);
2090
2091 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
2092 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
2093
2094 auto GetMaxOrMin = [](const OverflowSafeSignedAPInt &V0,
2095 const OverflowSafeSignedAPInt &V1,
2096 bool IsMin) -> std::optional<APInt> {
2097 if (V0 && V1)
2098 return IsMin ? APIntOps::smin(A: *V0, B: *V1) : APIntOps::smax(A: *V0, B: *V1);
2099 if (V0)
2100 return *V0;
2101 if (V1)
2102 return *V1;
2103 return std::nullopt;
2104 };
2105
2106 std::optional<APInt> OptTL = GetMaxOrMin(TL0, TL1, false);
2107 std::optional<APInt> OptTU = GetMaxOrMin(TU0, TU1, true);
2108 if (!OptTL || !OptTU)
2109 return false;
2110
2111 TL = std::move(*OptTL);
2112 TU = std::move(*OptTU);
2113 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
2114 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
2115
2116 if (TL.sgt(RHS: TU))
2117 ++ExactRDIVindependence;
2118 return TL.sgt(RHS: TU);
2119}
2120
2121// symbolicRDIVtest -
2122// In Section 4.5 of the Practical Dependence Testing paper,the authors
2123// introduce a special case of Banerjee's Inequalities (also called the
2124// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
2125// particularly cases with symbolics. Since it's only able to disprove
2126// dependence (not compute distances or directions), we'll use it as a
2127// fall back for the other tests.
2128//
2129// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2130// where i and j are induction variables and c1 and c2 are loop invariants,
2131// we can use the symbolic tests to disprove some dependences, serving as a
2132// backup for the RDIV test. Note that i and j can be the same variable,
2133// letting this test serve as a backup for the various SIV tests.
2134//
2135// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
2136// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
2137// loop bounds for the i and j loops, respectively. So, ...
2138//
2139// c1 + a1*i = c2 + a2*j
2140// a1*i - a2*j = c2 - c1
2141//
2142// To test for a dependence, we compute c2 - c1 and make sure it's in the
2143// range of the maximum and minimum possible values of a1*i - a2*j.
2144// Considering the signs of a1 and a2, we have 4 possible cases:
2145//
2146// 1) If a1 >= 0 and a2 >= 0, then
2147// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
2148// -a2*N2 <= c2 - c1 <= a1*N1
2149//
2150// 2) If a1 >= 0 and a2 <= 0, then
2151// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
2152// 0 <= c2 - c1 <= a1*N1 - a2*N2
2153//
2154// 3) If a1 <= 0 and a2 >= 0, then
2155// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
2156// a1*N1 - a2*N2 <= c2 - c1 <= 0
2157//
2158// 4) If a1 <= 0 and a2 <= 0, then
2159// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
2160// a1*N1 <= c2 - c1 <= -a2*N2
2161//
2162// return true if dependence disproved
2163bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
2164 const SCEV *C1, const SCEV *C2,
2165 const Loop *Loop1,
2166 const Loop *Loop2) const {
2167 if (!isDependenceTestEnabled(Test: DependenceTestType::SymbolicRDIV))
2168 return false;
2169
2170 ++SymbolicRDIVapplications;
2171 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
2172 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);
2173 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
2174 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
2175 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
2176 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
2177 const SCEV *N1 = collectUpperBound(L: Loop1, T: A1->getType());
2178 const SCEV *N2 = collectUpperBound(L: Loop2, T: A1->getType());
2179 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
2180 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
2181 const SCEV *C2_C1 = SE->getMinusSCEV(LHS: C2, RHS: C1);
2182 const SCEV *C1_C2 = SE->getMinusSCEV(LHS: C1, RHS: C2);
2183 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
2184 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
2185 if (SE->isKnownNonNegative(S: A1)) {
2186 if (SE->isKnownNonNegative(S: A2)) {
2187 // A1 >= 0 && A2 >= 0
2188 if (N1) {
2189 // make sure that c2 - c1 <= a1*N1
2190 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2191 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2192 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: C2_C1, RHS: A1N1)) {
2193 ++SymbolicRDIVindependence;
2194 return true;
2195 }
2196 }
2197 if (N2) {
2198 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
2199 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2200 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2201 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SLT, LHS: A2N2, RHS: C1_C2)) {
2202 ++SymbolicRDIVindependence;
2203 return true;
2204 }
2205 }
2206 } else if (SE->isKnownNonPositive(S: A2)) {
2207 // a1 >= 0 && a2 <= 0
2208 if (N1 && N2) {
2209 // make sure that c2 - c1 <= a1*N1 - a2*N2
2210 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2211 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2212 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(LHS: A1N1, RHS: A2N2);
2213 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2214 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: C2_C1, RHS: A1N1_A2N2)) {
2215 ++SymbolicRDIVindependence;
2216 return true;
2217 }
2218 }
2219 // make sure that 0 <= c2 - c1
2220 if (SE->isKnownNegative(S: C2_C1)) {
2221 ++SymbolicRDIVindependence;
2222 return true;
2223 }
2224 }
2225 } else if (SE->isKnownNonPositive(S: A1)) {
2226 if (SE->isKnownNonNegative(S: A2)) {
2227 // a1 <= 0 && a2 >= 0
2228 if (N1 && N2) {
2229 // make sure that a1*N1 - a2*N2 <= c2 - c1
2230 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2231 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2232 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(LHS: A1N1, RHS: A2N2);
2233 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2234 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: A1N1_A2N2, RHS: C2_C1)) {
2235 ++SymbolicRDIVindependence;
2236 return true;
2237 }
2238 }
2239 // make sure that c2 - c1 <= 0
2240 if (SE->isKnownPositive(S: C2_C1)) {
2241 ++SymbolicRDIVindependence;
2242 return true;
2243 }
2244 } else if (SE->isKnownNonPositive(S: A2)) {
2245 // a1 <= 0 && a2 <= 0
2246 if (N1) {
2247 // make sure that a1*N1 <= c2 - c1
2248 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2249 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2250 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: A1N1, RHS: C2_C1)) {
2251 ++SymbolicRDIVindependence;
2252 return true;
2253 }
2254 }
2255 if (N2) {
2256 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2257 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2258 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2259 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SLT, LHS: C1_C2, RHS: A2N2)) {
2260 ++SymbolicRDIVindependence;
2261 return true;
2262 }
2263 }
2264 }
2265 }
2266 return false;
2267}
2268
2269// testSIV -
2270// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2271// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2272// a2 are constant, we attack it with an SIV test. While they can all be
2273// solved with the Exact SIV test, it's worthwhile to use simpler tests when
2274// they apply; they're cheaper and sometimes more precise.
2275//
2276// Return true if dependence disproved.
2277bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
2278 FullDependence &Result,
2279 bool UnderRuntimeAssumptions) {
2280 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2281 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2282 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
2283 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
2284 if (SrcAddRec && DstAddRec) {
2285 const SCEV *SrcConst = SrcAddRec->getStart();
2286 const SCEV *DstConst = DstAddRec->getStart();
2287 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
2288 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
2289 const Loop *CurSrcLoop = SrcAddRec->getLoop();
2290 const Loop *CurDstLoop = DstAddRec->getLoop();
2291 assert(haveSameSD(CurSrcLoop, CurDstLoop) &&
2292 "Loops in the SIV test should have the same iteration space and "
2293 "depth");
2294 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
2295 bool disproven;
2296 if (SrcCoeff == DstCoeff)
2297 disproven = strongSIVtest(Src: SrcAddRec, Dst: DstAddRec, Level, Result,
2298 UnderRuntimeAssumptions);
2299 else if (SrcCoeff == SE->getNegativeSCEV(V: DstCoeff))
2300 disproven = weakCrossingSIVtest(Coeff: SrcCoeff, SrcConst, DstConst, CurSrcLoop,
2301 CurDstLoop, Level, Result);
2302 else
2303 disproven = exactSIVtest(Src: SrcAddRec, Dst: DstAddRec, Level, Result);
2304 return disproven || gcdMIVtest(Src, Dst, Result) ||
2305 symbolicRDIVtest(A1: SrcCoeff, A2: DstCoeff, C1: SrcConst, C2: DstConst, Loop1: CurSrcLoop,
2306 Loop2: CurDstLoop);
2307 }
2308 if (SrcAddRec) {
2309 const Loop *CurSrcLoop = SrcAddRec->getLoop();
2310 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
2311 return weakZeroDstSIVtest(Src: SrcAddRec, DstConst: Dst, Level, Result) ||
2312 gcdMIVtest(Src, Dst, Result);
2313 }
2314 if (DstAddRec) {
2315 const Loop *CurDstLoop = DstAddRec->getLoop();
2316 Level = mapDstLoop(DstLoop: CurDstLoop);
2317 return weakZeroSrcSIVtest(SrcConst: Src, Dst: DstAddRec, Level, Result) ||
2318 gcdMIVtest(Src, Dst, Result);
2319 }
2320 llvm_unreachable("SIV test expected at least one AddRec");
2321 return false;
2322}
2323
2324// testRDIV -
2325// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2326// where i and j are induction variables, c1 and c2 are loop invariant,
2327// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2328// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2329// It doesn't make sense to talk about distance or direction in this case,
2330// so there's no point in making special versions of the Strong SIV test or
2331// the Weak-crossing SIV test.
2332//
2333// Return true if dependence disproved.
2334bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
2335 FullDependence &Result) const {
2336 const SCEV *SrcConst, *DstConst;
2337 const SCEV *SrcCoeff, *DstCoeff;
2338 const Loop *SrcLoop, *DstLoop;
2339
2340 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2341 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2342 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
2343 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
2344 if (SrcAddRec && DstAddRec) {
2345 SrcConst = SrcAddRec->getStart();
2346 SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
2347 SrcLoop = SrcAddRec->getLoop();
2348 DstConst = DstAddRec->getStart();
2349 DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
2350 DstLoop = DstAddRec->getLoop();
2351 } else
2352 llvm_unreachable("RDIV expected at least one AddRec");
2353 return exactRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, SrcLoop, DstLoop,
2354 Result) ||
2355 gcdMIVtest(Src, Dst, Result) ||
2356 symbolicRDIVtest(A1: SrcCoeff, A2: DstCoeff, C1: SrcConst, C2: DstConst, Loop1: SrcLoop,
2357 Loop2: DstLoop);
2358}
2359
2360// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2361// Return true if dependence disproved.
2362// Can sometimes refine direction vectors.
2363bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
2364 const SmallBitVector &Loops,
2365 FullDependence &Result) const {
2366 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2367 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2368 return gcdMIVtest(Src, Dst, Result) ||
2369 banerjeeMIVtest(Src, Dst, Loops, Result);
2370}
2371
2372/// Given a SCEVMulExpr, returns its first operand if its first operand is a
2373/// constant and the product doesn't overflow in a signed sense. Otherwise,
2374/// returns std::nullopt. For example, given (10 * X * Y)<nsw>, it returns 10.
2375/// Notably, if it doesn't have nsw, the multiplication may overflow, and if
2376/// so, it may not a multiple of 10.
2377static std::optional<APInt> getConstantCoefficient(const SCEV *Expr) {
2378 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Expr))
2379 return Constant->getAPInt();
2380 if (const auto *Product = dyn_cast<SCEVMulExpr>(Val: Expr))
2381 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Product->getOperand(i: 0)))
2382 if (Product->hasNoSignedWrap())
2383 return Constant->getAPInt();
2384 return std::nullopt;
2385}
2386
2387bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr,
2388 const Loop *CurLoop,
2389 const SCEV *&CurLoopCoeff,
2390 APInt &RunningGCD) const {
2391 // If RunningGCD is already 1, exit early.
2392 // TODO: It might be better to continue the recursion to find CurLoopCoeff.
2393 if (RunningGCD == 1)
2394 return true;
2395
2396 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
2397 if (!AddRec) {
2398 assert(isLoopInvariant(Expr, CurLoop) &&
2399 "Expected loop invariant expression");
2400 return true;
2401 }
2402
2403 assert(AddRec->isAffine() && "Unexpected Expr");
2404 const SCEV *Start = AddRec->getStart();
2405 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
2406 if (AddRec->getLoop() == CurLoop) {
2407 CurLoopCoeff = Step;
2408 } else {
2409 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Step);
2410
2411 // If the coefficient is the product of a constant and other stuff, we can
2412 // use the constant in the GCD computation.
2413 if (!ConstCoeff)
2414 return false;
2415
2416 // TODO: What happens if ConstCoeff is the "most negative" signed number
2417 // (e.g. -128 for 8 bit wide APInt)?
2418 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2419 }
2420
2421 return accumulateCoefficientsGCD(Expr: Start, CurLoop, CurLoopCoeff, RunningGCD);
2422}
2423
2424//===----------------------------------------------------------------------===//
2425// gcdMIVtest -
2426// Tests an MIV subscript pair for dependence.
2427// Returns true if any possible dependence is disproved.
2428// Can sometimes disprove the equal direction for 1 or more loops,
2429// as discussed in Michael Wolfe's book,
2430// High Performance Compilers for Parallel Computing, page 235.
2431//
2432// We spend some effort (code!) to handle cases like
2433// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2434// but M and N are just loop-invariant variables.
2435// This should help us handle linearized subscripts;
2436// also makes this test a useful backup to the various SIV tests.
2437//
2438// It occurs to me that the presence of loop-invariant variables
2439// changes the nature of the test from "greatest common divisor"
2440// to "a common divisor".
2441bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
2442 FullDependence &Result) const {
2443 if (!isDependenceTestEnabled(Test: DependenceTestType::GCDMIV))
2444 return false;
2445
2446 LLVM_DEBUG(dbgs() << "starting gcd\n");
2447 ++GCDapplications;
2448 unsigned BitWidth = SE->getTypeSizeInBits(Ty: Src->getType());
2449 APInt RunningGCD = APInt::getZero(numBits: BitWidth);
2450
2451 // Examine Src coefficients.
2452 // Compute running GCD and record source constant.
2453 // Because we're looking for the constant at the end of the chain,
2454 // we can't quit the loop just because the GCD == 1.
2455 const SCEV *Coefficients = Src;
2456 while (const SCEVAddRecExpr *AddRec =
2457 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2458 const SCEV *Coeff = AddRec->getStepRecurrence(SE&: *SE);
2459 // If the coefficient is the product of a constant and other stuff,
2460 // we can use the constant in the GCD computation.
2461 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Coeff);
2462 if (!ConstCoeff)
2463 return false;
2464 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2465 Coefficients = AddRec->getStart();
2466 }
2467 const SCEV *SrcConst = Coefficients;
2468
2469 // Examine Dst coefficients.
2470 // Compute running GCD and record destination constant.
2471 // Because we're looking for the constant at the end of the chain,
2472 // we can't quit the loop just because the GCD == 1.
2473 Coefficients = Dst;
2474 while (const SCEVAddRecExpr *AddRec =
2475 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2476 const SCEV *Coeff = AddRec->getStepRecurrence(SE&: *SE);
2477 // If the coefficient is the product of a constant and other stuff,
2478 // we can use the constant in the GCD computation.
2479 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Coeff);
2480 if (!ConstCoeff)
2481 return false;
2482 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2483 Coefficients = AddRec->getStart();
2484 }
2485 const SCEV *DstConst = Coefficients;
2486
2487 APInt ExtraGCD = APInt::getZero(numBits: BitWidth);
2488 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
2489 if (!Delta)
2490 return false;
2491 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
2492 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Val: Delta);
2493 if (!Constant)
2494 return false;
2495 APInt ConstDelta = cast<SCEVConstant>(Val: Constant)->getAPInt();
2496 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
2497 if (ConstDelta == 0)
2498 return false;
2499 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
2500 APInt Remainder = ConstDelta.srem(RHS: RunningGCD);
2501 if (Remainder != 0) {
2502 ++GCDindependence;
2503 return true;
2504 }
2505
2506 // Try to disprove equal directions.
2507 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2508 // the code above can't disprove the dependence because the GCD = 1.
2509 // So we consider what happen if i = i' and what happens if j = j'.
2510 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2511 // which is infeasible, so we can disallow the = direction for the i level.
2512 // Setting j = j' doesn't help matters, so we end up with a direction vector
2513 // of [<>, *]
2514 //
2515 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2516 // we need to remember that the constant part is 5 and the RunningGCD should
2517 // be initialized to ExtraGCD = 30.
2518 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
2519
2520 bool Improved = false;
2521 Coefficients = Src;
2522 while (const SCEVAddRecExpr *AddRec =
2523 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2524 Coefficients = AddRec->getStart();
2525 const Loop *CurLoop = AddRec->getLoop();
2526 RunningGCD = ExtraGCD;
2527 const SCEV *SrcCoeff = AddRec->getStepRecurrence(SE&: *SE);
2528 const SCEV *DstCoeff = SE->getMinusSCEV(LHS: SrcCoeff, RHS: SrcCoeff);
2529
2530 if (!accumulateCoefficientsGCD(Expr: Src, CurLoop, CurLoopCoeff&: SrcCoeff, RunningGCD) ||
2531 !accumulateCoefficientsGCD(Expr: Dst, CurLoop, CurLoopCoeff&: DstCoeff, RunningGCD))
2532 return false;
2533
2534 Delta = SE->getMinusSCEV(LHS: SrcCoeff, RHS: DstCoeff);
2535 // If the coefficient is the product of a constant and other stuff,
2536 // we can use the constant in the GCD computation.
2537 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Delta);
2538 if (!ConstCoeff)
2539 // The difference of the two coefficients might not be a product
2540 // or constant, in which case we give up on this direction.
2541 continue;
2542 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2543 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2544 if (RunningGCD != 0) {
2545 Remainder = ConstDelta.srem(RHS: RunningGCD);
2546 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2547 if (Remainder != 0) {
2548 unsigned Level = mapSrcLoop(SrcLoop: CurLoop);
2549 Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ;
2550 Improved = true;
2551 }
2552 }
2553 }
2554 if (Improved)
2555 ++GCDsuccesses;
2556 LLVM_DEBUG(dbgs() << "all done\n");
2557 return false;
2558}
2559
2560//===----------------------------------------------------------------------===//
2561// banerjeeMIVtest -
2562// Use Banerjee's Inequalities to test an MIV subscript pair.
2563// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2564// Generally follows the discussion in Section 2.5.2 of
2565//
2566// Optimizing Supercompilers for Supercomputers
2567// Michael Wolfe
2568//
2569// The inequalities given on page 25 are simplified in that loops are
2570// normalized so that the lower bound is always 0 and the stride is always 1.
2571// For example, Wolfe gives
2572//
2573// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2574//
2575// where A_k is the coefficient of the kth index in the source subscript,
2576// B_k is the coefficient of the kth index in the destination subscript,
2577// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2578// index, and N_k is the stride of the kth index. Since all loops are normalized
2579// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2580// equation to
2581//
2582// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2583// = (A^-_k - B_k)^- (U_k - 1) - B_k
2584//
2585// Similar simplifications are possible for the other equations.
2586//
2587// When we can't determine the number of iterations for a loop,
2588// we use NULL as an indicator for the worst case, infinity.
2589// When computing the upper bound, NULL denotes +inf;
2590// for the lower bound, NULL denotes -inf.
2591//
2592// Return true if dependence disproved.
2593bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
2594 const SmallBitVector &Loops,
2595 FullDependence &Result) const {
2596 if (!isDependenceTestEnabled(Test: DependenceTestType::BanerjeeMIV))
2597 return false;
2598
2599 LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2600 ++BanerjeeApplications;
2601 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
2602 const SCEV *A0;
2603 CoefficientInfo *A = collectCoeffInfo(Subscript: Src, SrcFlag: true, Constant&: A0);
2604 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');
2605 const SCEV *B0;
2606 CoefficientInfo *B = collectCoeffInfo(Subscript: Dst, SrcFlag: false, Constant&: B0);
2607 BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2608 const SCEV *Delta = SE->getMinusSCEV(LHS: B0, RHS: A0);
2609 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2610
2611 // Compute bounds for all the * directions.
2612 LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2613 for (unsigned K = 1; K <= MaxLevels; ++K) {
2614 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
2615 Bound[K].Direction = Dependence::DVEntry::ALL;
2616 Bound[K].DirSet = Dependence::DVEntry::NONE;
2617 findBoundsALL(A, B, Bound, K);
2618#ifndef NDEBUG
2619 LLVM_DEBUG(dbgs() << "\t " << K << '\t');
2620 if (Bound[K].Lower[Dependence::DVEntry::ALL])
2621 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2622 else
2623 LLVM_DEBUG(dbgs() << "-inf\t");
2624 if (Bound[K].Upper[Dependence::DVEntry::ALL])
2625 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2626 else
2627 LLVM_DEBUG(dbgs() << "+inf\n");
2628#endif
2629 }
2630
2631 // Test the *, *, *, ... case.
2632 bool Disproved = false;
2633 if (testBounds(DirKind: Dependence::DVEntry::ALL, Level: 0, Bound, Delta)) {
2634 // Explore the direction vector hierarchy.
2635 unsigned DepthExpanded = 0;
2636 unsigned NewDeps =
2637 exploreDirections(Level: 1, A, B, Bound, Loops, DepthExpanded, Delta);
2638 if (NewDeps > 0) {
2639 bool Improved = false;
2640 for (unsigned K = 1; K <= CommonLevels; ++K) {
2641 if (Loops[K]) {
2642 unsigned Old = Result.DV[K - 1].Direction;
2643 Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2644 Improved |= Old != Result.DV[K - 1].Direction;
2645 if (!Result.DV[K - 1].Direction) {
2646 Improved = false;
2647 Disproved = true;
2648 break;
2649 }
2650 }
2651 }
2652 if (Improved)
2653 ++BanerjeeSuccesses;
2654 } else {
2655 ++BanerjeeIndependence;
2656 Disproved = true;
2657 }
2658 } else {
2659 ++BanerjeeIndependence;
2660 Disproved = true;
2661 }
2662 delete[] Bound;
2663 delete[] A;
2664 delete[] B;
2665 return Disproved;
2666}
2667
2668// Hierarchically expands the direction vector
2669// search space, combining the directions of discovered dependences
2670// in the DirSet field of Bound. Returns the number of distinct
2671// dependences discovered. If the dependence is disproved,
2672// it will return 0.
2673unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2674 CoefficientInfo *B, BoundInfo *Bound,
2675 const SmallBitVector &Loops,
2676 unsigned &DepthExpanded,
2677 const SCEV *Delta) const {
2678 // This algorithm has worst case complexity of O(3^n), where 'n' is the number
2679 // of common loop levels. To avoid excessive compile-time, pessimize all the
2680 // results and immediately return when the number of common levels is beyond
2681 // the given threshold.
2682 if (CommonLevels > MIVMaxLevelThreshold) {
2683 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2684 "direction exploration is terminated.\n");
2685 for (unsigned K = 1; K <= CommonLevels; ++K)
2686 if (Loops[K])
2687 Bound[K].DirSet = Dependence::DVEntry::ALL;
2688 return 1;
2689 }
2690
2691 if (Level > CommonLevels) {
2692 // record result
2693 LLVM_DEBUG(dbgs() << "\t[");
2694 for (unsigned K = 1; K <= CommonLevels; ++K) {
2695 if (Loops[K]) {
2696 Bound[K].DirSet |= Bound[K].Direction;
2697#ifndef NDEBUG
2698 switch (Bound[K].Direction) {
2699 case Dependence::DVEntry::LT:
2700 LLVM_DEBUG(dbgs() << " <");
2701 break;
2702 case Dependence::DVEntry::EQ:
2703 LLVM_DEBUG(dbgs() << " =");
2704 break;
2705 case Dependence::DVEntry::GT:
2706 LLVM_DEBUG(dbgs() << " >");
2707 break;
2708 case Dependence::DVEntry::ALL:
2709 LLVM_DEBUG(dbgs() << " *");
2710 break;
2711 default:
2712 llvm_unreachable("unexpected Bound[K].Direction");
2713 }
2714#endif
2715 }
2716 }
2717 LLVM_DEBUG(dbgs() << " ]\n");
2718 return 1;
2719 }
2720 if (Loops[Level]) {
2721 if (Level > DepthExpanded) {
2722 DepthExpanded = Level;
2723 // compute bounds for <, =, > at current level
2724 findBoundsLT(A, B, Bound, K: Level);
2725 findBoundsGT(A, B, Bound, K: Level);
2726 findBoundsEQ(A, B, Bound, K: Level);
2727#ifndef NDEBUG
2728 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2729 LLVM_DEBUG(dbgs() << "\t <\t");
2730 if (Bound[Level].Lower[Dependence::DVEntry::LT])
2731 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2732 << '\t');
2733 else
2734 LLVM_DEBUG(dbgs() << "-inf\t");
2735 if (Bound[Level].Upper[Dependence::DVEntry::LT])
2736 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2737 << '\n');
2738 else
2739 LLVM_DEBUG(dbgs() << "+inf\n");
2740 LLVM_DEBUG(dbgs() << "\t =\t");
2741 if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2742 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2743 << '\t');
2744 else
2745 LLVM_DEBUG(dbgs() << "-inf\t");
2746 if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2747 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2748 << '\n');
2749 else
2750 LLVM_DEBUG(dbgs() << "+inf\n");
2751 LLVM_DEBUG(dbgs() << "\t >\t");
2752 if (Bound[Level].Lower[Dependence::DVEntry::GT])
2753 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2754 << '\t');
2755 else
2756 LLVM_DEBUG(dbgs() << "-inf\t");
2757 if (Bound[Level].Upper[Dependence::DVEntry::GT])
2758 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2759 << '\n');
2760 else
2761 LLVM_DEBUG(dbgs() << "+inf\n");
2762#endif
2763 }
2764
2765 unsigned NewDeps = 0;
2766
2767 // test bounds for <, *, *, ...
2768 if (testBounds(DirKind: Dependence::DVEntry::LT, Level, Bound, Delta))
2769 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2770 Delta);
2771
2772 // Test bounds for =, *, *, ...
2773 if (testBounds(DirKind: Dependence::DVEntry::EQ, Level, Bound, Delta))
2774 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2775 Delta);
2776
2777 // test bounds for >, *, *, ...
2778 if (testBounds(DirKind: Dependence::DVEntry::GT, Level, Bound, Delta))
2779 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2780 Delta);
2781
2782 Bound[Level].Direction = Dependence::DVEntry::ALL;
2783 return NewDeps;
2784 } else
2785 return exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2786 Delta);
2787}
2788
2789// Returns true iff the current bounds are plausible.
2790bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2791 BoundInfo *Bound, const SCEV *Delta) const {
2792 Bound[Level].Direction = DirKind;
2793 if (const SCEV *LowerBound = getLowerBound(Bound))
2794 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: LowerBound, RHS: Delta))
2795 return false;
2796 if (const SCEV *UpperBound = getUpperBound(Bound))
2797 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: Delta, RHS: UpperBound))
2798 return false;
2799 return true;
2800}
2801
2802// Computes the upper and lower bounds for level K
2803// using the * direction. Records them in Bound.
2804// Wolfe gives the equations
2805//
2806// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2807// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2808//
2809// Since we normalize loops, we can simplify these equations to
2810//
2811// LB^*_k = (A^-_k - B^+_k)U_k
2812// UB^*_k = (A^+_k - B^-_k)U_k
2813//
2814// We must be careful to handle the case where the upper bound is unknown.
2815// Note that the lower bound is always <= 0
2816// and the upper bound is always >= 0.
2817void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
2818 BoundInfo *Bound, unsigned K) const {
2819 Bound[K].Lower[Dependence::DVEntry::ALL] =
2820 nullptr; // Default value = -infinity.
2821 Bound[K].Upper[Dependence::DVEntry::ALL] =
2822 nullptr; // Default value = +infinity.
2823 if (Bound[K].Iterations) {
2824 Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getMulExpr(
2825 LHS: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].PosPart), RHS: Bound[K].Iterations);
2826 Bound[K].Upper[Dependence::DVEntry::ALL] = SE->getMulExpr(
2827 LHS: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].NegPart), RHS: Bound[K].Iterations);
2828 } else {
2829 // If the difference is 0, we won't need to know the number of iterations.
2830 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].NegPart, RHS: B[K].PosPart))
2831 Bound[K].Lower[Dependence::DVEntry::ALL] =
2832 SE->getZero(Ty: A[K].Coeff->getType());
2833 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].PosPart, RHS: B[K].NegPart))
2834 Bound[K].Upper[Dependence::DVEntry::ALL] =
2835 SE->getZero(Ty: A[K].Coeff->getType());
2836 }
2837}
2838
2839// Computes the upper and lower bounds for level K
2840// using the = direction. Records them in Bound.
2841// Wolfe gives the equations
2842//
2843// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2844// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2845//
2846// Since we normalize loops, we can simplify these equations to
2847//
2848// LB^=_k = (A_k - B_k)^- U_k
2849// UB^=_k = (A_k - B_k)^+ U_k
2850//
2851// We must be careful to handle the case where the upper bound is unknown.
2852// Note that the lower bound is always <= 0
2853// and the upper bound is always >= 0.
2854void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
2855 BoundInfo *Bound, unsigned K) const {
2856 Bound[K].Lower[Dependence::DVEntry::EQ] =
2857 nullptr; // Default value = -infinity.
2858 Bound[K].Upper[Dependence::DVEntry::EQ] =
2859 nullptr; // Default value = +infinity.
2860 if (Bound[K].Iterations) {
2861 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2862 const SCEV *NegativePart = getNegativePart(X: Delta);
2863 Bound[K].Lower[Dependence::DVEntry::EQ] =
2864 SE->getMulExpr(LHS: NegativePart, RHS: Bound[K].Iterations);
2865 const SCEV *PositivePart = getPositivePart(X: Delta);
2866 Bound[K].Upper[Dependence::DVEntry::EQ] =
2867 SE->getMulExpr(LHS: PositivePart, RHS: Bound[K].Iterations);
2868 } else {
2869 // If the positive/negative part of the difference is 0,
2870 // we won't need to know the number of iterations.
2871 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2872 const SCEV *NegativePart = getNegativePart(X: Delta);
2873 if (NegativePart->isZero())
2874 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2875 const SCEV *PositivePart = getPositivePart(X: Delta);
2876 if (PositivePart->isZero())
2877 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2878 }
2879}
2880
2881// Computes the upper and lower bounds for level K
2882// using the < direction. Records them in Bound.
2883// Wolfe gives the equations
2884//
2885// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2886// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2887//
2888// Since we normalize loops, we can simplify these equations to
2889//
2890// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2891// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2892//
2893// We must be careful to handle the case where the upper bound is unknown.
2894void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
2895 BoundInfo *Bound, unsigned K) const {
2896 Bound[K].Lower[Dependence::DVEntry::LT] =
2897 nullptr; // Default value = -infinity.
2898 Bound[K].Upper[Dependence::DVEntry::LT] =
2899 nullptr; // Default value = +infinity.
2900 if (Bound[K].Iterations) {
2901 const SCEV *Iter_1 = SE->getMinusSCEV(
2902 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2903 const SCEV *NegPart =
2904 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2905 Bound[K].Lower[Dependence::DVEntry::LT] =
2906 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: B[K].Coeff);
2907 const SCEV *PosPart =
2908 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2909 Bound[K].Upper[Dependence::DVEntry::LT] =
2910 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: B[K].Coeff);
2911 } else {
2912 // If the positive/negative part of the difference is 0,
2913 // we won't need to know the number of iterations.
2914 const SCEV *NegPart =
2915 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2916 if (NegPart->isZero())
2917 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2918 const SCEV *PosPart =
2919 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2920 if (PosPart->isZero())
2921 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2922 }
2923}
2924
2925// Computes the upper and lower bounds for level K
2926// using the > direction. Records them in Bound.
2927// Wolfe gives the equations
2928//
2929// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2930// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2931//
2932// Since we normalize loops, we can simplify these equations to
2933//
2934// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2935// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2936//
2937// We must be careful to handle the case where the upper bound is unknown.
2938void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
2939 BoundInfo *Bound, unsigned K) const {
2940 Bound[K].Lower[Dependence::DVEntry::GT] =
2941 nullptr; // Default value = -infinity.
2942 Bound[K].Upper[Dependence::DVEntry::GT] =
2943 nullptr; // Default value = +infinity.
2944 if (Bound[K].Iterations) {
2945 const SCEV *Iter_1 = SE->getMinusSCEV(
2946 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2947 const SCEV *NegPart =
2948 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2949 Bound[K].Lower[Dependence::DVEntry::GT] =
2950 SE->getAddExpr(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: A[K].Coeff);
2951 const SCEV *PosPart =
2952 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
2953 Bound[K].Upper[Dependence::DVEntry::GT] =
2954 SE->getAddExpr(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: A[K].Coeff);
2955 } else {
2956 // If the positive/negative part of the difference is 0,
2957 // we won't need to know the number of iterations.
2958 const SCEV *NegPart =
2959 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2960 if (NegPart->isZero())
2961 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2962 const SCEV *PosPart =
2963 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
2964 if (PosPart->isZero())
2965 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2966 }
2967}
2968
2969// X^+ = max(X, 0)
2970const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
2971 return SE->getSMaxExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
2972}
2973
2974// X^- = min(X, 0)
2975const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
2976 return SE->getSMinExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
2977}
2978
2979// Walks through the subscript,
2980// collecting each coefficient, the associated loop bounds,
2981// and recording its positive and negative parts for later use.
2982DependenceInfo::CoefficientInfo *
2983DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
2984 const SCEV *&Constant) const {
2985 const SCEV *Zero = SE->getZero(Ty: Subscript->getType());
2986 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
2987 for (unsigned K = 1; K <= MaxLevels; ++K) {
2988 CI[K].Coeff = Zero;
2989 CI[K].PosPart = Zero;
2990 CI[K].NegPart = Zero;
2991 CI[K].Iterations = nullptr;
2992 }
2993 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Subscript)) {
2994 const Loop *L = AddRec->getLoop();
2995 unsigned K = SrcFlag ? mapSrcLoop(SrcLoop: L) : mapDstLoop(DstLoop: L);
2996 CI[K].Coeff = AddRec->getStepRecurrence(SE&: *SE);
2997 CI[K].PosPart = getPositivePart(X: CI[K].Coeff);
2998 CI[K].NegPart = getNegativePart(X: CI[K].Coeff);
2999 CI[K].Iterations = collectUpperBound(L, T: Subscript->getType());
3000 Subscript = AddRec->getStart();
3001 }
3002 Constant = Subscript;
3003#ifndef NDEBUG
3004 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
3005 for (unsigned K = 1; K <= MaxLevels; ++K) {
3006 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
3007 LLVM_DEBUG(dbgs() << "\tPos Part = ");
3008 LLVM_DEBUG(dbgs() << *CI[K].PosPart);
3009 LLVM_DEBUG(dbgs() << "\tNeg Part = ");
3010 LLVM_DEBUG(dbgs() << *CI[K].NegPart);
3011 LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
3012 if (CI[K].Iterations)
3013 LLVM_DEBUG(dbgs() << *CI[K].Iterations);
3014 else
3015 LLVM_DEBUG(dbgs() << "+inf");
3016 LLVM_DEBUG(dbgs() << '\n');
3017 }
3018 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
3019#endif
3020 return CI;
3021}
3022
3023// Looks through all the bounds info and
3024// computes the lower bound given the current direction settings
3025// at each level. If the lower bound for any level is -inf,
3026// the result is -inf.
3027const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
3028 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
3029 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3030 if (Bound[K].Lower[Bound[K].Direction])
3031 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Lower[Bound[K].Direction]);
3032 else
3033 Sum = nullptr;
3034 }
3035 return Sum;
3036}
3037
3038// Looks through all the bounds info and
3039// computes the upper bound given the current direction settings
3040// at each level. If the upper bound at any level is +inf,
3041// the result is +inf.
3042const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
3043 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
3044 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3045 if (Bound[K].Upper[Bound[K].Direction])
3046 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Upper[Bound[K].Direction]);
3047 else
3048 Sum = nullptr;
3049 }
3050 return Sum;
3051}
3052
3053/// Check if we can delinearize the subscripts. If the SCEVs representing the
3054/// source and destination array references are recurrences on a nested loop,
3055/// this function flattens the nested recurrences into separate recurrences
3056/// for each loop level.
3057bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
3058 SmallVectorImpl<Subscript> &Pair) {
3059 assert(isLoadOrStore(Src) && "instruction is not load or store");
3060 assert(isLoadOrStore(Dst) && "instruction is not load or store");
3061 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3062 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3063 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
3064 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
3065 const SCEV *SrcAccessFn = SE->getSCEVAtScope(V: SrcPtr, L: SrcLoop);
3066 const SCEV *DstAccessFn = SE->getSCEVAtScope(V: DstPtr, L: DstLoop);
3067 const SCEVUnknown *SrcBase =
3068 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
3069 const SCEVUnknown *DstBase =
3070 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
3071
3072 if (!SrcBase || !DstBase || SrcBase != DstBase)
3073 return false;
3074
3075 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
3076
3077 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
3078 SrcSubscripts, DstSubscripts) &&
3079 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
3080 SrcSubscripts, DstSubscripts))
3081 return false;
3082
3083 assert(isLoopInvariant(SrcBase, SrcLoop) &&
3084 isLoopInvariant(DstBase, DstLoop) &&
3085 "Expected SrcBase and DstBase to be loop invariant");
3086
3087 int Size = SrcSubscripts.size();
3088 LLVM_DEBUG({
3089 dbgs() << "\nSrcSubscripts: ";
3090 for (int I = 0; I < Size; I++)
3091 dbgs() << *SrcSubscripts[I];
3092 dbgs() << "\nDstSubscripts: ";
3093 for (int I = 0; I < Size; I++)
3094 dbgs() << *DstSubscripts[I];
3095 dbgs() << "\n";
3096 });
3097
3098 // The delinearization transforms a single-subscript MIV dependence test into
3099 // a multi-subscript SIV dependence test that is easier to compute. So we
3100 // resize Pair to contain as many pairs of subscripts as the delinearization
3101 // has found, and then initialize the pairs following the delinearization.
3102 Pair.resize(N: Size);
3103 SCEVMonotonicityChecker MonChecker(SE);
3104 const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
3105 for (int I = 0; I < Size; ++I) {
3106 Pair[I].Src = SrcSubscripts[I];
3107 Pair[I].Dst = DstSubscripts[I];
3108
3109 assert(Pair[I].Src->getType() == Pair[I].Dst->getType() &&
3110 "Unexpected different types for the subscripts");
3111
3112 if (EnableMonotonicityCheck) {
3113 if (MonChecker.checkMonotonicity(Expr: Pair[I].Src, OutermostLoop).isUnknown())
3114 return false;
3115 if (MonChecker.checkMonotonicity(Expr: Pair[I].Dst, OutermostLoop).isUnknown())
3116 return false;
3117 }
3118 }
3119
3120 return true;
3121}
3122
3123/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
3124/// arrays accessed are fixed-size arrays. Return true if delinearization was
3125/// successful.
3126bool DependenceInfo::tryDelinearizeFixedSize(
3127 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3128 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3129 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3130 LLVM_DEBUG({
3131 const SCEVUnknown *SrcBase =
3132 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3133 const SCEVUnknown *DstBase =
3134 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3135 assert(SrcBase && DstBase && SrcBase == DstBase &&
3136 "expected src and dst scev unknowns to be equal");
3137 });
3138
3139 const SCEV *ElemSize = SE->getElementSize(Inst: Src);
3140 assert(ElemSize == SE->getElementSize(Dst) && "Different element sizes");
3141 SmallVector<const SCEV *, 4> SrcSizes, DstSizes;
3142 if (!delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: SrcAccessFn),
3143 Subscripts&: SrcSubscripts, Sizes&: SrcSizes, ElementSize: ElemSize) ||
3144 !delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: DstAccessFn),
3145 Subscripts&: DstSubscripts, Sizes&: DstSizes, ElementSize: ElemSize))
3146 return false;
3147
3148 // Check that the two size arrays are non-empty and equal in length and
3149 // value. SCEV expressions are uniqued, so we can compare pointers.
3150 if (SrcSizes.size() != DstSizes.size() ||
3151 !std::equal(first1: SrcSizes.begin(), last1: SrcSizes.end(), first2: DstSizes.begin())) {
3152 SrcSubscripts.clear();
3153 DstSubscripts.clear();
3154 return false;
3155 }
3156
3157 assert(SrcSubscripts.size() == DstSubscripts.size() &&
3158 "Expected equal number of entries in the list of SrcSubscripts and "
3159 "DstSubscripts.");
3160
3161 // In general we cannot safely assume that the subscripts recovered from GEPs
3162 // are in the range of values defined for their corresponding array
3163 // dimensions. For example some C language usage/interpretation make it
3164 // impossible to verify this at compile-time. As such we can only delinearize
3165 // iff the subscripts are positive and are less than the range of the
3166 // dimension.
3167 if (!DisableDelinearizationChecks) {
3168 if (!validateDelinearizationResult(SE&: *SE, Sizes: SrcSizes, Subscripts: SrcSubscripts) ||
3169 !validateDelinearizationResult(SE&: *SE, Sizes: DstSizes, Subscripts: DstSubscripts)) {
3170 SrcSubscripts.clear();
3171 DstSubscripts.clear();
3172 return false;
3173 }
3174 }
3175 LLVM_DEBUG({
3176 dbgs() << "Delinearized subscripts of fixed-size array\n"
3177 << "SrcGEP:" << *getLoadStorePointerOperand(Src) << "\n"
3178 << "DstGEP:" << *getLoadStorePointerOperand(Dst) << "\n";
3179 });
3180 return true;
3181}
3182
3183bool DependenceInfo::tryDelinearizeParametricSize(
3184 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3185 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3186 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3187
3188 const SCEVUnknown *SrcBase =
3189 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
3190 const SCEVUnknown *DstBase =
3191 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
3192 assert(SrcBase && DstBase && SrcBase == DstBase &&
3193 "expected src and dst scev unknowns to be equal");
3194
3195 const SCEV *ElementSize = SE->getElementSize(Inst: Src);
3196 if (ElementSize != SE->getElementSize(Inst: Dst))
3197 return false;
3198
3199 const SCEV *SrcSCEV = SE->getMinusSCEV(LHS: SrcAccessFn, RHS: SrcBase);
3200 const SCEV *DstSCEV = SE->getMinusSCEV(LHS: DstAccessFn, RHS: DstBase);
3201
3202 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(Val: SrcSCEV);
3203 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(Val: DstSCEV);
3204 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
3205 return false;
3206
3207 // First step: collect parametric terms in both array references.
3208 SmallVector<const SCEV *, 4> Terms;
3209 collectParametricTerms(SE&: *SE, Expr: SrcAR, Terms);
3210 collectParametricTerms(SE&: *SE, Expr: DstAR, Terms);
3211
3212 // Second step: find subscript sizes.
3213 SmallVector<const SCEV *, 4> Sizes;
3214 findArrayDimensions(SE&: *SE, Terms, Sizes, ElementSize);
3215
3216 // Third step: compute the access functions for each subscript.
3217 computeAccessFunctions(SE&: *SE, Expr: SrcAR, Subscripts&: SrcSubscripts, Sizes);
3218 computeAccessFunctions(SE&: *SE, Expr: DstAR, Subscripts&: DstSubscripts, Sizes);
3219
3220 // Fail when there is only a subscript: that's a linearized access function.
3221 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
3222 SrcSubscripts.size() != DstSubscripts.size())
3223 return false;
3224
3225 // Statically check that the array bounds are in-range. The first subscript we
3226 // don't have a size for and it cannot overflow into another subscript, so is
3227 // always safe. The others need to be 0 <= subscript[i] < bound, for both src
3228 // and dst.
3229 // FIXME: It may be better to record these sizes and add them as constraints
3230 // to the dependency checks.
3231 if (!DisableDelinearizationChecks)
3232 if (!validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: SrcSubscripts) ||
3233 !validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: DstSubscripts))
3234 return false;
3235
3236 return true;
3237}
3238
3239//===----------------------------------------------------------------------===//
3240
3241#ifndef NDEBUG
3242// For debugging purposes, dump a small bit vector to dbgs().
3243static void dumpSmallBitVector(SmallBitVector &BV) {
3244 dbgs() << "{";
3245 for (unsigned VI : BV.set_bits()) {
3246 dbgs() << VI;
3247 if (BV.find_next(VI) >= 0)
3248 dbgs() << ' ';
3249 }
3250 dbgs() << "}\n";
3251}
3252#endif
3253
3254bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
3255 FunctionAnalysisManager::Invalidator &Inv) {
3256 // Check if the analysis itself has been invalidated.
3257 auto PAC = PA.getChecker<DependenceAnalysis>();
3258 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3259 return true;
3260
3261 // Check transitive dependencies.
3262 return Inv.invalidate<AAManager>(IR&: F, PA) ||
3263 Inv.invalidate<ScalarEvolutionAnalysis>(IR&: F, PA) ||
3264 Inv.invalidate<LoopAnalysis>(IR&: F, PA);
3265}
3266
3267// depends -
3268// Returns NULL if there is no dependence.
3269// Otherwise, return a Dependence with as many details as possible.
3270// Corresponds to Section 3.1 in the paper
3271//
3272// Practical Dependence Testing
3273// Goff, Kennedy, Tseng
3274// PLDI 1991
3275//
3276std::unique_ptr<Dependence>
3277DependenceInfo::depends(Instruction *Src, Instruction *Dst,
3278 bool UnderRuntimeAssumptions) {
3279 SmallVector<const SCEVPredicate *, 4> Assume;
3280 bool PossiblyLoopIndependent = true;
3281 if (Src == Dst)
3282 PossiblyLoopIndependent = false;
3283
3284 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
3285 // if both instructions don't reference memory, there's no dependence
3286 return nullptr;
3287
3288 if (!isLoadOrStore(I: Src) || !isLoadOrStore(I: Dst)) {
3289 // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3290 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
3291 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3292 args: SCEVUnionPredicate(Assume, *SE));
3293 }
3294
3295 const MemoryLocation &DstLoc = MemoryLocation::get(Inst: Dst);
3296 const MemoryLocation &SrcLoc = MemoryLocation::get(Inst: Src);
3297
3298 switch (underlyingObjectsAlias(AA, DL: F->getDataLayout(), LocA: DstLoc, LocB: SrcLoc)) {
3299 case AliasResult::MayAlias:
3300 case AliasResult::PartialAlias:
3301 // cannot analyse objects if we don't understand their aliasing.
3302 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
3303 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3304 args: SCEVUnionPredicate(Assume, *SE));
3305 case AliasResult::NoAlias:
3306 // If the objects noalias, they are distinct, accesses are independent.
3307 LLVM_DEBUG(dbgs() << "no alias\n");
3308 return nullptr;
3309 case AliasResult::MustAlias:
3310 break; // The underlying objects alias; test accesses for dependence.
3311 }
3312
3313 if (DstLoc.Size != SrcLoc.Size || !DstLoc.Size.isPrecise() ||
3314 !SrcLoc.Size.isPrecise()) {
3315 // The dependence test gets confused if the size of the memory accesses
3316 // differ.
3317 LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
3318 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3319 args: SCEVUnionPredicate(Assume, *SE));
3320 }
3321
3322 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3323 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3324 const SCEV *SrcSCEV = SE->getSCEV(V: SrcPtr);
3325 const SCEV *DstSCEV = SE->getSCEV(V: DstPtr);
3326 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
3327 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
3328 const SCEV *SrcBase = SE->getPointerBase(V: SrcSCEV);
3329 const SCEV *DstBase = SE->getPointerBase(V: DstSCEV);
3330 if (SrcBase != DstBase) {
3331 // If two pointers have different bases, trying to analyze indexes won't
3332 // work; we can't compare them to each other. This can happen, for example,
3333 // if one is produced by an LCSSA PHI node.
3334 //
3335 // We check this upfront so we don't crash in cases where getMinusSCEV()
3336 // returns a SCEVCouldNotCompute.
3337 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
3338 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3339 args: SCEVUnionPredicate(Assume, *SE));
3340 }
3341
3342 // Even if the base pointers are the same, they may not be loop-invariant. It
3343 // could lead to incorrect results, as we're analyzing loop-carried
3344 // dependencies. Src and Dst can be in different loops, so we need to check
3345 // the base pointer is invariant in both loops.
3346 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
3347 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
3348 if (!isLoopInvariant(Expression: SrcBase, LoopNest: SrcLoop) ||
3349 !isLoopInvariant(Expression: DstBase, LoopNest: DstLoop)) {
3350 LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
3351 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3352 args: SCEVUnionPredicate(Assume, *SE));
3353 }
3354
3355 uint64_t EltSize = SrcLoc.Size.toRaw();
3356 const SCEV *SrcEv = SE->getMinusSCEV(LHS: SrcSCEV, RHS: SrcBase);
3357 const SCEV *DstEv = SE->getMinusSCEV(LHS: DstSCEV, RHS: DstBase);
3358
3359 // Check that memory access offsets are multiples of element sizes.
3360 if (!SE->isKnownMultipleOf(S: SrcEv, M: EltSize, Assumptions&: Assume) ||
3361 !SE->isKnownMultipleOf(S: DstEv, M: EltSize, Assumptions&: Assume)) {
3362 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
3363 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3364 args: SCEVUnionPredicate(Assume, *SE));
3365 }
3366
3367 // Runtime assumptions needed but not allowed.
3368 if (!Assume.empty() && !UnderRuntimeAssumptions)
3369 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3370 args: SCEVUnionPredicate(Assume, *SE));
3371
3372 unsigned Pairs = 1;
3373 SmallVector<Subscript, 2> Pair(Pairs);
3374 Pair[0].Src = SrcEv;
3375 Pair[0].Dst = DstEv;
3376
3377 SCEVMonotonicityChecker MonChecker(SE);
3378 const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
3379 if (EnableMonotonicityCheck)
3380 if (MonChecker.checkMonotonicity(Expr: Pair[0].Src, OutermostLoop).isUnknown() ||
3381 MonChecker.checkMonotonicity(Expr: Pair[0].Dst, OutermostLoop).isUnknown())
3382 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3383 args: SCEVUnionPredicate(Assume, *SE));
3384
3385 if (Delinearize) {
3386 if (tryDelinearize(Src, Dst, Pair)) {
3387 LLVM_DEBUG(dbgs() << " delinearized\n");
3388 Pairs = Pair.size();
3389 }
3390 }
3391
3392 // Establish loop nesting levels considering SameSD loops as common
3393 establishNestingLevels(Src, Dst);
3394
3395 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
3396 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
3397 LLVM_DEBUG(dbgs() << " SameSD nesting levels = " << SameSDLevels << "\n");
3398
3399 // Modify common levels to consider the SameSD levels in the tests
3400 CommonLevels += SameSDLevels;
3401 MaxLevels -= SameSDLevels;
3402 if (SameSDLevels > 0) {
3403 // Not all tests are handled yet over SameSD loops
3404 // Revoke if there are any tests other than ZIV, SIV or RDIV
3405 for (unsigned P = 0; P < Pairs; ++P) {
3406 SmallBitVector Loops;
3407 Subscript::ClassificationKind TestClass =
3408 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()),
3409 Dst: Pair[P].Dst, DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops);
3410
3411 if (TestClass != Subscript::ZIV && TestClass != Subscript::SIV &&
3412 TestClass != Subscript::RDIV) {
3413 // Revert the levels to not consider the SameSD levels
3414 CommonLevels -= SameSDLevels;
3415 MaxLevels += SameSDLevels;
3416 SameSDLevels = 0;
3417 break;
3418 }
3419 }
3420 }
3421
3422 if (SameSDLevels > 0)
3423 SameSDLoopsCount++;
3424
3425 FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
3426 PossiblyLoopIndependent, CommonLevels);
3427 ++TotalArrayPairs;
3428
3429 for (unsigned P = 0; P < Pairs; ++P) {
3430 assert(Pair[P].Src->getType()->isIntegerTy() && "Src must be an integer");
3431 assert(Pair[P].Dst->getType()->isIntegerTy() && "Dst must be an integer");
3432 Pair[P].Loops.resize(N: MaxLevels + 1);
3433 Pair[P].GroupLoops.resize(N: MaxLevels + 1);
3434 Pair[P].Group.resize(N: Pairs);
3435 Pair[P].Classification =
3436 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()), Dst: Pair[P].Dst,
3437 DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops&: Pair[P].Loops);
3438 Pair[P].GroupLoops = Pair[P].Loops;
3439 Pair[P].Group.set(P);
3440 LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
3441 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3442 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3443 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3444 LLVM_DEBUG(dbgs() << "\tloops = ");
3445 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
3446 }
3447
3448 // Test each subscript individually
3449 for (unsigned SI = 0; SI < Pairs; ++SI) {
3450 LLVM_DEBUG(dbgs() << "testing subscript " << SI);
3451 switch (Pair[SI].Classification) {
3452 case Subscript::NonLinear:
3453 // ignore these, but collect loops for later
3454 ++NonlinearSubscriptPairs;
3455 collectCommonLoops(Expression: Pair[SI].Src, LoopNest: LI->getLoopFor(BB: Src->getParent()),
3456 Loops&: Pair[SI].Loops);
3457 collectCommonLoops(Expression: Pair[SI].Dst, LoopNest: LI->getLoopFor(BB: Dst->getParent()),
3458 Loops&: Pair[SI].Loops);
3459 break;
3460 case Subscript::ZIV:
3461 LLVM_DEBUG(dbgs() << ", ZIV\n");
3462 if (testZIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
3463 return nullptr;
3464 break;
3465 case Subscript::SIV: {
3466 LLVM_DEBUG(dbgs() << ", SIV\n");
3467 unsigned Level;
3468 if (testSIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Level, Result,
3469 UnderRuntimeAssumptions))
3470 return nullptr;
3471 break;
3472 }
3473 case Subscript::RDIV:
3474 LLVM_DEBUG(dbgs() << ", RDIV\n");
3475 if (testRDIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
3476 return nullptr;
3477 break;
3478 case Subscript::MIV:
3479 LLVM_DEBUG(dbgs() << ", MIV\n");
3480 if (testMIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Loops: Pair[SI].Loops, Result))
3481 return nullptr;
3482 break;
3483 }
3484 }
3485
3486 // Make sure the Scalar flags are set correctly.
3487 SmallBitVector CompleteLoops(MaxLevels + 1);
3488 for (unsigned SI = 0; SI < Pairs; ++SI)
3489 CompleteLoops |= Pair[SI].Loops;
3490 for (unsigned II = 1; II <= CommonLevels; ++II)
3491 if (CompleteLoops[II])
3492 Result.DV[II - 1].Scalar = false;
3493
3494 // Set the distance to zero if the direction is EQ.
3495 // TODO: Ideally, the distance should be set to 0 immediately simultaneously
3496 // with the corresponding direction being set to EQ.
3497 for (unsigned II = 1; II <= Result.getLevels(); ++II) {
3498 if (Result.getDirection(Level: II) == Dependence::DVEntry::EQ) {
3499 if (Result.DV[II - 1].Distance == nullptr)
3500 Result.DV[II - 1].Distance = SE->getZero(Ty: SrcSCEV->getType());
3501 else
3502 assert(Result.DV[II - 1].Distance->isZero() &&
3503 "Inconsistency between distance and direction");
3504 }
3505
3506#ifndef NDEBUG
3507 // Check that the converse (i.e., if the distance is zero, then the
3508 // direction is EQ) holds.
3509 const SCEV *Distance = Result.getDistance(II);
3510 if (Distance && Distance->isZero())
3511 assert(Result.getDirection(II) == Dependence::DVEntry::EQ &&
3512 "Distance is zero, but direction is not EQ");
3513#endif
3514 }
3515
3516 if (SameSDLevels > 0) {
3517 // Extracting SameSD levels from the common levels
3518 // Reverting CommonLevels and MaxLevels to their original values
3519 assert(CommonLevels >= SameSDLevels);
3520 CommonLevels -= SameSDLevels;
3521 MaxLevels += SameSDLevels;
3522 std::unique_ptr<FullDependence::DVEntry[]> DV, DVSameSD;
3523 DV = std::make_unique<FullDependence::DVEntry[]>(num: CommonLevels);
3524 DVSameSD = std::make_unique<FullDependence::DVEntry[]>(num: SameSDLevels);
3525 for (unsigned Level = 0; Level < CommonLevels; ++Level)
3526 DV[Level] = Result.DV[Level];
3527 for (unsigned Level = 0; Level < SameSDLevels; ++Level)
3528 DVSameSD[Level] = Result.DV[CommonLevels + Level];
3529 Result.DV = std::move(DV);
3530 Result.DVSameSD = std::move(DVSameSD);
3531 Result.Levels = CommonLevels;
3532 Result.SameSDLevels = SameSDLevels;
3533 }
3534
3535 if (PossiblyLoopIndependent) {
3536 // Make sure the LoopIndependent flag is set correctly.
3537 // All directions must include equal, otherwise no
3538 // loop-independent dependence is possible.
3539 for (unsigned II = 1; II <= CommonLevels; ++II) {
3540 if (!(Result.getDirection(Level: II) & Dependence::DVEntry::EQ)) {
3541 Result.LoopIndependent = false;
3542 break;
3543 }
3544 }
3545 } else {
3546 // On the other hand, if all directions are equal and there's no
3547 // loop-independent dependence possible, then no dependence exists.
3548 // However, if there are runtime assumptions, we must return the result.
3549 bool AllEqual = true;
3550 for (unsigned II = 1; II <= CommonLevels; ++II) {
3551 if (Result.getDirection(Level: II) != Dependence::DVEntry::EQ) {
3552 AllEqual = false;
3553 break;
3554 }
3555 }
3556 if (AllEqual && Result.Assumptions.getPredicates().empty())
3557 return nullptr;
3558 }
3559
3560 return std::make_unique<FullDependence>(args: std::move(Result));
3561}
3562