1//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DependenceAnalysis is an LLVM pass that analyses dependences between memory
10// accesses. Currently, it is an (incomplete) implementation of the approach
11// described in
12//
13// Practical Dependence Testing
14// Goff, Kennedy, Tseng
15// PLDI 1991
16//
17// There's a single entry point that analyzes the dependence between a pair
18// of memory references in a function, returning either NULL, for no dependence,
19// or a more-or-less detailed description of the dependence between them.
20//
21// Since Clang linearizes some array subscripts, the dependence
22// analysis is using SCEV->delinearize to recover the representation of multiple
23// subscripts, and thus avoid the more expensive and less precise MIV tests. The
24// delinearization is controlled by the flag -da-delinearize.
25//
26// We should pay some careful attention to the possibility of integer overflow
27// in the implementation of the various tests. This could happen with Add,
28// Subtract, or Multiply, with both APInt's and SCEV's.
29//
30// Some non-linear subscript pairs can be handled by the GCD test
31// (and perhaps other tests).
32// Should explore how often these things occur.
33//
34// Finally, it seems like certain test cases expose weaknesses in the SCEV
35// simplification, especially in the handling of sign and zero extensions.
36// It could be useful to spend time exploring these.
37//
38// Please note that this is work in progress and the interface is subject to
39// change.
40//
41//===----------------------------------------------------------------------===//
42// //
43// In memory of Ken Kennedy, 1945 - 2007 //
44// //
45//===----------------------------------------------------------------------===//
46
47#include "llvm/Analysis/DependenceAnalysis.h"
48#include "llvm/ADT/Statistic.h"
49#include "llvm/Analysis/AliasAnalysis.h"
50#include "llvm/Analysis/Delinearization.h"
51#include "llvm/Analysis/LoopInfo.h"
52#include "llvm/Analysis/ScalarEvolution.h"
53#include "llvm/Analysis/ScalarEvolutionExpressions.h"
54#include "llvm/Analysis/ValueTracking.h"
55#include "llvm/IR/InstIterator.h"
56#include "llvm/IR/Module.h"
57#include "llvm/InitializePasses.h"
58#include "llvm/Support/CommandLine.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/raw_ostream.h"
62
63using namespace llvm;
64
65#define DEBUG_TYPE "da"
66
67//===----------------------------------------------------------------------===//
68// statistics
69
70STATISTIC(TotalArrayPairs, "Array pairs tested");
71STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
72STATISTIC(ZIVapplications, "ZIV applications");
73STATISTIC(ZIVindependence, "ZIV independence");
74STATISTIC(StrongSIVapplications, "Strong SIV applications");
75STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
76STATISTIC(StrongSIVindependence, "Strong SIV independence");
77STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
78STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
79STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
80STATISTIC(ExactSIVapplications, "Exact SIV applications");
81STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
82STATISTIC(ExactSIVindependence, "Exact SIV independence");
83STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
84STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
85STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
86STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
87STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
88STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
89STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
90STATISTIC(GCDapplications, "GCD applications");
91STATISTIC(GCDsuccesses, "GCD successes");
92STATISTIC(GCDindependence, "GCD independence");
93STATISTIC(BanerjeeApplications, "Banerjee applications");
94STATISTIC(BanerjeeIndependence, "Banerjee independence");
95STATISTIC(BanerjeeSuccesses, "Banerjee successes");
96STATISTIC(SameSDLoopsCount, "Loops with Same iteration Space and Depth");
97
98static cl::opt<bool>
99 Delinearize("da-delinearize", cl::init(Val: true), cl::Hidden,
100 cl::desc("Try to delinearize array references."));
101static cl::opt<bool> DisableDelinearizationChecks(
102 "da-disable-delinearization-checks", cl::Hidden,
103 cl::desc(
104 "Disable checks that try to statically verify validity of "
105 "delinearized subscripts. Enabling this option may result in incorrect "
106 "dependence vectors for languages that allow the subscript of one "
107 "dimension to underflow or overflow into another dimension."));
108
109static cl::opt<unsigned> MIVMaxLevelThreshold(
110 "da-miv-max-level-threshold", cl::init(Val: 7), cl::Hidden,
111 cl::desc("Maximum depth allowed for the recursive algorithm used to "
112 "explore MIV direction vectors."));
113
114namespace {
115
116/// Types of dependence test routines.
117enum class DependenceTestType {
118 All,
119 StrongSIV,
120 WeakCrossingSIV,
121 ExactSIV,
122 WeakZeroSIV,
123 ExactRDIV,
124 SymbolicRDIV,
125 GCDMIV,
126 BanerjeeMIV,
127};
128
129} // anonymous namespace
130
131static cl::opt<DependenceTestType> EnableDependenceTest(
132 "da-enable-dependence-test", cl::init(Val: DependenceTestType::All),
133 cl::ReallyHidden,
134 cl::desc("Run only specified dependence test routine and disable others. "
135 "The purpose is mainly to exclude the influence of other "
136 "dependence test routines in regression tests. If set to All, all "
137 "dependence test routines are enabled."),
138 cl::values(clEnumValN(DependenceTestType::All, "all",
139 "Enable all dependence test routines."),
140 clEnumValN(DependenceTestType::StrongSIV, "strong-siv",
141 "Enable only Strong SIV test."),
142 clEnumValN(DependenceTestType::WeakCrossingSIV,
143 "weak-crossing-siv",
144 "Enable only Weak-Crossing SIV test."),
145 clEnumValN(DependenceTestType::ExactSIV, "exact-siv",
146 "Enable only Exact SIV test."),
147 clEnumValN(DependenceTestType::WeakZeroSIV, "weak-zero-siv",
148 "Enable only Weak-Zero SIV test."),
149 clEnumValN(DependenceTestType::ExactRDIV, "exact-rdiv",
150 "Enable only Exact RDIV test."),
151 clEnumValN(DependenceTestType::SymbolicRDIV, "symbolic-rdiv",
152 "Enable only Symbolic RDIV test."),
153 clEnumValN(DependenceTestType::GCDMIV, "gcd-miv",
154 "Enable only GCD MIV test."),
155 clEnumValN(DependenceTestType::BanerjeeMIV, "banerjee-miv",
156 "Enable only Banerjee MIV test.")));
157
158// TODO: This flag is disabled by default because it is still under development.
159// Enable it or delete this flag when the feature is ready.
160static cl::opt<bool> EnableMonotonicityCheck(
161 "da-enable-monotonicity-check", cl::init(Val: false), cl::Hidden,
162 cl::desc("Check if the subscripts are monotonic. If it's not, dependence "
163 "is reported as unknown."));
164
165static cl::opt<bool> DumpMonotonicityReport(
166 "da-dump-monotonicity-report", cl::init(Val: false), cl::Hidden,
167 cl::desc(
168 "When printing analysis, dump the results of monotonicity checks."));
169
170//===----------------------------------------------------------------------===//
171// basics
172
173DependenceAnalysis::Result
174DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
175 auto &AA = FAM.getResult<AAManager>(IR&: F);
176 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
177 auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
178 return DependenceInfo(&F, &AA, &SE, &LI);
179}
180
181AnalysisKey DependenceAnalysis::Key;
182
183INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
184 "Dependence Analysis", true, true)
185INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
186INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
187INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
188INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
189 true, true)
190
191char DependenceAnalysisWrapperPass::ID = 0;
192
193DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
194 : FunctionPass(ID) {}
195
196FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
197 return new DependenceAnalysisWrapperPass();
198}
199
200bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
201 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
202 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
203 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
204 info.reset(p: new DependenceInfo(&F, &AA, &SE, &LI));
205 return false;
206}
207
208DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
209
210void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
211
212void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
213 AU.setPreservesAll();
214 AU.addRequiredTransitive<AAResultsWrapperPass>();
215 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
216 AU.addRequiredTransitive<LoopInfoWrapperPass>();
217}
218
219namespace {
220
221/// The property of monotonicity of a SCEV. To define the monotonicity, assume
222/// a SCEV defined within N-nested loops. Let i_k denote the iteration number
223/// of the k-th loop. Then we can regard the SCEV as an N-ary function:
224///
225/// F(i_1, i_2, ..., i_N)
226///
227/// The domain of i_k is the closed range [0, BTC_k], where BTC_k is the
228/// backedge-taken count of the k-th loop.
229///
230/// A function F is said to be "monotonically increasing with respect to the
231/// k-th loop" if x <= y implies the following condition:
232///
233/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) <=
234/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
235///
236/// where i_1, ..., i_{k-1}, i_{k+1}, ..., i_N, x, and y are elements of their
237/// respective domains.
238///
239/// Likewise F is "monotonically decreasing with respect to the k-th loop"
240/// if x <= y implies
241///
242/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) >=
243/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
244///
245/// A function F that is monotonically increasing or decreasing with respect to
246/// the k-th loop is simply called "monotonic with respect to k-th loop".
247///
248/// A function F is said to be "multivariate monotonic" when it is monotonic
249/// with respect to all of the N loops.
250///
251/// Since integer comparison can be either signed or unsigned, we need to
252/// distinguish monotonicity in the signed sense from that in the unsigned
253/// sense. Note that the inequality "x <= y" merely indicates loop progression
254/// and is not affected by the difference between signed and unsigned order.
255///
256/// Currently we only consider monotonicity in a signed sense.
257enum class SCEVMonotonicityType {
258 /// We don't know anything about the monotonicity of the SCEV.
259 Unknown,
260
261 /// The SCEV is loop-invariant with respect to the outermost loop. In other
262 /// words, the function F corresponding to the SCEV is a constant function.
263 Invariant,
264
265 /// The function F corresponding to the SCEV is multivariate monotonic in a
266 /// signed sense. Note that the multivariate monotonic function may also be a
267 /// constant function. The order employed in the definition of monotonicity
268 /// is not strict order.
269 MultivariateSignedMonotonic,
270};
271
272struct SCEVMonotonicity {
273 SCEVMonotonicity(SCEVMonotonicityType Type,
274 const SCEV *FailurePoint = nullptr);
275
276 SCEVMonotonicityType getType() const { return Type; }
277
278 const SCEV *getFailurePoint() const { return FailurePoint; }
279
280 bool isUnknown() const { return Type == SCEVMonotonicityType::Unknown; }
281
282 void print(raw_ostream &OS, unsigned Depth) const;
283
284private:
285 SCEVMonotonicityType Type;
286
287 /// The subexpression that caused Unknown. Mainly for debugging purpose.
288 const SCEV *FailurePoint;
289};
290
291/// Check the monotonicity of a SCEV. Since dependence tests (SIV, MIV, etc.)
292/// assume that subscript expressions are (multivariate) monotonic, we need to
293/// verify this property before applying those tests. Violating this assumption
294/// may cause them to produce incorrect results.
295struct SCEVMonotonicityChecker
296 : public SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity> {
297
298 SCEVMonotonicityChecker(ScalarEvolution *SE) : SE(SE) {}
299
300 /// Check the monotonicity of \p Expr. \p Expr must be integer type. If \p
301 /// OutermostLoop is not null, \p Expr must be defined in \p OutermostLoop or
302 /// one of its nested loops.
303 SCEVMonotonicity checkMonotonicity(const SCEV *Expr,
304 const Loop *OutermostLoop);
305
306private:
307 ScalarEvolution *SE;
308
309 /// The outermost loop that DA is analyzing.
310 const Loop *OutermostLoop;
311
312 /// A helper to classify \p Expr as either Invariant or Unknown.
313 SCEVMonotonicity invariantOrUnknown(const SCEV *Expr);
314
315 /// Return true if \p Expr is loop-invariant with respect to the outermost
316 /// loop.
317 bool isLoopInvariant(const SCEV *Expr) const;
318
319 /// A helper to create an Unknown SCEVMonotonicity.
320 SCEVMonotonicity createUnknown(const SCEV *FailurePoint) {
321 return SCEVMonotonicity(SCEVMonotonicityType::Unknown, FailurePoint);
322 }
323
324 SCEVMonotonicity visitAddRecExpr(const SCEVAddRecExpr *Expr);
325
326 SCEVMonotonicity visitConstant(const SCEVConstant *) {
327 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
328 }
329 SCEVMonotonicity visitVScale(const SCEVVScale *) {
330 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
331 }
332
333 // TODO: Handle more cases.
334 SCEVMonotonicity visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
335 return invariantOrUnknown(Expr);
336 }
337 SCEVMonotonicity visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
338 return invariantOrUnknown(Expr);
339 }
340 SCEVMonotonicity visitAddExpr(const SCEVAddExpr *Expr) {
341 return invariantOrUnknown(Expr);
342 }
343 SCEVMonotonicity visitMulExpr(const SCEVMulExpr *Expr) {
344 return invariantOrUnknown(Expr);
345 }
346 SCEVMonotonicity visitPtrToAddrExpr(const SCEVPtrToAddrExpr *Expr) {
347 return invariantOrUnknown(Expr);
348 }
349 SCEVMonotonicity visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) {
350 return invariantOrUnknown(Expr);
351 }
352 SCEVMonotonicity visitTruncateExpr(const SCEVTruncateExpr *Expr) {
353 return invariantOrUnknown(Expr);
354 }
355 SCEVMonotonicity visitUDivExpr(const SCEVUDivExpr *Expr) {
356 return invariantOrUnknown(Expr);
357 }
358 SCEVMonotonicity visitSMaxExpr(const SCEVSMaxExpr *Expr) {
359 return invariantOrUnknown(Expr);
360 }
361 SCEVMonotonicity visitUMaxExpr(const SCEVUMaxExpr *Expr) {
362 return invariantOrUnknown(Expr);
363 }
364 SCEVMonotonicity visitSMinExpr(const SCEVSMinExpr *Expr) {
365 return invariantOrUnknown(Expr);
366 }
367 SCEVMonotonicity visitUMinExpr(const SCEVUMinExpr *Expr) {
368 return invariantOrUnknown(Expr);
369 }
370 SCEVMonotonicity visitSequentialUMinExpr(const SCEVSequentialUMinExpr *Expr) {
371 return invariantOrUnknown(Expr);
372 }
373 SCEVMonotonicity visitUnknown(const SCEVUnknown *Expr) {
374 return invariantOrUnknown(Expr);
375 }
376 SCEVMonotonicity visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
377 return invariantOrUnknown(Expr);
378 }
379
380 friend struct SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity>;
381};
382
383/// A wrapper class for std::optional<APInt> that provides arithmetic operators
384/// with overflow checking in a signed sense. This allows us to omit inserting
385/// an overflow check at every arithmetic operation, which simplifies the code
386/// if the operations are chained like `a + b + c + ...`.
387///
388/// If an calculation overflows, the result becomes "invalid" which is
389/// internally represented by std::nullopt. If any operand of an arithmetic
390/// operation is "invalid", the result will also be "invalid".
391struct OverflowSafeSignedAPInt {
392 OverflowSafeSignedAPInt() : Value(std::nullopt) {}
393 OverflowSafeSignedAPInt(const APInt &V) : Value(V) {}
394 OverflowSafeSignedAPInt(const std::optional<APInt> &V) : Value(V) {}
395
396 OverflowSafeSignedAPInt operator+(const OverflowSafeSignedAPInt &RHS) const {
397 if (!Value || !RHS.Value)
398 return OverflowSafeSignedAPInt();
399 bool Overflow;
400 APInt Result = Value->sadd_ov(RHS: *RHS.Value, Overflow);
401 if (Overflow)
402 return OverflowSafeSignedAPInt();
403 return OverflowSafeSignedAPInt(Result);
404 }
405
406 OverflowSafeSignedAPInt operator+(int RHS) const {
407 if (!Value)
408 return OverflowSafeSignedAPInt();
409 return *this + fromInt(V: RHS);
410 }
411
412 OverflowSafeSignedAPInt operator-(const OverflowSafeSignedAPInt &RHS) const {
413 if (!Value || !RHS.Value)
414 return OverflowSafeSignedAPInt();
415 bool Overflow;
416 APInt Result = Value->ssub_ov(RHS: *RHS.Value, Overflow);
417 if (Overflow)
418 return OverflowSafeSignedAPInt();
419 return OverflowSafeSignedAPInt(Result);
420 }
421
422 OverflowSafeSignedAPInt operator-(int RHS) const {
423 if (!Value)
424 return OverflowSafeSignedAPInt();
425 return *this - fromInt(V: RHS);
426 }
427
428 OverflowSafeSignedAPInt operator*(const OverflowSafeSignedAPInt &RHS) const {
429 if (!Value || !RHS.Value)
430 return OverflowSafeSignedAPInt();
431 bool Overflow;
432 APInt Result = Value->smul_ov(RHS: *RHS.Value, Overflow);
433 if (Overflow)
434 return OverflowSafeSignedAPInt();
435 return OverflowSafeSignedAPInt(Result);
436 }
437
438 OverflowSafeSignedAPInt operator-() const {
439 if (!Value)
440 return OverflowSafeSignedAPInt();
441 if (Value->isMinSignedValue())
442 return OverflowSafeSignedAPInt();
443 return OverflowSafeSignedAPInt(-*Value);
444 }
445
446 operator bool() const { return Value.has_value(); }
447
448 bool operator!() const { return !Value.has_value(); }
449
450 const APInt &operator*() const {
451 assert(Value && "Value is not available.");
452 return *Value;
453 }
454
455 const APInt *operator->() const {
456 assert(Value && "Value is not available.");
457 return &*Value;
458 }
459
460private:
461 /// Underlying value. std::nullopt means "unknown". An arithmetic operation on
462 /// "unknown" always produces "unknown".
463 std::optional<APInt> Value;
464
465 OverflowSafeSignedAPInt fromInt(uint64_t V) const {
466 assert(Value && "Value is not available.");
467 return OverflowSafeSignedAPInt(
468 APInt(Value->getBitWidth(), V, /*isSigned=*/true));
469 }
470};
471
472} // anonymous namespace
473
474// Used to test the dependence analyzer.
475// Looks through the function, noting instructions that may access memory.
476// Calls depends() on every possible pair and prints out the result.
477// Ignores all other instructions.
478static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
479 ScalarEvolution &SE, LoopInfo &LI,
480 bool NormalizeResults) {
481 auto *F = DA->getFunction();
482
483 if (DumpMonotonicityReport) {
484 SCEVMonotonicityChecker Checker(&SE);
485 OS << "Monotonicity check:\n";
486 for (Instruction &Inst : instructions(F)) {
487 if (!isa<LoadInst>(Val: Inst) && !isa<StoreInst>(Val: Inst))
488 continue;
489 Value *Ptr = getLoadStorePointerOperand(V: &Inst);
490 const Loop *L = LI.getLoopFor(BB: Inst.getParent());
491 const Loop *OutermostLoop = L ? L->getOutermostLoop() : nullptr;
492 const SCEV *PtrSCEV = SE.getSCEVAtScope(V: Ptr, L);
493 const SCEV *AccessFn = SE.removePointerBase(S: PtrSCEV);
494 SCEVMonotonicity Mon = Checker.checkMonotonicity(Expr: AccessFn, OutermostLoop);
495 OS.indent(NumSpaces: 2) << "Inst: " << Inst << "\n";
496 OS.indent(NumSpaces: 4) << "Expr: " << *AccessFn << "\n";
497 Mon.print(OS, Depth: 4);
498 }
499 OS << "\n";
500 }
501
502 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
503 ++SrcI) {
504 if (SrcI->mayReadOrWriteMemory()) {
505 for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE;
506 ++DstI) {
507 if (DstI->mayReadOrWriteMemory()) {
508 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
509 OS << " da analyze - ";
510 if (auto D = DA->depends(Src: &*SrcI, Dst: &*DstI,
511 /*UnderRuntimeAssumptions=*/true)) {
512
513#ifndef NDEBUG
514 // Verify that the distance being zero is equivalent to the
515 // direction being EQ.
516 for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
517 const SCEV *Distance = D->getDistance(Level);
518 bool IsDistanceZero = Distance && Distance->isZero();
519 bool IsDirectionEQ =
520 D->getDirection(Level) == Dependence::DVEntry::EQ;
521 assert(IsDistanceZero == IsDirectionEQ &&
522 "Inconsistent distance and direction.");
523 }
524#endif
525
526 // Normalize negative direction vectors if required by clients.
527 if (NormalizeResults && D->normalize(SE: &SE))
528 OS << "normalized - ";
529 D->dump(OS);
530 } else
531 OS << "none!\n";
532 }
533 }
534 }
535 }
536}
537
538void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
539 const Module *) const {
540 dumpExampleDependence(
541 OS, DA: info.get(), SE&: getAnalysis<ScalarEvolutionWrapperPass>().getSE(),
542 LI&: getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), NormalizeResults: false);
543}
544
545PreservedAnalyses
546DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
547 OS << "Printing analysis 'Dependence Analysis' for function '" << F.getName()
548 << "':\n";
549 dumpExampleDependence(OS, DA: &FAM.getResult<DependenceAnalysis>(IR&: F),
550 SE&: FAM.getResult<ScalarEvolutionAnalysis>(IR&: F),
551 LI&: FAM.getResult<LoopAnalysis>(IR&: F), NormalizeResults);
552 return PreservedAnalyses::all();
553}
554
555//===----------------------------------------------------------------------===//
556// Dependence methods
557
558// Returns true if this is an input dependence.
559bool Dependence::isInput() const {
560 return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
561}
562
563// Returns true if this is an output dependence.
564bool Dependence::isOutput() const {
565 return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
566}
567
568// Returns true if this is an flow (aka true) dependence.
569bool Dependence::isFlow() const {
570 return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
571}
572
573// Returns true if this is an anti dependence.
574bool Dependence::isAnti() const {
575 return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
576}
577
578// Returns true if a particular level is scalar; that is,
579// if no subscript in the source or destination mention the induction
580// variable associated with the loop at this level.
581// Leave this out of line, so it will serve as a virtual method anchor
582bool Dependence::isScalar(unsigned level, bool IsSameSD) const { return false; }
583
584//===----------------------------------------------------------------------===//
585// FullDependence methods
586
587FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
588 const SCEVUnionPredicate &Assumes,
589 bool PossiblyLoopIndependent,
590 unsigned CommonLevels)
591 : Dependence(Source, Destination, Assumes), Levels(CommonLevels),
592 LoopIndependent(PossiblyLoopIndependent) {
593 Consistent = true;
594 SameSDLevels = 0;
595 if (CommonLevels)
596 DV = std::make_unique<DVEntry[]>(num: CommonLevels);
597}
598
599// FIXME: in some cases the meaning of a negative direction vector
600// may not be straightforward, e.g.,
601// for (int i = 0; i < 32; ++i) {
602// Src: A[i] = ...;
603// Dst: use(A[31 - i]);
604// }
605// The dependency is
606// flow { Src[i] -> Dst[31 - i] : when i >= 16 } and
607// anti { Dst[i] -> Src[31 - i] : when i < 16 },
608// -- hence a [<>].
609// As long as a dependence result contains '>' ('<>', '<=>', "*"), it
610// means that a reversed/normalized dependence needs to be considered
611// as well. Nevertheless, current isDirectionNegative() only returns
612// true with a '>' or '>=' dependency for ease of canonicalizing the
613// dependency vector, since the reverse of '<>', '<=>' and "*" is itself.
614bool FullDependence::isDirectionNegative() const {
615 for (unsigned Level = 1; Level <= Levels; ++Level) {
616 unsigned char Direction = DV[Level - 1].Direction;
617 if (Direction == Dependence::DVEntry::EQ)
618 continue;
619 if (Direction == Dependence::DVEntry::GT ||
620 Direction == Dependence::DVEntry::GE)
621 return true;
622 return false;
623 }
624 return false;
625}
626
627bool FullDependence::normalize(ScalarEvolution *SE) {
628 if (!isDirectionNegative())
629 return false;
630
631 LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
632 dump(dbgs()););
633 std::swap(a&: Src, b&: Dst);
634 for (unsigned Level = 1; Level <= Levels; ++Level) {
635 unsigned char Direction = DV[Level - 1].Direction;
636 // Reverse the direction vector, this means LT becomes GT
637 // and GT becomes LT.
638 unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
639 if (Direction & Dependence::DVEntry::LT)
640 RevDirection |= Dependence::DVEntry::GT;
641 if (Direction & Dependence::DVEntry::GT)
642 RevDirection |= Dependence::DVEntry::LT;
643 DV[Level - 1].Direction = RevDirection;
644 // Reverse the dependence distance as well.
645 if (DV[Level - 1].Distance != nullptr)
646 DV[Level - 1].Distance = SE->getNegativeSCEV(V: DV[Level - 1].Distance);
647 }
648
649 LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
650 dump(dbgs()););
651 return true;
652}
653
654// The rest are simple getters that hide the implementation.
655
656// getDirection - Returns the direction associated with a particular common or
657// SameSD level.
658unsigned FullDependence::getDirection(unsigned Level, bool IsSameSD) const {
659 return getDVEntry(Level, IsSameSD).Direction;
660}
661
662// Returns the distance (or NULL) associated with a particular common or
663// SameSD level.
664const SCEV *FullDependence::getDistance(unsigned Level, bool IsSameSD) const {
665 return getDVEntry(Level, IsSameSD).Distance;
666}
667
668// Returns true if a particular regular or SameSD level is scalar; that is,
669// if no subscript in the source or destination mention the induction variable
670// associated with the loop at this level.
671bool FullDependence::isScalar(unsigned Level, bool IsSameSD) const {
672 return getDVEntry(Level, IsSameSD).Scalar;
673}
674
675// Returns true if peeling the first iteration from this regular or SameSD
676// loop level will break this dependence.
677bool FullDependence::isPeelFirst(unsigned Level, bool IsSameSD) const {
678 return getDVEntry(Level, IsSameSD).PeelFirst;
679}
680
681// Returns true if peeling the last iteration from this regular or SameSD
682// loop level will break this dependence.
683bool FullDependence::isPeelLast(unsigned Level, bool IsSameSD) const {
684 return getDVEntry(Level, IsSameSD).PeelLast;
685}
686
687// inSameSDLoops - Returns true if this level is an SameSD level, i.e.,
688// performed across two separate loop nests that have the Same iteration space
689// and Depth.
690bool FullDependence::inSameSDLoops(unsigned Level) const {
691 assert(0 < Level && Level <= static_cast<unsigned>(Levels) + SameSDLevels &&
692 "Level out of range");
693 return Level > Levels;
694}
695
696//===----------------------------------------------------------------------===//
697// SCEVMonotonicity
698
699SCEVMonotonicity::SCEVMonotonicity(SCEVMonotonicityType Type,
700 const SCEV *FailurePoint)
701 : Type(Type), FailurePoint(FailurePoint) {
702 assert(
703 ((Type == SCEVMonotonicityType::Unknown) == (FailurePoint != nullptr)) &&
704 "FailurePoint must be provided iff Type is Unknown");
705}
706
707void SCEVMonotonicity::print(raw_ostream &OS, unsigned Depth) const {
708 OS.indent(NumSpaces: Depth) << "Monotonicity: ";
709 switch (Type) {
710 case SCEVMonotonicityType::Unknown:
711 assert(FailurePoint && "FailurePoint must be provided for Unknown");
712 OS << "Unknown\n";
713 OS.indent(NumSpaces: Depth) << "Reason: " << *FailurePoint << "\n";
714 break;
715 case SCEVMonotonicityType::Invariant:
716 OS << "Invariant\n";
717 break;
718 case SCEVMonotonicityType::MultivariateSignedMonotonic:
719 OS << "MultivariateSignedMonotonic\n";
720 break;
721 }
722}
723
724bool SCEVMonotonicityChecker::isLoopInvariant(const SCEV *Expr) const {
725 return !OutermostLoop || SE->isLoopInvariant(S: Expr, L: OutermostLoop);
726}
727
728SCEVMonotonicity SCEVMonotonicityChecker::invariantOrUnknown(const SCEV *Expr) {
729 if (isLoopInvariant(Expr))
730 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
731 return createUnknown(FailurePoint: Expr);
732}
733
734SCEVMonotonicity
735SCEVMonotonicityChecker::checkMonotonicity(const SCEV *Expr,
736 const Loop *OutermostLoop) {
737 assert((!OutermostLoop || OutermostLoop->isOutermost()) &&
738 "OutermostLoop must be outermost");
739 assert(Expr->getType()->isIntegerTy() && "Expr must be integer type");
740 this->OutermostLoop = OutermostLoop;
741 return visit(S: Expr);
742}
743
744/// We only care about an affine AddRec at the moment. For an affine AddRec,
745/// the monotonicity can be inferred from its nowrap property. For example, let
746/// X and Y be loop-invariant, and assume Y is non-negative. An AddRec
747/// {X,+.Y}<nsw> implies:
748///
749/// X <=s (X + Y) <=s ((X + Y) + Y) <=s ...
750///
751/// Thus, we can conclude that the AddRec is monotonically increasing with
752/// respect to the associated loop in a signed sense. The similar reasoning
753/// applies when Y is non-positive, leading to a monotonically decreasing
754/// AddRec.
755SCEVMonotonicity
756SCEVMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
757 if (!Expr->isAffine() || !Expr->hasNoSignedWrap())
758 return createUnknown(FailurePoint: Expr);
759
760 const SCEV *Start = Expr->getStart();
761 const SCEV *Step = Expr->getStepRecurrence(SE&: *SE);
762
763 SCEVMonotonicity StartMon = visit(S: Start);
764 if (StartMon.isUnknown())
765 return StartMon;
766
767 if (!isLoopInvariant(Expr: Step))
768 return createUnknown(FailurePoint: Expr);
769
770 return SCEVMonotonicity(SCEVMonotonicityType::MultivariateSignedMonotonic);
771}
772
773//===----------------------------------------------------------------------===//
774// DependenceInfo methods
775
776// For debugging purposes. Dumps a dependence to OS.
777void Dependence::dump(raw_ostream &OS) const {
778 if (isConfused())
779 OS << "confused";
780 else {
781 if (isConsistent())
782 OS << "consistent ";
783 if (isFlow())
784 OS << "flow";
785 else if (isOutput())
786 OS << "output";
787 else if (isAnti())
788 OS << "anti";
789 else if (isInput())
790 OS << "input";
791 dumpImp(OS);
792 unsigned SameSDLevels = getSameSDLevels();
793 if (SameSDLevels > 0) {
794 OS << " / assuming " << SameSDLevels << " loop level(s) fused: ";
795 dumpImp(OS, IsSameSD: true);
796 }
797 }
798 OS << "!\n";
799
800 SCEVUnionPredicate Assumptions = getRuntimeAssumptions();
801 if (!Assumptions.isAlwaysTrue()) {
802 OS << " Runtime Assumptions:\n";
803 Assumptions.print(OS, Depth: 2);
804 }
805}
806
807// For debugging purposes. Dumps a dependence to OS with or without considering
808// the SameSD levels.
809void Dependence::dumpImp(raw_ostream &OS, bool IsSameSD) const {
810 unsigned Levels = getLevels();
811 unsigned SameSDLevels = getSameSDLevels();
812 bool OnSameSD = false;
813 unsigned LevelNum = Levels;
814 if (IsSameSD)
815 LevelNum += SameSDLevels;
816 OS << " [";
817 for (unsigned II = 1; II <= LevelNum; ++II) {
818 if (!OnSameSD && inSameSDLoops(Level: II))
819 OnSameSD = true;
820 if (isPeelFirst(Level: II, SameSD: OnSameSD))
821 OS << 'p';
822 const SCEV *Distance = getDistance(Level: II, SameSD: OnSameSD);
823 if (Distance)
824 OS << *Distance;
825 else if (isScalar(level: II, IsSameSD: OnSameSD))
826 OS << "S";
827 else {
828 unsigned Direction = getDirection(Level: II, SameSD: OnSameSD);
829 if (Direction == DVEntry::ALL)
830 OS << "*";
831 else {
832 if (Direction & DVEntry::LT)
833 OS << "<";
834 if (Direction & DVEntry::EQ)
835 OS << "=";
836 if (Direction & DVEntry::GT)
837 OS << ">";
838 }
839 }
840 if (isPeelLast(Level: II, SameSD: OnSameSD))
841 OS << 'p';
842 if (II < LevelNum)
843 OS << " ";
844 }
845 if (isLoopIndependent())
846 OS << "|<";
847 OS << "]";
848}
849
850// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
851// underlaying objects. If LocA and LocB are known to not alias (for any reason:
852// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
853// Otherwise the underlying objects are checked to see if they point to
854// different identifiable objects.
855static AliasResult underlyingObjectsAlias(AAResults *AA, const DataLayout &DL,
856 const MemoryLocation &LocA,
857 const MemoryLocation &LocB) {
858 // Check the original locations (minus size) for noalias, which can happen for
859 // tbaa, incompatible underlying object locations, etc.
860 MemoryLocation LocAS =
861 MemoryLocation::getBeforeOrAfter(Ptr: LocA.Ptr, AATags: LocA.AATags);
862 MemoryLocation LocBS =
863 MemoryLocation::getBeforeOrAfter(Ptr: LocB.Ptr, AATags: LocB.AATags);
864 BatchAAResults BAA(*AA);
865 BAA.enableCrossIterationMode();
866
867 if (BAA.isNoAlias(LocA: LocAS, LocB: LocBS))
868 return AliasResult::NoAlias;
869
870 // Check the underlying objects are the same
871 const Value *AObj = getUnderlyingObject(V: LocA.Ptr);
872 const Value *BObj = getUnderlyingObject(V: LocB.Ptr);
873
874 // If the underlying objects are the same, they must alias
875 if (AObj == BObj)
876 return AliasResult::MustAlias;
877
878 // We may have hit the recursion limit for underlying objects, or have
879 // underlying objects where we don't know they will alias.
880 if (!isIdentifiedObject(V: AObj) || !isIdentifiedObject(V: BObj))
881 return AliasResult::MayAlias;
882
883 // Otherwise we know the objects are different and both identified objects so
884 // must not alias.
885 return AliasResult::NoAlias;
886}
887
888// Returns true if the load or store can be analyzed. Atomic and volatile
889// operations have properties which this analysis does not understand.
890static bool isLoadOrStore(const Instruction *I) {
891 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
892 return LI->isUnordered();
893 else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
894 return SI->isUnordered();
895 return false;
896}
897
898// Returns true if two loops have the Same iteration Space and Depth. To be
899// more specific, two loops have SameSD if they are in the same nesting
900// depth and have the same backedge count. SameSD stands for Same iteration
901// Space and Depth.
902bool DependenceInfo::haveSameSD(const Loop *SrcLoop,
903 const Loop *DstLoop) const {
904 if (SrcLoop == DstLoop)
905 return true;
906
907 if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
908 return false;
909
910 if (!SrcLoop || !SrcLoop->getLoopLatch() || !DstLoop ||
911 !DstLoop->getLoopLatch())
912 return false;
913
914 const SCEV *SrcUB = nullptr, *DstUP = nullptr;
915 if (SE->hasLoopInvariantBackedgeTakenCount(L: SrcLoop))
916 SrcUB = SE->getBackedgeTakenCount(L: SrcLoop);
917 if (SE->hasLoopInvariantBackedgeTakenCount(L: DstLoop))
918 DstUP = SE->getBackedgeTakenCount(L: DstLoop);
919
920 if (SrcUB != nullptr && DstUP != nullptr) {
921 Type *WiderType = SE->getWiderType(Ty1: SrcUB->getType(), Ty2: DstUP->getType());
922 SrcUB = SE->getNoopOrZeroExtend(V: SrcUB, Ty: WiderType);
923 DstUP = SE->getNoopOrZeroExtend(V: DstUP, Ty: WiderType);
924
925 if (SE->isKnownPredicate(Pred: ICmpInst::ICMP_EQ, LHS: SrcUB, RHS: DstUP))
926 return true;
927 }
928
929 return false;
930}
931
932// Examines the loop nesting of the Src and Dst
933// instructions and establishes their shared loops. Sets the variables
934// CommonLevels, SrcLevels, and MaxLevels.
935// The source and destination instructions needn't be contained in the same
936// loop. The routine establishNestingLevels finds the level of most deeply
937// nested loop that contains them both, CommonLevels. An instruction that's
938// not contained in a loop is at level = 0. MaxLevels is equal to the level
939// of the source plus the level of the destination, minus CommonLevels.
940// This lets us allocate vectors MaxLevels in length, with room for every
941// distinct loop referenced in both the source and destination subscripts.
942// The variable SrcLevels is the nesting depth of the source instruction.
943// It's used to help calculate distinct loops referenced by the destination.
944// Here's the map from loops to levels:
945// 0 - unused
946// 1 - outermost common loop
947// ... - other common loops
948// CommonLevels - innermost common loop
949// ... - loops containing Src but not Dst
950// SrcLevels - innermost loop containing Src but not Dst
951// ... - loops containing Dst but not Src
952// MaxLevels - innermost loops containing Dst but not Src
953// Consider the follow code fragment:
954// for (a = ...) {
955// for (b = ...) {
956// for (c = ...) {
957// for (d = ...) {
958// A[] = ...;
959// }
960// }
961// for (e = ...) {
962// for (f = ...) {
963// for (g = ...) {
964// ... = A[];
965// }
966// }
967// }
968// }
969// }
970// If we're looking at the possibility of a dependence between the store
971// to A (the Src) and the load from A (the Dst), we'll note that they
972// have 2 loops in common, so CommonLevels will equal 2 and the direction
973// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
974// A map from loop names to loop numbers would look like
975// a - 1
976// b - 2 = CommonLevels
977// c - 3
978// d - 4 = SrcLevels
979// e - 5
980// f - 6
981// g - 7 = MaxLevels
982// SameSDLevels counts the number of levels after common levels that are
983// not common but have the same iteration space and depth. Internally this
984// is checked using haveSameSD. Assume that in this code fragment, levels c and
985// e have the same iteration space and depth, but levels d and f does not. Then
986// SameSDLevels is set to 1. In that case the level numbers for the previous
987// code look like
988// a - 1
989// b - 2
990// c,e - 3 = CommonLevels
991// d - 4 = SrcLevels
992// f - 5
993// g - 6 = MaxLevels
994void DependenceInfo::establishNestingLevels(const Instruction *Src,
995 const Instruction *Dst) {
996 const BasicBlock *SrcBlock = Src->getParent();
997 const BasicBlock *DstBlock = Dst->getParent();
998 unsigned SrcLevel = LI->getLoopDepth(BB: SrcBlock);
999 unsigned DstLevel = LI->getLoopDepth(BB: DstBlock);
1000 const Loop *SrcLoop = LI->getLoopFor(BB: SrcBlock);
1001 const Loop *DstLoop = LI->getLoopFor(BB: DstBlock);
1002 SrcLevels = SrcLevel;
1003 MaxLevels = SrcLevel + DstLevel;
1004 SameSDLevels = 0;
1005 while (SrcLevel > DstLevel) {
1006 SrcLoop = SrcLoop->getParentLoop();
1007 SrcLevel--;
1008 }
1009 while (DstLevel > SrcLevel) {
1010 DstLoop = DstLoop->getParentLoop();
1011 DstLevel--;
1012 }
1013
1014 // find the first common level and count the SameSD levels leading to it
1015 while (SrcLoop != DstLoop) {
1016 SameSDLevels++;
1017 if (!haveSameSD(SrcLoop, DstLoop))
1018 SameSDLevels = 0;
1019 SrcLoop = SrcLoop->getParentLoop();
1020 DstLoop = DstLoop->getParentLoop();
1021 SrcLevel--;
1022 }
1023 CommonLevels = SrcLevel;
1024 MaxLevels -= CommonLevels;
1025}
1026
1027// Given one of the loops containing the source, return
1028// its level index in our numbering scheme.
1029unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
1030 return SrcLoop->getLoopDepth();
1031}
1032
1033// Given one of the loops containing the destination,
1034// return its level index in our numbering scheme.
1035unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
1036 unsigned D = DstLoop->getLoopDepth();
1037 if (D > CommonLevels)
1038 // This tries to make sure that we assign unique numbers to src and dst when
1039 // the memory accesses reside in different loops that have the same depth.
1040 return D - CommonLevels + SrcLevels;
1041 else
1042 return D;
1043}
1044
1045// Returns true if Expression is loop invariant in LoopNest.
1046bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
1047 const Loop *LoopNest) const {
1048 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of
1049 // any loop as invariant, because we only consier expression evaluation at a
1050 // specific position (where the array access takes place), and not across the
1051 // entire function.
1052 if (!LoopNest)
1053 return true;
1054
1055 // If the expression is invariant in the outermost loop of the loop nest, it
1056 // is invariant anywhere in the loop nest.
1057 return SE->isLoopInvariant(S: Expression, L: LoopNest->getOutermostLoop());
1058}
1059
1060// Finds the set of loops from the LoopNest that
1061// have a level <= CommonLevels and are referred to by the SCEV Expression.
1062void DependenceInfo::collectCommonLoops(const SCEV *Expression,
1063 const Loop *LoopNest,
1064 SmallBitVector &Loops) const {
1065 while (LoopNest) {
1066 unsigned Level = LoopNest->getLoopDepth();
1067 if (Level <= CommonLevels && !SE->isLoopInvariant(S: Expression, L: LoopNest))
1068 Loops.set(Level);
1069 LoopNest = LoopNest->getParentLoop();
1070 }
1071}
1072
1073void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
1074
1075 unsigned widestWidthSeen = 0;
1076 Type *widestType;
1077
1078 // Go through each pair and find the widest bit to which we need
1079 // to extend all of them.
1080 for (Subscript *Pair : Pairs) {
1081 const SCEV *Src = Pair->Src;
1082 const SCEV *Dst = Pair->Dst;
1083 IntegerType *SrcTy = dyn_cast<IntegerType>(Val: Src->getType());
1084 IntegerType *DstTy = dyn_cast<IntegerType>(Val: Dst->getType());
1085 if (SrcTy == nullptr || DstTy == nullptr) {
1086 assert(SrcTy == DstTy &&
1087 "This function only unify integer types and "
1088 "expect Src and Dst share the same type otherwise.");
1089 continue;
1090 }
1091 if (SrcTy->getBitWidth() > widestWidthSeen) {
1092 widestWidthSeen = SrcTy->getBitWidth();
1093 widestType = SrcTy;
1094 }
1095 if (DstTy->getBitWidth() > widestWidthSeen) {
1096 widestWidthSeen = DstTy->getBitWidth();
1097 widestType = DstTy;
1098 }
1099 }
1100
1101 assert(widestWidthSeen > 0);
1102
1103 // Now extend each pair to the widest seen.
1104 for (Subscript *Pair : Pairs) {
1105 const SCEV *Src = Pair->Src;
1106 const SCEV *Dst = Pair->Dst;
1107 IntegerType *SrcTy = dyn_cast<IntegerType>(Val: Src->getType());
1108 IntegerType *DstTy = dyn_cast<IntegerType>(Val: Dst->getType());
1109 if (SrcTy == nullptr || DstTy == nullptr) {
1110 assert(SrcTy == DstTy &&
1111 "This function only unify integer types and "
1112 "expect Src and Dst share the same type otherwise.");
1113 continue;
1114 }
1115 if (SrcTy->getBitWidth() < widestWidthSeen)
1116 // Sign-extend Src to widestType
1117 Pair->Src = SE->getSignExtendExpr(Op: Src, Ty: widestType);
1118 if (DstTy->getBitWidth() < widestWidthSeen) {
1119 // Sign-extend Dst to widestType
1120 Pair->Dst = SE->getSignExtendExpr(Op: Dst, Ty: widestType);
1121 }
1122 }
1123}
1124
1125// removeMatchingExtensions - Examines a subscript pair.
1126// If the source and destination are identically sign (or zero)
1127// extended, it strips off the extension in an effect to simplify
1128// the actual analysis.
1129void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
1130 const SCEV *Src = Pair->Src;
1131 const SCEV *Dst = Pair->Dst;
1132 if ((isa<SCEVZeroExtendExpr>(Val: Src) && isa<SCEVZeroExtendExpr>(Val: Dst)) ||
1133 (isa<SCEVSignExtendExpr>(Val: Src) && isa<SCEVSignExtendExpr>(Val: Dst))) {
1134 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Val: Src);
1135 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Val: Dst);
1136 const SCEV *SrcCastOp = SrcCast->getOperand();
1137 const SCEV *DstCastOp = DstCast->getOperand();
1138 if (SrcCastOp->getType() == DstCastOp->getType()) {
1139 Pair->Src = SrcCastOp;
1140 Pair->Dst = DstCastOp;
1141 }
1142 }
1143}
1144
1145// Examine the scev and return true iff it's affine.
1146// Collect any loops mentioned in the set of "Loops".
1147bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
1148 SmallBitVector &Loops, bool IsSrc) {
1149 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
1150 if (!AddRec)
1151 return isLoopInvariant(Expression: Expr, LoopNest);
1152
1153 // The AddRec must depend on one of the containing loops. Otherwise,
1154 // mapSrcLoop and mapDstLoop return indices outside the intended range. This
1155 // can happen when a subscript in one loop references an IV from a sibling
1156 // loop that could not be replaced with a concrete exit value by
1157 // getSCEVAtScope.
1158 const Loop *L = LoopNest;
1159 while (L && AddRec->getLoop() != L)
1160 L = L->getParentLoop();
1161 if (!L)
1162 return false;
1163
1164 const SCEV *Start = AddRec->getStart();
1165 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
1166 if (!isLoopInvariant(Expression: Step, LoopNest))
1167 return false;
1168 if (IsSrc)
1169 Loops.set(mapSrcLoop(SrcLoop: AddRec->getLoop()));
1170 else
1171 Loops.set(mapDstLoop(DstLoop: AddRec->getLoop()));
1172 return checkSubscript(Expr: Start, LoopNest, Loops, IsSrc);
1173}
1174
1175// Examine the scev and return true iff it's linear.
1176// Collect any loops mentioned in the set of "Loops".
1177bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
1178 SmallBitVector &Loops) {
1179 return checkSubscript(Expr: Src, LoopNest, Loops, IsSrc: true);
1180}
1181
1182// Examine the scev and return true iff it's linear.
1183// Collect any loops mentioned in the set of "Loops".
1184bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
1185 SmallBitVector &Loops) {
1186 return checkSubscript(Expr: Dst, LoopNest, Loops, IsSrc: false);
1187}
1188
1189// Examines the subscript pair (the Src and Dst SCEVs)
1190// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
1191// Collects the associated loops in a set.
1192DependenceInfo::Subscript::ClassificationKind
1193DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
1194 const SCEV *Dst, const Loop *DstLoopNest,
1195 SmallBitVector &Loops) {
1196 SmallBitVector SrcLoops(MaxLevels + 1);
1197 SmallBitVector DstLoops(MaxLevels + 1);
1198 if (!checkSrcSubscript(Src, LoopNest: SrcLoopNest, Loops&: SrcLoops))
1199 return Subscript::NonLinear;
1200 if (!checkDstSubscript(Dst, LoopNest: DstLoopNest, Loops&: DstLoops))
1201 return Subscript::NonLinear;
1202 Loops = SrcLoops;
1203 Loops |= DstLoops;
1204 unsigned N = Loops.count();
1205 if (N == 0)
1206 return Subscript::ZIV;
1207 if (N == 1)
1208 return Subscript::SIV;
1209 if (N == 2 && (SrcLoops.count() == 0 || DstLoops.count() == 0 ||
1210 (SrcLoops.count() == 1 && DstLoops.count() == 1)))
1211 return Subscript::RDIV;
1212 return Subscript::MIV;
1213}
1214
1215// All subscripts are all the same type.
1216// Loop bound may be smaller (e.g., a char).
1217// Should zero extend loop bound, since it's always >= 0.
1218// This routine collects upper bound and extends or truncates if needed.
1219// Truncating is safe when subscripts are known not to wrap. Cases without
1220// nowrap flags should have been rejected earlier.
1221// Return null if no bound available.
1222const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
1223 if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
1224 const SCEV *UB = SE->getBackedgeTakenCount(L);
1225 return SE->getTruncateOrZeroExtend(V: UB, Ty: T);
1226 }
1227 return nullptr;
1228}
1229
1230// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1231// If the cast fails, returns NULL.
1232const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
1233 Type *T) const {
1234 if (const SCEV *UB = collectUpperBound(L, T))
1235 return dyn_cast<SCEVConstant>(Val: UB);
1236 return nullptr;
1237}
1238
1239/// Returns \p A - \p B if it guaranteed not to signed wrap. Otherwise returns
1240/// nullptr. \p A and \p B must have the same integer type.
1241static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
1242 ScalarEvolution &SE) {
1243 if (SE.willNotOverflow(BinOp: Instruction::Sub, /*Signed=*/true, LHS: A, RHS: B))
1244 return SE.getMinusSCEV(LHS: A, RHS: B);
1245 return nullptr;
1246}
1247
1248/// Returns \p A * \p B if it guaranteed not to signed wrap. Otherwise returns
1249/// nullptr. \p A and \p B must have the same integer type.
1250static const SCEV *mulSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
1251 ScalarEvolution &SE) {
1252 if (SE.willNotOverflow(BinOp: Instruction::Mul, /*Signed=*/true, LHS: A, RHS: B))
1253 return SE.getMulExpr(LHS: A, RHS: B);
1254 return nullptr;
1255}
1256
1257/// Returns the absolute value of \p A. In the context of dependence analysis,
1258/// we need an absolute value in a mathematical sense. If \p A is the signed
1259/// minimum value, we cannot represent it unless extending the original type.
1260/// Thus if we cannot prove that \p A is not the signed minimum value, returns
1261/// nullptr.
1262static const SCEV *absSCEVNoSignedOverflow(const SCEV *A, ScalarEvolution &SE) {
1263 IntegerType *Ty = cast<IntegerType>(Val: A->getType());
1264 if (!Ty)
1265 return nullptr;
1266
1267 const SCEV *SMin =
1268 SE.getConstant(Val: APInt::getSignedMinValue(numBits: Ty->getBitWidth()));
1269 if (!SE.isKnownPredicate(Pred: CmpInst::ICMP_NE, LHS: A, RHS: SMin))
1270 return nullptr;
1271 return SE.getAbsExpr(Op: A, /*IsNSW=*/true);
1272}
1273
1274/// Returns true iff \p Test is enabled.
1275static bool isDependenceTestEnabled(DependenceTestType Test) {
1276 if (EnableDependenceTest == DependenceTestType::All)
1277 return true;
1278 return EnableDependenceTest == Test;
1279}
1280
1281// testZIV -
1282// When we have a pair of subscripts of the form [c1] and [c2],
1283// where c1 and c2 are both loop invariant, we attack it using
1284// the ZIV test. Basically, we test by comparing the two values,
1285// but there are actually three possible results:
1286// 1) the values are equal, so there's a dependence
1287// 2) the values are different, so there's no dependence
1288// 3) the values might be equal, so we have to assume a dependence.
1289//
1290// Return true if dependence disproved.
1291bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
1292 FullDependence &Result) const {
1293 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1294 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1295 ++ZIVapplications;
1296 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: Src, RHS: Dst)) {
1297 LLVM_DEBUG(dbgs() << " provably dependent\n");
1298 return false; // provably dependent
1299 }
1300 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_NE, LHS: Src, RHS: Dst)) {
1301 LLVM_DEBUG(dbgs() << " provably independent\n");
1302 ++ZIVindependence;
1303 return true; // provably independent
1304 }
1305 LLVM_DEBUG(dbgs() << " possibly dependent\n");
1306 Result.Consistent = false;
1307 return false; // possibly dependent
1308}
1309
1310// strongSIVtest -
1311// From the paper, Practical Dependence Testing, Section 4.2.1
1312//
1313// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1314// where i is an induction variable, c1 and c2 are loop invariant,
1315// and a is a constant, we can solve it exactly using the Strong SIV test.
1316//
1317// Can prove independence. Failing that, can compute distance (and direction).
1318// In the presence of symbolic terms, we can sometimes make progress.
1319//
1320// If there's a dependence,
1321//
1322// c1 + a*i = c2 + a*i'
1323//
1324// The dependence distance is
1325//
1326// d = i' - i = (c1 - c2)/a
1327//
1328// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1329// loop's upper bound. If a dependence exists, the dependence direction is
1330// defined as
1331//
1332// { < if d > 0
1333// direction = { = if d = 0
1334// { > if d < 0
1335//
1336// Return true if dependence disproved.
1337bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
1338 const SCEV *DstConst, const Loop *CurSrcLoop,
1339 const Loop *CurDstLoop, unsigned Level,
1340 FullDependence &Result,
1341 bool UnderRuntimeAssumptions) {
1342 if (!isDependenceTestEnabled(Test: DependenceTestType::StrongSIV))
1343 return false;
1344
1345 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1346 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
1347 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1348 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
1349 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1350 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
1351 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1352 ++StrongSIVapplications;
1353 assert(0 < Level && Level <= CommonLevels && "level out of range");
1354 Level--;
1355
1356 const SCEV *Delta = minusSCEVNoSignedOverflow(A: SrcConst, B: DstConst, SE&: *SE);
1357 if (!Delta) {
1358 Result.Consistent = false;
1359 return false;
1360 }
1361 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
1362 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1363
1364 // check that |Delta| < iteration count
1365 bool IsDeltaLarge = [&] {
1366 const SCEV *UpperBound = collectUpperBound(L: CurSrcLoop, T: Delta->getType());
1367 if (!UpperBound)
1368 return false;
1369
1370 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
1371 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
1372 const SCEV *AbsDelta = absSCEVNoSignedOverflow(A: Delta, SE&: *SE);
1373 const SCEV *AbsCoeff = absSCEVNoSignedOverflow(A: Coeff, SE&: *SE);
1374 if (!AbsDelta || !AbsCoeff)
1375 return false;
1376 const SCEV *Product = mulSCEVNoSignedOverflow(A: UpperBound, B: AbsCoeff, SE&: *SE);
1377 if (!Product)
1378 return false;
1379 return SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: AbsDelta, RHS: Product);
1380 }();
1381 if (IsDeltaLarge) {
1382 // Distance greater than trip count - no dependence
1383 ++StrongSIVindependence;
1384 ++StrongSIVsuccesses;
1385 return true;
1386 }
1387
1388 // Can we compute distance?
1389 if (isa<SCEVConstant>(Val: Delta) && isa<SCEVConstant>(Val: Coeff)) {
1390 APInt ConstDelta = cast<SCEVConstant>(Val: Delta)->getAPInt();
1391 APInt ConstCoeff = cast<SCEVConstant>(Val: Coeff)->getAPInt();
1392 APInt Distance = ConstDelta; // these need to be initialized
1393 APInt Remainder = ConstDelta;
1394 APInt::sdivrem(LHS: ConstDelta, RHS: ConstCoeff, Quotient&: Distance, Remainder);
1395 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1396 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1397 // Make sure Coeff divides Delta exactly
1398 if (Remainder != 0) {
1399 // Coeff doesn't divide Distance, no dependence
1400 ++StrongSIVindependence;
1401 ++StrongSIVsuccesses;
1402 return true;
1403 }
1404 Result.DV[Level].Distance = SE->getConstant(Val: Distance);
1405 if (Distance.sgt(RHS: 0))
1406 Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1407 else if (Distance.slt(RHS: 0))
1408 Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1409 else
1410 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1411 ++StrongSIVsuccesses;
1412 } else if (Delta->isZero()) {
1413 // Check if coefficient could be zero. If so, 0/0 is undefined and we
1414 // cannot conclude that only same-iteration dependencies exist.
1415 // When coeff=0, all iterations access the same location.
1416 if (SE->isKnownNonZero(S: Coeff)) {
1417 LLVM_DEBUG(
1418 dbgs() << "\t Coefficient proven non-zero by SCEV analysis\n");
1419 } else {
1420 // Cannot prove at compile time, would need runtime assumption.
1421 if (UnderRuntimeAssumptions) {
1422 const SCEVPredicate *Pred = SE->getComparePredicate(
1423 Pred: ICmpInst::ICMP_NE, LHS: Coeff, RHS: SE->getZero(Ty: Coeff->getType()));
1424 Result.Assumptions = Result.Assumptions.getUnionWith(N: Pred, SE&: *SE);
1425 LLVM_DEBUG(dbgs() << "\t Added runtime assumption: " << *Coeff
1426 << " != 0\n");
1427 } else {
1428 // Cannot add runtime assumptions, this test cannot handle this case.
1429 // Let more complex tests try.
1430 LLVM_DEBUG(dbgs() << "\t Would need runtime assumption " << *Coeff
1431 << " != 0, but not allowed. Failing this test.\n");
1432 return false;
1433 }
1434 }
1435 // Since 0/X == 0 (where X is known non-zero or assumed non-zero).
1436 Result.DV[Level].Distance = Delta;
1437 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1438 ++StrongSIVsuccesses;
1439 } else {
1440 if (Coeff->isOne()) {
1441 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
1442 Result.DV[Level].Distance = Delta; // since X/1 == X
1443 } else {
1444 Result.Consistent = false;
1445 }
1446
1447 // maybe we can get a useful direction
1448 bool DeltaMaybeZero = !SE->isKnownNonZero(S: Delta);
1449 bool DeltaMaybePositive = !SE->isKnownNonPositive(S: Delta);
1450 bool DeltaMaybeNegative = !SE->isKnownNonNegative(S: Delta);
1451 bool CoeffMaybePositive = !SE->isKnownNonPositive(S: Coeff);
1452 bool CoeffMaybeNegative = !SE->isKnownNonNegative(S: Coeff);
1453 // The double negatives above are confusing.
1454 // It helps to read !SE->isKnownNonZero(Delta)
1455 // as "Delta might be Zero"
1456 unsigned NewDirection = Dependence::DVEntry::NONE;
1457 if ((DeltaMaybePositive && CoeffMaybePositive) ||
1458 (DeltaMaybeNegative && CoeffMaybeNegative))
1459 NewDirection = Dependence::DVEntry::LT;
1460 if (DeltaMaybeZero)
1461 NewDirection |= Dependence::DVEntry::EQ;
1462 if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1463 (DeltaMaybePositive && CoeffMaybeNegative))
1464 NewDirection |= Dependence::DVEntry::GT;
1465 if (NewDirection < Result.DV[Level].Direction)
1466 ++StrongSIVsuccesses;
1467 Result.DV[Level].Direction &= NewDirection;
1468 }
1469 return false;
1470}
1471
1472// weakCrossingSIVtest -
1473// From the paper, Practical Dependence Testing, Section 4.2.2
1474//
1475// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1476// where i is an induction variable, c1 and c2 are loop invariant,
1477// and a is a constant, we can solve it exactly using the
1478// Weak-Crossing SIV test.
1479//
1480// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1481// the two lines, where i = i', yielding
1482//
1483// c1 + a*i = c2 - a*i
1484// 2a*i = c2 - c1
1485// i = (c2 - c1)/2a
1486//
1487// If i < 0, there is no dependence.
1488// If i > upperbound, there is no dependence.
1489// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1490// If i = upperbound, there's a dependence with distance = 0.
1491// If i is integral, there's a dependence (all directions).
1492// If the non-integer part = 1/2, there's a dependence (<> directions).
1493// Otherwise, there's no dependence.
1494//
1495// Can prove independence. Failing that,
1496// can sometimes refine the directions.
1497// Can determine iteration for splitting.
1498//
1499// Return true if dependence disproved.
1500bool DependenceInfo::weakCrossingSIVtest(const SCEV *Coeff,
1501 const SCEV *SrcConst,
1502 const SCEV *DstConst,
1503 const Loop *CurSrcLoop,
1504 const Loop *CurDstLoop, unsigned Level,
1505 FullDependence &Result) const {
1506 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakCrossingSIV))
1507 return false;
1508
1509 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1510 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
1511 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1512 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1513 ++WeakCrossingSIVapplications;
1514 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1515 Level--;
1516 Result.Consistent = false;
1517 const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
1518 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1519 if (Delta->isZero()) {
1520 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1521 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1522 ++WeakCrossingSIVsuccesses;
1523 if (!Result.DV[Level].Direction) {
1524 ++WeakCrossingSIVindependence;
1525 return true;
1526 }
1527 Result.DV[Level].Distance = Delta; // = 0
1528 return false;
1529 }
1530 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: Coeff);
1531 if (!ConstCoeff)
1532 return false;
1533
1534 if (SE->isKnownNegative(S: ConstCoeff)) {
1535 ConstCoeff = dyn_cast<SCEVConstant>(Val: SE->getNegativeSCEV(V: ConstCoeff));
1536 assert(ConstCoeff &&
1537 "dynamic cast of negative of ConstCoeff should yield constant");
1538 Delta = SE->getNegativeSCEV(V: Delta);
1539 }
1540 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1541
1542 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1543 if (!ConstDelta)
1544 return false;
1545
1546 // We're certain that ConstCoeff > 0; therefore,
1547 // if Delta < 0, then no dependence.
1548 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1549 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
1550 if (SE->isKnownNegative(S: Delta)) {
1551 // No dependence, Delta < 0
1552 ++WeakCrossingSIVindependence;
1553 ++WeakCrossingSIVsuccesses;
1554 return true;
1555 }
1556
1557 // We're certain that Delta > 0 and ConstCoeff > 0.
1558 // Check Delta/(2*ConstCoeff) against upper loop bound
1559 if (const SCEV *UpperBound =
1560 collectUpperBound(L: CurSrcLoop, T: Delta->getType())) {
1561 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1562 const SCEV *ConstantTwo = SE->getConstant(Ty: UpperBound->getType(), V: 2);
1563 const SCEV *ML =
1564 SE->getMulExpr(LHS: SE->getMulExpr(LHS: ConstCoeff, RHS: UpperBound), RHS: ConstantTwo);
1565 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n");
1566 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: Delta, RHS: ML)) {
1567 // Delta too big, no dependence
1568 ++WeakCrossingSIVindependence;
1569 ++WeakCrossingSIVsuccesses;
1570 return true;
1571 }
1572 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: Delta, RHS: ML)) {
1573 // i = i' = UB
1574 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1575 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1576 ++WeakCrossingSIVsuccesses;
1577 if (!Result.DV[Level].Direction) {
1578 ++WeakCrossingSIVindependence;
1579 return true;
1580 }
1581 Result.DV[Level].Distance = SE->getZero(Ty: Delta->getType());
1582 return false;
1583 }
1584 }
1585
1586 // check that Coeff divides Delta
1587 APInt APDelta = ConstDelta->getAPInt();
1588 APInt APCoeff = ConstCoeff->getAPInt();
1589 APInt Distance = APDelta; // these need to be initialzed
1590 APInt Remainder = APDelta;
1591 APInt::sdivrem(LHS: APDelta, RHS: APCoeff, Quotient&: Distance, Remainder);
1592 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1593 if (Remainder != 0) {
1594 // Coeff doesn't divide Delta, no dependence
1595 ++WeakCrossingSIVindependence;
1596 ++WeakCrossingSIVsuccesses;
1597 return true;
1598 }
1599 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1600
1601 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1602 APInt Two = APInt(Distance.getBitWidth(), 2, true);
1603 Remainder = Distance.srem(RHS: Two);
1604 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1605 if (Remainder != 0) {
1606 // Equal direction isn't possible
1607 Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ;
1608 ++WeakCrossingSIVsuccesses;
1609 }
1610 return false;
1611}
1612
1613// Kirch's algorithm, from
1614//
1615// Optimizing Supercompilers for Supercomputers
1616// Michael Wolfe
1617// MIT Press, 1989
1618//
1619// Program 2.1, page 29.
1620// Computes the GCD of AM and BM.
1621// Also finds a solution to the equation ax - by = gcd(a, b).
1622// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1623//
1624// We don't use OverflowSafeSignedAPInt here because it's known that this
1625// algorithm doesn't overflow.
1626static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1627 const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1628 APInt A0(Bits, 1, true), A1(Bits, 0, true);
1629 APInt B0(Bits, 0, true), B1(Bits, 1, true);
1630 APInt G0 = AM.abs();
1631 APInt G1 = BM.abs();
1632 APInt Q = G0; // these need to be initialized
1633 APInt R = G0;
1634 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1635 while (R != 0) {
1636 // clang-format off
1637 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1638 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1639 G0 = G1; G1 = R;
1640 // clang-format on
1641 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1642 }
1643 G = G1;
1644 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
1645 X = AM.slt(RHS: 0) ? -A1 : A1;
1646 Y = BM.slt(RHS: 0) ? B1 : -B1;
1647
1648 // make sure gcd divides Delta
1649 R = Delta.srem(RHS: G);
1650 if (R != 0)
1651 return true; // gcd doesn't divide Delta, no dependence
1652 Q = Delta.sdiv(RHS: G);
1653 return false;
1654}
1655
1656static OverflowSafeSignedAPInt
1657floorOfQuotient(const OverflowSafeSignedAPInt &OA,
1658 const OverflowSafeSignedAPInt &OB) {
1659 if (!OA || !OB)
1660 return OverflowSafeSignedAPInt();
1661
1662 APInt A = *OA;
1663 APInt B = *OB;
1664 APInt Q = A; // these need to be initialized
1665 APInt R = A;
1666 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1667 if (R == 0)
1668 return Q;
1669 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1670 return Q;
1671 return OverflowSafeSignedAPInt(Q) - 1;
1672}
1673
1674static OverflowSafeSignedAPInt
1675ceilingOfQuotient(const OverflowSafeSignedAPInt &OA,
1676 const OverflowSafeSignedAPInt &OB) {
1677 if (!OA || !OB)
1678 return OverflowSafeSignedAPInt();
1679
1680 APInt A = *OA;
1681 APInt B = *OB;
1682 APInt Q = A; // these need to be initialized
1683 APInt R = A;
1684 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1685 if (R == 0)
1686 return Q;
1687 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1688 return OverflowSafeSignedAPInt(Q) + 1;
1689 return Q;
1690}
1691
1692/// Given an affine expression of the form A*k + B, where k is an arbitrary
1693/// integer, infer the possible range of k based on the known range of the
1694/// affine expression. If we know A*k + B is non-negative, i.e.,
1695///
1696/// A*k + B >= 0
1697///
1698/// we can derive the following inequalities for k when A is positive:
1699///
1700/// k >= -B / A
1701///
1702/// Since k is an integer, it means k is greater than or equal to the
1703/// ceil(-B / A).
1704///
1705/// If the upper bound of the affine expression \p UB is passed, the following
1706/// inequality can be derived as well:
1707///
1708/// A*k + B <= UB
1709///
1710/// which leads to:
1711///
1712/// k <= (UB - B) / A
1713///
1714/// Again, as k is an integer, it means k is less than or equal to the
1715/// floor((UB - B) / A).
1716///
1717/// The similar logic applies when A is negative, but the inequalities sign flip
1718/// while working with them.
1719///
1720/// Preconditions: \p A is non-zero, and we know A*k + B is non-negative.
1721static std::pair<OverflowSafeSignedAPInt, OverflowSafeSignedAPInt>
1722inferDomainOfAffine(OverflowSafeSignedAPInt A, OverflowSafeSignedAPInt B,
1723 OverflowSafeSignedAPInt UB) {
1724 assert(A && B && "A and B must be available");
1725 assert(*A != 0 && "A must be non-zero");
1726 OverflowSafeSignedAPInt TL, TU;
1727 if (A->sgt(RHS: 0)) {
1728 TL = ceilingOfQuotient(OA: -B, OB: A);
1729 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1730
1731 // New bound check - modification to Banerjee's e3 check
1732 TU = floorOfQuotient(OA: UB - B, OB: A);
1733 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1734 } else {
1735 TU = floorOfQuotient(OA: -B, OB: A);
1736 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1737
1738 // New bound check - modification to Banerjee's e3 check
1739 TL = ceilingOfQuotient(OA: UB - B, OB: A);
1740 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1741 }
1742 return std::make_pair(x&: TL, y&: TU);
1743}
1744
1745// exactSIVtest -
1746// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1747// where i is an induction variable, c1 and c2 are loop invariant, and a1
1748// and a2 are constant, we can solve it exactly using an algorithm developed
1749// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1750//
1751// Dependence Analysis for Supercomputing
1752// Utpal Banerjee
1753// Kluwer Academic Publishers, 1988
1754//
1755// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1756// so use them if possible. They're also a bit better with symbolics and,
1757// in the case of the strong SIV test, can compute Distances.
1758//
1759// Return true if dependence disproved.
1760//
1761// This is a modified version of the original Banerjee algorithm. The original
1762// only tested whether Dst depends on Src. This algorithm extends that and
1763// returns all the dependencies that exist between Dst and Src.
1764bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1765 const SCEV *SrcConst, const SCEV *DstConst,
1766 const Loop *CurSrcLoop,
1767 const Loop *CurDstLoop, unsigned Level,
1768 FullDependence &Result) const {
1769 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactSIV))
1770 return false;
1771
1772 LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1773 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
1774 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
1775 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1776 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1777 ++ExactSIVapplications;
1778 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1779 Level--;
1780 Result.Consistent = false;
1781 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
1782 if (!Delta)
1783 return false;
1784 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1785 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1786 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1787 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1788 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1789 return false;
1790
1791 // find gcd
1792 APInt G, X, Y;
1793 APInt AM = ConstSrcCoeff->getAPInt();
1794 APInt BM = ConstDstCoeff->getAPInt();
1795 APInt CM = ConstDelta->getAPInt();
1796 unsigned Bits = AM.getBitWidth();
1797 if (findGCD(Bits, AM, BM, Delta: CM, G, X, Y)) {
1798 // gcd doesn't divide Delta, no dependence
1799 ++ExactSIVindependence;
1800 ++ExactSIVsuccesses;
1801 return true;
1802 }
1803
1804 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1805
1806 // since SCEV construction normalizes, LM = 0
1807 std::optional<APInt> UM;
1808 // UM is perhaps unavailable, let's check
1809 if (const SCEVConstant *CUB =
1810 collectConstantUpperBound(L: CurSrcLoop, T: Delta->getType())) {
1811 UM = CUB->getAPInt();
1812 LLVM_DEBUG(dbgs() << "\t UM = " << *UM << "\n");
1813 }
1814
1815 APInt TU(APInt::getSignedMaxValue(numBits: Bits));
1816 APInt TL(APInt::getSignedMinValue(numBits: Bits));
1817 APInt TC = CM.sdiv(RHS: G);
1818 APInt TX = X * TC;
1819 APInt TY = Y * TC;
1820 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
1821 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
1822 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
1823
1824 APInt TB = BM.sdiv(RHS: G);
1825 APInt TA = AM.sdiv(RHS: G);
1826
1827 // At this point, we have the following equations:
1828 //
1829 // TA*i0 - TB*i1 = TC
1830 //
1831 // Also, we know that the all pairs of (i0, i1) can be expressed as:
1832 //
1833 // (TX + k*TB, TY + k*TA)
1834 //
1835 // where k is an arbitrary integer.
1836 auto [TL0, TU0] = inferDomainOfAffine(A: TB, B: TX, UB: UM);
1837 auto [TL1, TU1] = inferDomainOfAffine(A: TA, B: TY, UB: UM);
1838
1839 auto CreateVec = [](const OverflowSafeSignedAPInt &V0,
1840 const OverflowSafeSignedAPInt &V1) {
1841 SmallVector<APInt, 2> Vec;
1842 if (V0)
1843 Vec.push_back(Elt: *V0);
1844 if (V1)
1845 Vec.push_back(Elt: *V1);
1846 return Vec;
1847 };
1848
1849 SmallVector<APInt, 2> TLVec = CreateVec(TL0, TL1);
1850 SmallVector<APInt, 2> TUVec = CreateVec(TU0, TU1);
1851
1852 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
1853 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
1854
1855 if (TLVec.empty() || TUVec.empty())
1856 return false;
1857 TL = APIntOps::smax(A: TLVec.front(), B: TLVec.back());
1858 TU = APIntOps::smin(A: TUVec.front(), B: TUVec.back());
1859 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
1860 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
1861
1862 if (TL.sgt(RHS: TU)) {
1863 ++ExactSIVindependence;
1864 ++ExactSIVsuccesses;
1865 return true;
1866 }
1867
1868 // explore directions
1869 unsigned NewDirection = Dependence::DVEntry::NONE;
1870 OverflowSafeSignedAPInt LowerDistance, UpperDistance;
1871 OverflowSafeSignedAPInt OTY(TY), OTX(TX), OTA(TA), OTB(TB), OTL(TL), OTU(TU);
1872 // NOTE: It's unclear whether these calculations can overflow. At the moment,
1873 // we conservatively assume they can.
1874 if (TA.sgt(RHS: TB)) {
1875 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1876 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1877 } else {
1878 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1879 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1880 }
1881
1882 if (!LowerDistance || !UpperDistance)
1883 return false;
1884
1885 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << *LowerDistance << "\n");
1886 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << *UpperDistance << "\n");
1887
1888 if (LowerDistance->sle(RHS: 0) && UpperDistance->sge(RHS: 0)) {
1889 NewDirection |= Dependence::DVEntry::EQ;
1890 ++ExactSIVsuccesses;
1891 }
1892 if (LowerDistance->slt(RHS: 0)) {
1893 NewDirection |= Dependence::DVEntry::GT;
1894 ++ExactSIVsuccesses;
1895 }
1896 if (UpperDistance->sgt(RHS: 0)) {
1897 NewDirection |= Dependence::DVEntry::LT;
1898 ++ExactSIVsuccesses;
1899 }
1900
1901 // finished
1902 Result.DV[Level].Direction &= NewDirection;
1903 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
1904 ++ExactSIVindependence;
1905 LLVM_DEBUG(dbgs() << "\t Result = ");
1906 LLVM_DEBUG(Result.dump(dbgs()));
1907 return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
1908}
1909
1910// Return true if the divisor evenly divides the dividend.
1911static bool isRemainderZero(const SCEVConstant *Dividend,
1912 const SCEVConstant *Divisor) {
1913 const APInt &ConstDividend = Dividend->getAPInt();
1914 const APInt &ConstDivisor = Divisor->getAPInt();
1915 return ConstDividend.srem(RHS: ConstDivisor) == 0;
1916}
1917
1918// weakZeroSrcSIVtest -
1919// From the paper, Practical Dependence Testing, Section 4.2.2
1920//
1921// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1922// where i is an induction variable, c1 and c2 are loop invariant,
1923// and a is a constant, we can solve it exactly using the
1924// Weak-Zero SIV test.
1925//
1926// Given
1927//
1928// c1 = c2 + a*i
1929//
1930// we get
1931//
1932// (c1 - c2)/a = i
1933//
1934// If i is not an integer, there's no dependence.
1935// If i < 0 or > UB, there's no dependence.
1936// If i = 0, the direction is >= and peeling the
1937// 1st iteration will break the dependence.
1938// If i = UB, the direction is <= and peeling the
1939// last iteration will break the dependence.
1940// Otherwise, the direction is *.
1941//
1942// Can prove independence. Failing that, we can sometimes refine
1943// the directions. Can sometimes show that first or last
1944// iteration carries all the dependences (so worth peeling).
1945//
1946// (see also weakZeroDstSIVtest)
1947//
1948// Return true if dependence disproved.
1949bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
1950 const SCEV *SrcConst,
1951 const SCEV *DstConst,
1952 const Loop *CurSrcLoop,
1953 const Loop *CurDstLoop, unsigned Level,
1954 FullDependence &Result) const {
1955 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
1956 return false;
1957
1958 // For the WeakSIV test, it's possible the loop isn't common to
1959 // the Src and Dst loops. If it isn't, then there's no need to
1960 // record a direction.
1961 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1962 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1963 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1964 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1965 ++WeakZeroSIVapplications;
1966 assert(0 < Level && Level <= MaxLevels && "Level out of range");
1967 Level--;
1968 Result.Consistent = false;
1969 const SCEV *Delta = SE->getMinusSCEV(LHS: SrcConst, RHS: DstConst);
1970 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1971 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: SrcConst, RHS: DstConst)) {
1972 if (Level < CommonLevels) {
1973 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1974 Result.DV[Level].PeelFirst = true;
1975 ++WeakZeroSIVsuccesses;
1976 }
1977 return false; // dependences caused by first iteration
1978 }
1979 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1980 if (!ConstCoeff)
1981 return false;
1982
1983 // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
1984 // TODO: Bail out if it's a signed minimum value.
1985 const SCEV *AbsCoeff = SE->isKnownNegative(S: ConstCoeff)
1986 ? SE->getNegativeSCEV(V: ConstCoeff)
1987 : ConstCoeff;
1988 const SCEV *NewDelta =
1989 SE->isKnownNegative(S: ConstCoeff) ? SE->getNegativeSCEV(V: Delta) : Delta;
1990
1991 // check that Delta/SrcCoeff < iteration count
1992 // really check NewDelta < count*AbsCoeff
1993 if (const SCEV *UpperBound =
1994 collectUpperBound(L: CurSrcLoop, T: Delta->getType())) {
1995 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1996 const SCEV *Product = SE->getMulExpr(LHS: AbsCoeff, RHS: UpperBound);
1997 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: NewDelta, RHS: Product)) {
1998 ++WeakZeroSIVindependence;
1999 ++WeakZeroSIVsuccesses;
2000 return true;
2001 }
2002 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: NewDelta, RHS: Product)) {
2003 // dependences caused by last iteration
2004 if (Level < CommonLevels) {
2005 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
2006 Result.DV[Level].PeelLast = true;
2007 ++WeakZeroSIVsuccesses;
2008 }
2009 return false;
2010 }
2011 }
2012
2013 // check that Delta/SrcCoeff >= 0
2014 // really check that NewDelta >= 0
2015 if (SE->isKnownNegative(S: NewDelta)) {
2016 // No dependence, newDelta < 0
2017 ++WeakZeroSIVindependence;
2018 ++WeakZeroSIVsuccesses;
2019 return true;
2020 }
2021
2022 // if SrcCoeff doesn't divide Delta, then no dependence
2023 if (isa<SCEVConstant>(Val: Delta) &&
2024 !isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
2025 ++WeakZeroSIVindependence;
2026 ++WeakZeroSIVsuccesses;
2027 return true;
2028 }
2029 return false;
2030}
2031
2032// weakZeroDstSIVtest -
2033// From the paper, Practical Dependence Testing, Section 4.2.2
2034//
2035// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
2036// where i is an induction variable, c1 and c2 are loop invariant,
2037// and a is a constant, we can solve it exactly using the
2038// Weak-Zero SIV test.
2039//
2040// Given
2041//
2042// c1 + a*i = c2
2043//
2044// we get
2045//
2046// i = (c2 - c1)/a
2047//
2048// If i is not an integer, there's no dependence.
2049// If i < 0 or > UB, there's no dependence.
2050// If i = 0, the direction is <= and peeling the
2051// 1st iteration will break the dependence.
2052// If i = UB, the direction is >= and peeling the
2053// last iteration will break the dependence.
2054// Otherwise, the direction is *.
2055//
2056// Can prove independence. Failing that, we can sometimes refine
2057// the directions. Can sometimes show that first or last
2058// iteration carries all the dependences (so worth peeling).
2059//
2060// (see also weakZeroSrcSIVtest)
2061//
2062// Return true if dependence disproved.
2063bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
2064 const SCEV *SrcConst,
2065 const SCEV *DstConst,
2066 const Loop *CurSrcLoop,
2067 const Loop *CurDstLoop, unsigned Level,
2068 FullDependence &Result) const {
2069 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
2070 return false;
2071
2072 // For the WeakSIV test, it's possible the loop isn't common to the
2073 // Src and Dst loops. If it isn't, then there's no need to record a direction.
2074 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
2075 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
2076 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
2077 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
2078 ++WeakZeroSIVapplications;
2079 assert(0 < Level && Level <= SrcLevels && "Level out of range");
2080 Level--;
2081 Result.Consistent = false;
2082 const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
2083 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
2084 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: DstConst, RHS: SrcConst)) {
2085 if (Level < CommonLevels) {
2086 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
2087 Result.DV[Level].PeelFirst = true;
2088 ++WeakZeroSIVsuccesses;
2089 }
2090 return false; // dependences caused by first iteration
2091 }
2092 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
2093 if (!ConstCoeff)
2094 return false;
2095
2096 // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
2097 // TODO: Bail out if it's a signed minimum value.
2098 const SCEV *AbsCoeff = SE->isKnownNegative(S: ConstCoeff)
2099 ? SE->getNegativeSCEV(V: ConstCoeff)
2100 : ConstCoeff;
2101 const SCEV *NewDelta =
2102 SE->isKnownNegative(S: ConstCoeff) ? SE->getNegativeSCEV(V: Delta) : Delta;
2103
2104 // check that Delta/SrcCoeff < iteration count
2105 // really check NewDelta < count*AbsCoeff
2106 if (const SCEV *UpperBound =
2107 collectUpperBound(L: CurSrcLoop, T: Delta->getType())) {
2108 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
2109 const SCEV *Product = SE->getMulExpr(LHS: AbsCoeff, RHS: UpperBound);
2110 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: NewDelta, RHS: Product)) {
2111 ++WeakZeroSIVindependence;
2112 ++WeakZeroSIVsuccesses;
2113 return true;
2114 }
2115 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: NewDelta, RHS: Product)) {
2116 // dependences caused by last iteration
2117 if (Level < CommonLevels) {
2118 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
2119 Result.DV[Level].PeelLast = true;
2120 ++WeakZeroSIVsuccesses;
2121 }
2122 return false;
2123 }
2124 }
2125
2126 // check that Delta/SrcCoeff >= 0
2127 // really check that NewDelta >= 0
2128 if (SE->isKnownNegative(S: NewDelta)) {
2129 // No dependence, newDelta < 0
2130 ++WeakZeroSIVindependence;
2131 ++WeakZeroSIVsuccesses;
2132 return true;
2133 }
2134
2135 // if SrcCoeff doesn't divide Delta, then no dependence
2136 if (isa<SCEVConstant>(Val: Delta) &&
2137 !isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
2138 ++WeakZeroSIVindependence;
2139 ++WeakZeroSIVsuccesses;
2140 return true;
2141 }
2142 return false;
2143}
2144
2145// exactRDIVtest - Tests the RDIV subscript pair for dependence.
2146// Things of the form [c1 + a*i] and [c2 + b*j],
2147// where i and j are induction variable, c1 and c2 are loop invariant,
2148// and a and b are constants.
2149// Returns true if any possible dependence is disproved.
2150// Marks the result as inconsistent.
2151// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
2152bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
2153 const SCEV *SrcConst, const SCEV *DstConst,
2154 const Loop *SrcLoop, const Loop *DstLoop,
2155 FullDependence &Result) const {
2156 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactRDIV))
2157 return false;
2158
2159 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
2160 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
2161 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
2162 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
2163 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
2164 ++ExactRDIVapplications;
2165 Result.Consistent = false;
2166 const SCEV *Delta = SE->getMinusSCEV(LHS: DstConst, RHS: SrcConst);
2167 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
2168 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
2169 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
2170 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
2171 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
2172 return false;
2173
2174 // find gcd
2175 APInt G, X, Y;
2176 APInt AM = ConstSrcCoeff->getAPInt();
2177 APInt BM = ConstDstCoeff->getAPInt();
2178 APInt CM = ConstDelta->getAPInt();
2179 unsigned Bits = AM.getBitWidth();
2180 if (findGCD(Bits, AM, BM, Delta: CM, G, X, Y)) {
2181 // gcd doesn't divide Delta, no dependence
2182 ++ExactRDIVindependence;
2183 return true;
2184 }
2185
2186 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
2187
2188 // since SCEV construction seems to normalize, LM = 0
2189 std::optional<APInt> SrcUM;
2190 // SrcUM is perhaps unavailable, let's check
2191 if (const SCEVConstant *UpperBound =
2192 collectConstantUpperBound(L: SrcLoop, T: Delta->getType())) {
2193 SrcUM = UpperBound->getAPInt();
2194 LLVM_DEBUG(dbgs() << "\t SrcUM = " << *SrcUM << "\n");
2195 }
2196
2197 std::optional<APInt> DstUM;
2198 // UM is perhaps unavailable, let's check
2199 if (const SCEVConstant *UpperBound =
2200 collectConstantUpperBound(L: DstLoop, T: Delta->getType())) {
2201 DstUM = UpperBound->getAPInt();
2202 LLVM_DEBUG(dbgs() << "\t DstUM = " << *DstUM << "\n");
2203 }
2204
2205 APInt TU(APInt::getSignedMaxValue(numBits: Bits));
2206 APInt TL(APInt::getSignedMinValue(numBits: Bits));
2207 APInt TC = CM.sdiv(RHS: G);
2208 APInt TX = X * TC;
2209 APInt TY = Y * TC;
2210 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
2211 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
2212 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
2213
2214 APInt TB = BM.sdiv(RHS: G);
2215 APInt TA = AM.sdiv(RHS: G);
2216
2217 // At this point, we have the following equations:
2218 //
2219 // TA*i - TB*j = TC
2220 //
2221 // Also, we know that the all pairs of (i, j) can be expressed as:
2222 //
2223 // (TX + k*TB, TY + k*TA)
2224 //
2225 // where k is an arbitrary integer.
2226 auto [TL0, TU0] = inferDomainOfAffine(A: TB, B: TX, UB: SrcUM);
2227 auto [TL1, TU1] = inferDomainOfAffine(A: TA, B: TY, UB: DstUM);
2228
2229 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
2230 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
2231
2232 auto CreateVec = [](const OverflowSafeSignedAPInt &V0,
2233 const OverflowSafeSignedAPInt &V1) {
2234 SmallVector<APInt, 2> Vec;
2235 if (V0)
2236 Vec.push_back(Elt: *V0);
2237 if (V1)
2238 Vec.push_back(Elt: *V1);
2239 return Vec;
2240 };
2241
2242 SmallVector<APInt, 2> TLVec = CreateVec(TL0, TL1);
2243 SmallVector<APInt, 2> TUVec = CreateVec(TU0, TU1);
2244 if (TLVec.empty() || TUVec.empty())
2245 return false;
2246
2247 TL = APIntOps::smax(A: TLVec.front(), B: TLVec.back());
2248 TU = APIntOps::smin(A: TUVec.front(), B: TUVec.back());
2249 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
2250 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
2251
2252 if (TL.sgt(RHS: TU))
2253 ++ExactRDIVindependence;
2254 return TL.sgt(RHS: TU);
2255}
2256
2257// symbolicRDIVtest -
2258// In Section 4.5 of the Practical Dependence Testing paper,the authors
2259// introduce a special case of Banerjee's Inequalities (also called the
2260// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
2261// particularly cases with symbolics. Since it's only able to disprove
2262// dependence (not compute distances or directions), we'll use it as a
2263// fall back for the other tests.
2264//
2265// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2266// where i and j are induction variables and c1 and c2 are loop invariants,
2267// we can use the symbolic tests to disprove some dependences, serving as a
2268// backup for the RDIV test. Note that i and j can be the same variable,
2269// letting this test serve as a backup for the various SIV tests.
2270//
2271// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
2272// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
2273// loop bounds for the i and j loops, respectively. So, ...
2274//
2275// c1 + a1*i = c2 + a2*j
2276// a1*i - a2*j = c2 - c1
2277//
2278// To test for a dependence, we compute c2 - c1 and make sure it's in the
2279// range of the maximum and minimum possible values of a1*i - a2*j.
2280// Considering the signs of a1 and a2, we have 4 possible cases:
2281//
2282// 1) If a1 >= 0 and a2 >= 0, then
2283// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
2284// -a2*N2 <= c2 - c1 <= a1*N1
2285//
2286// 2) If a1 >= 0 and a2 <= 0, then
2287// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
2288// 0 <= c2 - c1 <= a1*N1 - a2*N2
2289//
2290// 3) If a1 <= 0 and a2 >= 0, then
2291// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
2292// a1*N1 - a2*N2 <= c2 - c1 <= 0
2293//
2294// 4) If a1 <= 0 and a2 <= 0, then
2295// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
2296// a1*N1 <= c2 - c1 <= -a2*N2
2297//
2298// return true if dependence disproved
2299bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
2300 const SCEV *C1, const SCEV *C2,
2301 const Loop *Loop1,
2302 const Loop *Loop2) const {
2303 if (!isDependenceTestEnabled(Test: DependenceTestType::SymbolicRDIV))
2304 return false;
2305
2306 ++SymbolicRDIVapplications;
2307 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
2308 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);
2309 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
2310 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
2311 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
2312 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
2313 const SCEV *N1 = collectUpperBound(L: Loop1, T: A1->getType());
2314 const SCEV *N2 = collectUpperBound(L: Loop2, T: A1->getType());
2315 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
2316 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
2317 const SCEV *C2_C1 = SE->getMinusSCEV(LHS: C2, RHS: C1);
2318 const SCEV *C1_C2 = SE->getMinusSCEV(LHS: C1, RHS: C2);
2319 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
2320 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
2321 if (SE->isKnownNonNegative(S: A1)) {
2322 if (SE->isKnownNonNegative(S: A2)) {
2323 // A1 >= 0 && A2 >= 0
2324 if (N1) {
2325 // make sure that c2 - c1 <= a1*N1
2326 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2327 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2328 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: C2_C1, RHS: A1N1)) {
2329 ++SymbolicRDIVindependence;
2330 return true;
2331 }
2332 }
2333 if (N2) {
2334 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
2335 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2336 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2337 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SLT, LHS: A2N2, RHS: C1_C2)) {
2338 ++SymbolicRDIVindependence;
2339 return true;
2340 }
2341 }
2342 } else if (SE->isKnownNonPositive(S: A2)) {
2343 // a1 >= 0 && a2 <= 0
2344 if (N1 && N2) {
2345 // make sure that c2 - c1 <= a1*N1 - a2*N2
2346 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2347 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2348 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(LHS: A1N1, RHS: A2N2);
2349 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2350 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: C2_C1, RHS: A1N1_A2N2)) {
2351 ++SymbolicRDIVindependence;
2352 return true;
2353 }
2354 }
2355 // make sure that 0 <= c2 - c1
2356 if (SE->isKnownNegative(S: C2_C1)) {
2357 ++SymbolicRDIVindependence;
2358 return true;
2359 }
2360 }
2361 } else if (SE->isKnownNonPositive(S: A1)) {
2362 if (SE->isKnownNonNegative(S: A2)) {
2363 // a1 <= 0 && a2 >= 0
2364 if (N1 && N2) {
2365 // make sure that a1*N1 - a2*N2 <= c2 - c1
2366 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2367 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2368 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(LHS: A1N1, RHS: A2N2);
2369 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2370 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: A1N1_A2N2, RHS: C2_C1)) {
2371 ++SymbolicRDIVindependence;
2372 return true;
2373 }
2374 }
2375 // make sure that c2 - c1 <= 0
2376 if (SE->isKnownPositive(S: C2_C1)) {
2377 ++SymbolicRDIVindependence;
2378 return true;
2379 }
2380 } else if (SE->isKnownNonPositive(S: A2)) {
2381 // a1 <= 0 && a2 <= 0
2382 if (N1) {
2383 // make sure that a1*N1 <= c2 - c1
2384 const SCEV *A1N1 = SE->getMulExpr(LHS: A1, RHS: N1);
2385 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2386 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: A1N1, RHS: C2_C1)) {
2387 ++SymbolicRDIVindependence;
2388 return true;
2389 }
2390 }
2391 if (N2) {
2392 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2393 const SCEV *A2N2 = SE->getMulExpr(LHS: A2, RHS: N2);
2394 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2395 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SLT, LHS: C1_C2, RHS: A2N2)) {
2396 ++SymbolicRDIVindependence;
2397 return true;
2398 }
2399 }
2400 }
2401 }
2402 return false;
2403}
2404
2405// testSIV -
2406// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2407// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2408// a2 are constant, we attack it with an SIV test. While they can all be
2409// solved with the Exact SIV test, it's worthwhile to use simpler tests when
2410// they apply; they're cheaper and sometimes more precise.
2411//
2412// Return true if dependence disproved.
2413bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
2414 FullDependence &Result,
2415 bool UnderRuntimeAssumptions) {
2416 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2417 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2418 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
2419 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
2420 if (SrcAddRec && DstAddRec) {
2421 const SCEV *SrcConst = SrcAddRec->getStart();
2422 const SCEV *DstConst = DstAddRec->getStart();
2423 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
2424 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
2425 const Loop *CurSrcLoop = SrcAddRec->getLoop();
2426 const Loop *CurDstLoop = DstAddRec->getLoop();
2427 assert(haveSameSD(CurSrcLoop, CurDstLoop) &&
2428 "Loops in the SIV test should have the same iteration space and "
2429 "depth");
2430 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
2431 bool disproven;
2432 if (SrcCoeff == DstCoeff)
2433 disproven =
2434 strongSIVtest(Coeff: SrcCoeff, SrcConst, DstConst, CurSrcLoop, CurDstLoop,
2435 Level, Result, UnderRuntimeAssumptions);
2436 else if (SrcCoeff == SE->getNegativeSCEV(V: DstCoeff))
2437 disproven = weakCrossingSIVtest(Coeff: SrcCoeff, SrcConst, DstConst, CurSrcLoop,
2438 CurDstLoop, Level, Result);
2439 else
2440 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst,
2441 CurSrcLoop, CurDstLoop, Level, Result);
2442 return disproven || gcdMIVtest(Src, Dst, Result) ||
2443 symbolicRDIVtest(A1: SrcCoeff, A2: DstCoeff, C1: SrcConst, C2: DstConst, Loop1: CurSrcLoop,
2444 Loop2: CurDstLoop);
2445 }
2446 if (SrcAddRec) {
2447 const SCEV *SrcConst = SrcAddRec->getStart();
2448 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
2449 const SCEV *DstConst = Dst;
2450 const Loop *CurSrcLoop = SrcAddRec->getLoop();
2451 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
2452 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
2453 CurDstLoop: CurSrcLoop, Level, Result) ||
2454 gcdMIVtest(Src, Dst, Result);
2455 }
2456 if (DstAddRec) {
2457 const SCEV *DstConst = DstAddRec->getStart();
2458 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
2459 const SCEV *SrcConst = Src;
2460 const Loop *CurDstLoop = DstAddRec->getLoop();
2461 Level = mapDstLoop(DstLoop: CurDstLoop);
2462 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, CurSrcLoop: CurDstLoop,
2463 CurDstLoop, Level, Result) ||
2464 gcdMIVtest(Src, Dst, Result);
2465 }
2466 llvm_unreachable("SIV test expected at least one AddRec");
2467 return false;
2468}
2469
2470// testRDIV -
2471// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2472// where i and j are induction variables, c1 and c2 are loop invariant,
2473// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2474// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2475// It doesn't make sense to talk about distance or direction in this case,
2476// so there's no point in making special versions of the Strong SIV test or
2477// the Weak-crossing SIV test.
2478//
2479// With minor algebra, this test can also be used for things like
2480// [c1 + a1*i + a2*j][c2].
2481//
2482// Return true if dependence disproved.
2483bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
2484 FullDependence &Result) const {
2485 // we have 3 possible situations here:
2486 // 1) [a*i + b] and [c*j + d]
2487 // 2) [a*i + c*j + b] and [d]
2488 // 3) [b] and [a*i + c*j + d]
2489 // We need to find what we've got and get organized
2490
2491 const SCEV *SrcConst, *DstConst;
2492 const SCEV *SrcCoeff, *DstCoeff;
2493 const Loop *SrcLoop, *DstLoop;
2494
2495 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2496 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2497 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
2498 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
2499 if (SrcAddRec && DstAddRec) {
2500 SrcConst = SrcAddRec->getStart();
2501 SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
2502 SrcLoop = SrcAddRec->getLoop();
2503 DstConst = DstAddRec->getStart();
2504 DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
2505 DstLoop = DstAddRec->getLoop();
2506 } else if (SrcAddRec) {
2507 if (const SCEVAddRecExpr *tmpAddRec =
2508 dyn_cast<SCEVAddRecExpr>(Val: SrcAddRec->getStart())) {
2509 SrcConst = tmpAddRec->getStart();
2510 SrcCoeff = tmpAddRec->getStepRecurrence(SE&: *SE);
2511 SrcLoop = tmpAddRec->getLoop();
2512 DstConst = Dst;
2513 DstCoeff = SE->getNegativeSCEV(V: SrcAddRec->getStepRecurrence(SE&: *SE));
2514 DstLoop = SrcAddRec->getLoop();
2515 } else
2516 llvm_unreachable("RDIV reached by surprising SCEVs");
2517 } else if (DstAddRec) {
2518 if (const SCEVAddRecExpr *tmpAddRec =
2519 dyn_cast<SCEVAddRecExpr>(Val: DstAddRec->getStart())) {
2520 DstConst = tmpAddRec->getStart();
2521 DstCoeff = tmpAddRec->getStepRecurrence(SE&: *SE);
2522 DstLoop = tmpAddRec->getLoop();
2523 SrcConst = Src;
2524 SrcCoeff = SE->getNegativeSCEV(V: DstAddRec->getStepRecurrence(SE&: *SE));
2525 SrcLoop = DstAddRec->getLoop();
2526 } else
2527 llvm_unreachable("RDIV reached by surprising SCEVs");
2528 } else
2529 llvm_unreachable("RDIV expected at least one AddRec");
2530 return exactRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, SrcLoop, DstLoop,
2531 Result) ||
2532 gcdMIVtest(Src, Dst, Result) ||
2533 symbolicRDIVtest(A1: SrcCoeff, A2: DstCoeff, C1: SrcConst, C2: DstConst, Loop1: SrcLoop,
2534 Loop2: DstLoop);
2535}
2536
2537// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2538// Return true if dependence disproved.
2539// Can sometimes refine direction vectors.
2540bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
2541 const SmallBitVector &Loops,
2542 FullDependence &Result) const {
2543 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2544 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2545 Result.Consistent = false;
2546 return gcdMIVtest(Src, Dst, Result) ||
2547 banerjeeMIVtest(Src, Dst, Loops, Result);
2548}
2549
2550/// Given a SCEVMulExpr, returns its first operand if its first operand is a
2551/// constant and the product doesn't overflow in a signed sense. Otherwise,
2552/// returns std::nullopt. For example, given (10 * X * Y)<nsw>, it returns 10.
2553/// Notably, if it doesn't have nsw, the multiplication may overflow, and if
2554/// so, it may not a multiple of 10.
2555static std::optional<APInt> getConstantCoefficient(const SCEV *Expr) {
2556 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Expr))
2557 return Constant->getAPInt();
2558 if (const auto *Product = dyn_cast<SCEVMulExpr>(Val: Expr))
2559 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Product->getOperand(i: 0)))
2560 if (Product->hasNoSignedWrap())
2561 return Constant->getAPInt();
2562 return std::nullopt;
2563}
2564
2565bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr,
2566 const Loop *CurLoop,
2567 const SCEV *&CurLoopCoeff,
2568 APInt &RunningGCD) const {
2569 // If RunningGCD is already 1, exit early.
2570 // TODO: It might be better to continue the recursion to find CurLoopCoeff.
2571 if (RunningGCD == 1)
2572 return true;
2573
2574 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
2575 if (!AddRec) {
2576 assert(isLoopInvariant(Expr, CurLoop) &&
2577 "Expected loop invariant expression");
2578 return true;
2579 }
2580
2581 assert(AddRec->isAffine() && "Unexpected Expr");
2582 const SCEV *Start = AddRec->getStart();
2583 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
2584 if (AddRec->getLoop() == CurLoop) {
2585 CurLoopCoeff = Step;
2586 } else {
2587 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Step);
2588
2589 // If the coefficient is the product of a constant and other stuff, we can
2590 // use the constant in the GCD computation.
2591 if (!ConstCoeff)
2592 return false;
2593
2594 // TODO: What happens if ConstCoeff is the "most negative" signed number
2595 // (e.g. -128 for 8 bit wide APInt)?
2596 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2597 }
2598
2599 return accumulateCoefficientsGCD(Expr: Start, CurLoop, CurLoopCoeff, RunningGCD);
2600}
2601
2602//===----------------------------------------------------------------------===//
2603// gcdMIVtest -
2604// Tests an MIV subscript pair for dependence.
2605// Returns true if any possible dependence is disproved.
2606// Marks the result as inconsistent.
2607// Can sometimes disprove the equal direction for 1 or more loops,
2608// as discussed in Michael Wolfe's book,
2609// High Performance Compilers for Parallel Computing, page 235.
2610//
2611// We spend some effort (code!) to handle cases like
2612// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2613// but M and N are just loop-invariant variables.
2614// This should help us handle linearized subscripts;
2615// also makes this test a useful backup to the various SIV tests.
2616//
2617// It occurs to me that the presence of loop-invariant variables
2618// changes the nature of the test from "greatest common divisor"
2619// to "a common divisor".
2620bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
2621 FullDependence &Result) const {
2622 if (!isDependenceTestEnabled(Test: DependenceTestType::GCDMIV))
2623 return false;
2624
2625 LLVM_DEBUG(dbgs() << "starting gcd\n");
2626 ++GCDapplications;
2627 unsigned BitWidth = SE->getTypeSizeInBits(Ty: Src->getType());
2628 APInt RunningGCD = APInt::getZero(numBits: BitWidth);
2629
2630 // Examine Src coefficients.
2631 // Compute running GCD and record source constant.
2632 // Because we're looking for the constant at the end of the chain,
2633 // we can't quit the loop just because the GCD == 1.
2634 const SCEV *Coefficients = Src;
2635 while (const SCEVAddRecExpr *AddRec =
2636 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2637 const SCEV *Coeff = AddRec->getStepRecurrence(SE&: *SE);
2638 // If the coefficient is the product of a constant and other stuff,
2639 // we can use the constant in the GCD computation.
2640 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Coeff);
2641 if (!ConstCoeff)
2642 return false;
2643 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2644 Coefficients = AddRec->getStart();
2645 }
2646 const SCEV *SrcConst = Coefficients;
2647
2648 // Examine Dst coefficients.
2649 // Compute running GCD and record destination constant.
2650 // Because we're looking for the constant at the end of the chain,
2651 // we can't quit the loop just because the GCD == 1.
2652 Coefficients = Dst;
2653 while (const SCEVAddRecExpr *AddRec =
2654 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2655 const SCEV *Coeff = AddRec->getStepRecurrence(SE&: *SE);
2656 // If the coefficient is the product of a constant and other stuff,
2657 // we can use the constant in the GCD computation.
2658 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Coeff);
2659 if (!ConstCoeff)
2660 return false;
2661 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2662 Coefficients = AddRec->getStart();
2663 }
2664 const SCEV *DstConst = Coefficients;
2665
2666 APInt ExtraGCD = APInt::getZero(numBits: BitWidth);
2667 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
2668 if (!Delta)
2669 return false;
2670 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
2671 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Val: Delta);
2672 if (!Constant)
2673 return false;
2674 APInt ConstDelta = cast<SCEVConstant>(Val: Constant)->getAPInt();
2675 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
2676 if (ConstDelta == 0)
2677 return false;
2678 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
2679 APInt Remainder = ConstDelta.srem(RHS: RunningGCD);
2680 if (Remainder != 0) {
2681 ++GCDindependence;
2682 return true;
2683 }
2684
2685 // Try to disprove equal directions.
2686 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2687 // the code above can't disprove the dependence because the GCD = 1.
2688 // So we consider what happen if i = i' and what happens if j = j'.
2689 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2690 // which is infeasible, so we can disallow the = direction for the i level.
2691 // Setting j = j' doesn't help matters, so we end up with a direction vector
2692 // of [<>, *]
2693 //
2694 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2695 // we need to remember that the constant part is 5 and the RunningGCD should
2696 // be initialized to ExtraGCD = 30.
2697 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
2698
2699 bool Improved = false;
2700 Coefficients = Src;
2701 while (const SCEVAddRecExpr *AddRec =
2702 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2703 Coefficients = AddRec->getStart();
2704 const Loop *CurLoop = AddRec->getLoop();
2705 RunningGCD = ExtraGCD;
2706 const SCEV *SrcCoeff = AddRec->getStepRecurrence(SE&: *SE);
2707 const SCEV *DstCoeff = SE->getMinusSCEV(LHS: SrcCoeff, RHS: SrcCoeff);
2708
2709 if (!accumulateCoefficientsGCD(Expr: Src, CurLoop, CurLoopCoeff&: SrcCoeff, RunningGCD) ||
2710 !accumulateCoefficientsGCD(Expr: Dst, CurLoop, CurLoopCoeff&: DstCoeff, RunningGCD))
2711 return false;
2712
2713 Delta = SE->getMinusSCEV(LHS: SrcCoeff, RHS: DstCoeff);
2714 // If the coefficient is the product of a constant and other stuff,
2715 // we can use the constant in the GCD computation.
2716 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Delta);
2717 if (!ConstCoeff)
2718 // The difference of the two coefficients might not be a product
2719 // or constant, in which case we give up on this direction.
2720 continue;
2721 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2722 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2723 if (RunningGCD != 0) {
2724 Remainder = ConstDelta.srem(RHS: RunningGCD);
2725 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2726 if (Remainder != 0) {
2727 unsigned Level = mapSrcLoop(SrcLoop: CurLoop);
2728 Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ;
2729 Improved = true;
2730 }
2731 }
2732 }
2733 if (Improved)
2734 ++GCDsuccesses;
2735 LLVM_DEBUG(dbgs() << "all done\n");
2736 return false;
2737}
2738
2739//===----------------------------------------------------------------------===//
2740// banerjeeMIVtest -
2741// Use Banerjee's Inequalities to test an MIV subscript pair.
2742// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2743// Generally follows the discussion in Section 2.5.2 of
2744//
2745// Optimizing Supercompilers for Supercomputers
2746// Michael Wolfe
2747//
2748// The inequalities given on page 25 are simplified in that loops are
2749// normalized so that the lower bound is always 0 and the stride is always 1.
2750// For example, Wolfe gives
2751//
2752// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2753//
2754// where A_k is the coefficient of the kth index in the source subscript,
2755// B_k is the coefficient of the kth index in the destination subscript,
2756// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2757// index, and N_k is the stride of the kth index. Since all loops are normalized
2758// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2759// equation to
2760//
2761// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2762// = (A^-_k - B_k)^- (U_k - 1) - B_k
2763//
2764// Similar simplifications are possible for the other equations.
2765//
2766// When we can't determine the number of iterations for a loop,
2767// we use NULL as an indicator for the worst case, infinity.
2768// When computing the upper bound, NULL denotes +inf;
2769// for the lower bound, NULL denotes -inf.
2770//
2771// Return true if dependence disproved.
2772bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
2773 const SmallBitVector &Loops,
2774 FullDependence &Result) const {
2775 if (!isDependenceTestEnabled(Test: DependenceTestType::BanerjeeMIV))
2776 return false;
2777
2778 LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2779 ++BanerjeeApplications;
2780 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
2781 const SCEV *A0;
2782 CoefficientInfo *A = collectCoeffInfo(Subscript: Src, SrcFlag: true, Constant&: A0);
2783 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');
2784 const SCEV *B0;
2785 CoefficientInfo *B = collectCoeffInfo(Subscript: Dst, SrcFlag: false, Constant&: B0);
2786 BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2787 const SCEV *Delta = SE->getMinusSCEV(LHS: B0, RHS: A0);
2788 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2789
2790 // Compute bounds for all the * directions.
2791 LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2792 for (unsigned K = 1; K <= MaxLevels; ++K) {
2793 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
2794 Bound[K].Direction = Dependence::DVEntry::ALL;
2795 Bound[K].DirSet = Dependence::DVEntry::NONE;
2796 findBoundsALL(A, B, Bound, K);
2797#ifndef NDEBUG
2798 LLVM_DEBUG(dbgs() << "\t " << K << '\t');
2799 if (Bound[K].Lower[Dependence::DVEntry::ALL])
2800 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2801 else
2802 LLVM_DEBUG(dbgs() << "-inf\t");
2803 if (Bound[K].Upper[Dependence::DVEntry::ALL])
2804 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2805 else
2806 LLVM_DEBUG(dbgs() << "+inf\n");
2807#endif
2808 }
2809
2810 // Test the *, *, *, ... case.
2811 bool Disproved = false;
2812 if (testBounds(DirKind: Dependence::DVEntry::ALL, Level: 0, Bound, Delta)) {
2813 // Explore the direction vector hierarchy.
2814 unsigned DepthExpanded = 0;
2815 unsigned NewDeps =
2816 exploreDirections(Level: 1, A, B, Bound, Loops, DepthExpanded, Delta);
2817 if (NewDeps > 0) {
2818 bool Improved = false;
2819 for (unsigned K = 1; K <= CommonLevels; ++K) {
2820 if (Loops[K]) {
2821 unsigned Old = Result.DV[K - 1].Direction;
2822 Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2823 Improved |= Old != Result.DV[K - 1].Direction;
2824 if (!Result.DV[K - 1].Direction) {
2825 Improved = false;
2826 Disproved = true;
2827 break;
2828 }
2829 }
2830 }
2831 if (Improved)
2832 ++BanerjeeSuccesses;
2833 } else {
2834 ++BanerjeeIndependence;
2835 Disproved = true;
2836 }
2837 } else {
2838 ++BanerjeeIndependence;
2839 Disproved = true;
2840 }
2841 delete[] Bound;
2842 delete[] A;
2843 delete[] B;
2844 return Disproved;
2845}
2846
2847// Hierarchically expands the direction vector
2848// search space, combining the directions of discovered dependences
2849// in the DirSet field of Bound. Returns the number of distinct
2850// dependences discovered. If the dependence is disproved,
2851// it will return 0.
2852unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2853 CoefficientInfo *B, BoundInfo *Bound,
2854 const SmallBitVector &Loops,
2855 unsigned &DepthExpanded,
2856 const SCEV *Delta) const {
2857 // This algorithm has worst case complexity of O(3^n), where 'n' is the number
2858 // of common loop levels. To avoid excessive compile-time, pessimize all the
2859 // results and immediately return when the number of common levels is beyond
2860 // the given threshold.
2861 if (CommonLevels > MIVMaxLevelThreshold) {
2862 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2863 "direction exploration is terminated.\n");
2864 for (unsigned K = 1; K <= CommonLevels; ++K)
2865 if (Loops[K])
2866 Bound[K].DirSet = Dependence::DVEntry::ALL;
2867 return 1;
2868 }
2869
2870 if (Level > CommonLevels) {
2871 // record result
2872 LLVM_DEBUG(dbgs() << "\t[");
2873 for (unsigned K = 1; K <= CommonLevels; ++K) {
2874 if (Loops[K]) {
2875 Bound[K].DirSet |= Bound[K].Direction;
2876#ifndef NDEBUG
2877 switch (Bound[K].Direction) {
2878 case Dependence::DVEntry::LT:
2879 LLVM_DEBUG(dbgs() << " <");
2880 break;
2881 case Dependence::DVEntry::EQ:
2882 LLVM_DEBUG(dbgs() << " =");
2883 break;
2884 case Dependence::DVEntry::GT:
2885 LLVM_DEBUG(dbgs() << " >");
2886 break;
2887 case Dependence::DVEntry::ALL:
2888 LLVM_DEBUG(dbgs() << " *");
2889 break;
2890 default:
2891 llvm_unreachable("unexpected Bound[K].Direction");
2892 }
2893#endif
2894 }
2895 }
2896 LLVM_DEBUG(dbgs() << " ]\n");
2897 return 1;
2898 }
2899 if (Loops[Level]) {
2900 if (Level > DepthExpanded) {
2901 DepthExpanded = Level;
2902 // compute bounds for <, =, > at current level
2903 findBoundsLT(A, B, Bound, K: Level);
2904 findBoundsGT(A, B, Bound, K: Level);
2905 findBoundsEQ(A, B, Bound, K: Level);
2906#ifndef NDEBUG
2907 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2908 LLVM_DEBUG(dbgs() << "\t <\t");
2909 if (Bound[Level].Lower[Dependence::DVEntry::LT])
2910 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2911 << '\t');
2912 else
2913 LLVM_DEBUG(dbgs() << "-inf\t");
2914 if (Bound[Level].Upper[Dependence::DVEntry::LT])
2915 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2916 << '\n');
2917 else
2918 LLVM_DEBUG(dbgs() << "+inf\n");
2919 LLVM_DEBUG(dbgs() << "\t =\t");
2920 if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2921 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2922 << '\t');
2923 else
2924 LLVM_DEBUG(dbgs() << "-inf\t");
2925 if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2926 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2927 << '\n');
2928 else
2929 LLVM_DEBUG(dbgs() << "+inf\n");
2930 LLVM_DEBUG(dbgs() << "\t >\t");
2931 if (Bound[Level].Lower[Dependence::DVEntry::GT])
2932 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2933 << '\t');
2934 else
2935 LLVM_DEBUG(dbgs() << "-inf\t");
2936 if (Bound[Level].Upper[Dependence::DVEntry::GT])
2937 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2938 << '\n');
2939 else
2940 LLVM_DEBUG(dbgs() << "+inf\n");
2941#endif
2942 }
2943
2944 unsigned NewDeps = 0;
2945
2946 // test bounds for <, *, *, ...
2947 if (testBounds(DirKind: Dependence::DVEntry::LT, Level, Bound, Delta))
2948 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2949 Delta);
2950
2951 // Test bounds for =, *, *, ...
2952 if (testBounds(DirKind: Dependence::DVEntry::EQ, Level, Bound, Delta))
2953 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2954 Delta);
2955
2956 // test bounds for >, *, *, ...
2957 if (testBounds(DirKind: Dependence::DVEntry::GT, Level, Bound, Delta))
2958 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2959 Delta);
2960
2961 Bound[Level].Direction = Dependence::DVEntry::ALL;
2962 return NewDeps;
2963 } else
2964 return exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2965 Delta);
2966}
2967
2968// Returns true iff the current bounds are plausible.
2969bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2970 BoundInfo *Bound, const SCEV *Delta) const {
2971 Bound[Level].Direction = DirKind;
2972 if (const SCEV *LowerBound = getLowerBound(Bound))
2973 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: LowerBound, RHS: Delta))
2974 return false;
2975 if (const SCEV *UpperBound = getUpperBound(Bound))
2976 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: Delta, RHS: UpperBound))
2977 return false;
2978 return true;
2979}
2980
2981// Computes the upper and lower bounds for level K
2982// using the * direction. Records them in Bound.
2983// Wolfe gives the equations
2984//
2985// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2986// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2987//
2988// Since we normalize loops, we can simplify these equations to
2989//
2990// LB^*_k = (A^-_k - B^+_k)U_k
2991// UB^*_k = (A^+_k - B^-_k)U_k
2992//
2993// We must be careful to handle the case where the upper bound is unknown.
2994// Note that the lower bound is always <= 0
2995// and the upper bound is always >= 0.
2996void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
2997 BoundInfo *Bound, unsigned K) const {
2998 Bound[K].Lower[Dependence::DVEntry::ALL] =
2999 nullptr; // Default value = -infinity.
3000 Bound[K].Upper[Dependence::DVEntry::ALL] =
3001 nullptr; // Default value = +infinity.
3002 if (Bound[K].Iterations) {
3003 Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getMulExpr(
3004 LHS: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].PosPart), RHS: Bound[K].Iterations);
3005 Bound[K].Upper[Dependence::DVEntry::ALL] = SE->getMulExpr(
3006 LHS: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].NegPart), RHS: Bound[K].Iterations);
3007 } else {
3008 // If the difference is 0, we won't need to know the number of iterations.
3009 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].NegPart, RHS: B[K].PosPart))
3010 Bound[K].Lower[Dependence::DVEntry::ALL] =
3011 SE->getZero(Ty: A[K].Coeff->getType());
3012 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].PosPart, RHS: B[K].NegPart))
3013 Bound[K].Upper[Dependence::DVEntry::ALL] =
3014 SE->getZero(Ty: A[K].Coeff->getType());
3015 }
3016}
3017
3018// Computes the upper and lower bounds for level K
3019// using the = direction. Records them in Bound.
3020// Wolfe gives the equations
3021//
3022// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
3023// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
3024//
3025// Since we normalize loops, we can simplify these equations to
3026//
3027// LB^=_k = (A_k - B_k)^- U_k
3028// UB^=_k = (A_k - B_k)^+ U_k
3029//
3030// We must be careful to handle the case where the upper bound is unknown.
3031// Note that the lower bound is always <= 0
3032// and the upper bound is always >= 0.
3033void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
3034 BoundInfo *Bound, unsigned K) const {
3035 Bound[K].Lower[Dependence::DVEntry::EQ] =
3036 nullptr; // Default value = -infinity.
3037 Bound[K].Upper[Dependence::DVEntry::EQ] =
3038 nullptr; // Default value = +infinity.
3039 if (Bound[K].Iterations) {
3040 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
3041 const SCEV *NegativePart = getNegativePart(X: Delta);
3042 Bound[K].Lower[Dependence::DVEntry::EQ] =
3043 SE->getMulExpr(LHS: NegativePart, RHS: Bound[K].Iterations);
3044 const SCEV *PositivePart = getPositivePart(X: Delta);
3045 Bound[K].Upper[Dependence::DVEntry::EQ] =
3046 SE->getMulExpr(LHS: PositivePart, RHS: Bound[K].Iterations);
3047 } else {
3048 // If the positive/negative part of the difference is 0,
3049 // we won't need to know the number of iterations.
3050 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
3051 const SCEV *NegativePart = getNegativePart(X: Delta);
3052 if (NegativePart->isZero())
3053 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
3054 const SCEV *PositivePart = getPositivePart(X: Delta);
3055 if (PositivePart->isZero())
3056 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
3057 }
3058}
3059
3060// Computes the upper and lower bounds for level K
3061// using the < direction. Records them in Bound.
3062// Wolfe gives the equations
3063//
3064// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
3065// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
3066//
3067// Since we normalize loops, we can simplify these equations to
3068//
3069// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
3070// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
3071//
3072// We must be careful to handle the case where the upper bound is unknown.
3073void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
3074 BoundInfo *Bound, unsigned K) const {
3075 Bound[K].Lower[Dependence::DVEntry::LT] =
3076 nullptr; // Default value = -infinity.
3077 Bound[K].Upper[Dependence::DVEntry::LT] =
3078 nullptr; // Default value = +infinity.
3079 if (Bound[K].Iterations) {
3080 const SCEV *Iter_1 = SE->getMinusSCEV(
3081 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
3082 const SCEV *NegPart =
3083 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
3084 Bound[K].Lower[Dependence::DVEntry::LT] =
3085 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: B[K].Coeff);
3086 const SCEV *PosPart =
3087 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
3088 Bound[K].Upper[Dependence::DVEntry::LT] =
3089 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: B[K].Coeff);
3090 } else {
3091 // If the positive/negative part of the difference is 0,
3092 // we won't need to know the number of iterations.
3093 const SCEV *NegPart =
3094 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
3095 if (NegPart->isZero())
3096 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
3097 const SCEV *PosPart =
3098 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
3099 if (PosPart->isZero())
3100 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
3101 }
3102}
3103
3104// Computes the upper and lower bounds for level K
3105// using the > direction. Records them in Bound.
3106// Wolfe gives the equations
3107//
3108// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
3109// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
3110//
3111// Since we normalize loops, we can simplify these equations to
3112//
3113// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
3114// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
3115//
3116// We must be careful to handle the case where the upper bound is unknown.
3117void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
3118 BoundInfo *Bound, unsigned K) const {
3119 Bound[K].Lower[Dependence::DVEntry::GT] =
3120 nullptr; // Default value = -infinity.
3121 Bound[K].Upper[Dependence::DVEntry::GT] =
3122 nullptr; // Default value = +infinity.
3123 if (Bound[K].Iterations) {
3124 const SCEV *Iter_1 = SE->getMinusSCEV(
3125 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
3126 const SCEV *NegPart =
3127 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
3128 Bound[K].Lower[Dependence::DVEntry::GT] =
3129 SE->getAddExpr(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: A[K].Coeff);
3130 const SCEV *PosPart =
3131 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
3132 Bound[K].Upper[Dependence::DVEntry::GT] =
3133 SE->getAddExpr(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: A[K].Coeff);
3134 } else {
3135 // If the positive/negative part of the difference is 0,
3136 // we won't need to know the number of iterations.
3137 const SCEV *NegPart =
3138 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
3139 if (NegPart->isZero())
3140 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
3141 const SCEV *PosPart =
3142 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
3143 if (PosPart->isZero())
3144 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
3145 }
3146}
3147
3148// X^+ = max(X, 0)
3149const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
3150 return SE->getSMaxExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
3151}
3152
3153// X^- = min(X, 0)
3154const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
3155 return SE->getSMinExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
3156}
3157
3158// Walks through the subscript,
3159// collecting each coefficient, the associated loop bounds,
3160// and recording its positive and negative parts for later use.
3161DependenceInfo::CoefficientInfo *
3162DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
3163 const SCEV *&Constant) const {
3164 const SCEV *Zero = SE->getZero(Ty: Subscript->getType());
3165 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
3166 for (unsigned K = 1; K <= MaxLevels; ++K) {
3167 CI[K].Coeff = Zero;
3168 CI[K].PosPart = Zero;
3169 CI[K].NegPart = Zero;
3170 CI[K].Iterations = nullptr;
3171 }
3172 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Subscript)) {
3173 const Loop *L = AddRec->getLoop();
3174 unsigned K = SrcFlag ? mapSrcLoop(SrcLoop: L) : mapDstLoop(DstLoop: L);
3175 CI[K].Coeff = AddRec->getStepRecurrence(SE&: *SE);
3176 CI[K].PosPart = getPositivePart(X: CI[K].Coeff);
3177 CI[K].NegPart = getNegativePart(X: CI[K].Coeff);
3178 CI[K].Iterations = collectUpperBound(L, T: Subscript->getType());
3179 Subscript = AddRec->getStart();
3180 }
3181 Constant = Subscript;
3182#ifndef NDEBUG
3183 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
3184 for (unsigned K = 1; K <= MaxLevels; ++K) {
3185 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
3186 LLVM_DEBUG(dbgs() << "\tPos Part = ");
3187 LLVM_DEBUG(dbgs() << *CI[K].PosPart);
3188 LLVM_DEBUG(dbgs() << "\tNeg Part = ");
3189 LLVM_DEBUG(dbgs() << *CI[K].NegPart);
3190 LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
3191 if (CI[K].Iterations)
3192 LLVM_DEBUG(dbgs() << *CI[K].Iterations);
3193 else
3194 LLVM_DEBUG(dbgs() << "+inf");
3195 LLVM_DEBUG(dbgs() << '\n');
3196 }
3197 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
3198#endif
3199 return CI;
3200}
3201
3202// Looks through all the bounds info and
3203// computes the lower bound given the current direction settings
3204// at each level. If the lower bound for any level is -inf,
3205// the result is -inf.
3206const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
3207 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
3208 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3209 if (Bound[K].Lower[Bound[K].Direction])
3210 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Lower[Bound[K].Direction]);
3211 else
3212 Sum = nullptr;
3213 }
3214 return Sum;
3215}
3216
3217// Looks through all the bounds info and
3218// computes the upper bound given the current direction settings
3219// at each level. If the upper bound at any level is +inf,
3220// the result is +inf.
3221const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
3222 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
3223 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3224 if (Bound[K].Upper[Bound[K].Direction])
3225 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Upper[Bound[K].Direction]);
3226 else
3227 Sum = nullptr;
3228 }
3229 return Sum;
3230}
3231
3232/// Check if we can delinearize the subscripts. If the SCEVs representing the
3233/// source and destination array references are recurrences on a nested loop,
3234/// this function flattens the nested recurrences into separate recurrences
3235/// for each loop level.
3236bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
3237 SmallVectorImpl<Subscript> &Pair) {
3238 assert(isLoadOrStore(Src) && "instruction is not load or store");
3239 assert(isLoadOrStore(Dst) && "instruction is not load or store");
3240 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3241 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3242 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
3243 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
3244 const SCEV *SrcAccessFn = SE->getSCEVAtScope(V: SrcPtr, L: SrcLoop);
3245 const SCEV *DstAccessFn = SE->getSCEVAtScope(V: DstPtr, L: DstLoop);
3246 const SCEVUnknown *SrcBase =
3247 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
3248 const SCEVUnknown *DstBase =
3249 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
3250
3251 if (!SrcBase || !DstBase || SrcBase != DstBase)
3252 return false;
3253
3254 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
3255
3256 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
3257 SrcSubscripts, DstSubscripts) &&
3258 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
3259 SrcSubscripts, DstSubscripts))
3260 return false;
3261
3262 assert(isLoopInvariant(SrcBase, SrcLoop) &&
3263 isLoopInvariant(DstBase, DstLoop) &&
3264 "Expected SrcBase and DstBase to be loop invariant");
3265
3266 int Size = SrcSubscripts.size();
3267 LLVM_DEBUG({
3268 dbgs() << "\nSrcSubscripts: ";
3269 for (int I = 0; I < Size; I++)
3270 dbgs() << *SrcSubscripts[I];
3271 dbgs() << "\nDstSubscripts: ";
3272 for (int I = 0; I < Size; I++)
3273 dbgs() << *DstSubscripts[I];
3274 });
3275
3276 // The delinearization transforms a single-subscript MIV dependence test into
3277 // a multi-subscript SIV dependence test that is easier to compute. So we
3278 // resize Pair to contain as many pairs of subscripts as the delinearization
3279 // has found, and then initialize the pairs following the delinearization.
3280 Pair.resize(N: Size);
3281 SCEVMonotonicityChecker MonChecker(SE);
3282 const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
3283 for (int I = 0; I < Size; ++I) {
3284 Pair[I].Src = SrcSubscripts[I];
3285 Pair[I].Dst = DstSubscripts[I];
3286 unifySubscriptType(Pairs: &Pair[I]);
3287
3288 if (EnableMonotonicityCheck) {
3289 if (MonChecker.checkMonotonicity(Expr: Pair[I].Src, OutermostLoop).isUnknown())
3290 return false;
3291 if (MonChecker.checkMonotonicity(Expr: Pair[I].Dst, OutermostLoop).isUnknown())
3292 return false;
3293 }
3294 }
3295
3296 return true;
3297}
3298
3299/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
3300/// arrays accessed are fixed-size arrays. Return true if delinearization was
3301/// successful.
3302bool DependenceInfo::tryDelinearizeFixedSize(
3303 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3304 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3305 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3306 LLVM_DEBUG({
3307 const SCEVUnknown *SrcBase =
3308 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3309 const SCEVUnknown *DstBase =
3310 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3311 assert(SrcBase && DstBase && SrcBase == DstBase &&
3312 "expected src and dst scev unknowns to be equal");
3313 });
3314
3315 const SCEV *ElemSize = SE->getElementSize(Inst: Src);
3316 assert(ElemSize == SE->getElementSize(Dst) && "Different element sizes");
3317 SmallVector<const SCEV *, 4> SrcSizes, DstSizes;
3318 if (!delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: SrcAccessFn),
3319 Subscripts&: SrcSubscripts, Sizes&: SrcSizes, ElementSize: ElemSize) ||
3320 !delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: DstAccessFn),
3321 Subscripts&: DstSubscripts, Sizes&: DstSizes, ElementSize: ElemSize))
3322 return false;
3323
3324 // Check that the two size arrays are non-empty and equal in length and
3325 // value. SCEV expressions are uniqued, so we can compare pointers.
3326 if (SrcSizes.size() != DstSizes.size() ||
3327 !std::equal(first1: SrcSizes.begin(), last1: SrcSizes.end(), first2: DstSizes.begin())) {
3328 SrcSubscripts.clear();
3329 DstSubscripts.clear();
3330 return false;
3331 }
3332
3333 assert(SrcSubscripts.size() == DstSubscripts.size() &&
3334 "Expected equal number of entries in the list of SrcSubscripts and "
3335 "DstSubscripts.");
3336
3337 // In general we cannot safely assume that the subscripts recovered from GEPs
3338 // are in the range of values defined for their corresponding array
3339 // dimensions. For example some C language usage/interpretation make it
3340 // impossible to verify this at compile-time. As such we can only delinearize
3341 // iff the subscripts are positive and are less than the range of the
3342 // dimension.
3343 if (!DisableDelinearizationChecks) {
3344 if (!validateDelinearizationResult(SE&: *SE, Sizes: SrcSizes, Subscripts: SrcSubscripts) ||
3345 !validateDelinearizationResult(SE&: *SE, Sizes: DstSizes, Subscripts: DstSubscripts)) {
3346 SrcSubscripts.clear();
3347 DstSubscripts.clear();
3348 return false;
3349 }
3350 }
3351 LLVM_DEBUG({
3352 dbgs() << "Delinearized subscripts of fixed-size array\n"
3353 << "SrcGEP:" << *getLoadStorePointerOperand(Src) << "\n"
3354 << "DstGEP:" << *getLoadStorePointerOperand(Dst) << "\n";
3355 });
3356 return true;
3357}
3358
3359bool DependenceInfo::tryDelinearizeParametricSize(
3360 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3361 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3362 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3363
3364 const SCEVUnknown *SrcBase =
3365 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
3366 const SCEVUnknown *DstBase =
3367 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
3368 assert(SrcBase && DstBase && SrcBase == DstBase &&
3369 "expected src and dst scev unknowns to be equal");
3370
3371 const SCEV *ElementSize = SE->getElementSize(Inst: Src);
3372 if (ElementSize != SE->getElementSize(Inst: Dst))
3373 return false;
3374
3375 const SCEV *SrcSCEV = SE->getMinusSCEV(LHS: SrcAccessFn, RHS: SrcBase);
3376 const SCEV *DstSCEV = SE->getMinusSCEV(LHS: DstAccessFn, RHS: DstBase);
3377
3378 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(Val: SrcSCEV);
3379 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(Val: DstSCEV);
3380 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
3381 return false;
3382
3383 // First step: collect parametric terms in both array references.
3384 SmallVector<const SCEV *, 4> Terms;
3385 collectParametricTerms(SE&: *SE, Expr: SrcAR, Terms);
3386 collectParametricTerms(SE&: *SE, Expr: DstAR, Terms);
3387
3388 // Second step: find subscript sizes.
3389 SmallVector<const SCEV *, 4> Sizes;
3390 findArrayDimensions(SE&: *SE, Terms, Sizes, ElementSize);
3391
3392 // Third step: compute the access functions for each subscript.
3393 computeAccessFunctions(SE&: *SE, Expr: SrcAR, Subscripts&: SrcSubscripts, Sizes);
3394 computeAccessFunctions(SE&: *SE, Expr: DstAR, Subscripts&: DstSubscripts, Sizes);
3395
3396 // Fail when there is only a subscript: that's a linearized access function.
3397 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
3398 SrcSubscripts.size() != DstSubscripts.size())
3399 return false;
3400
3401 // Statically check that the array bounds are in-range. The first subscript we
3402 // don't have a size for and it cannot overflow into another subscript, so is
3403 // always safe. The others need to be 0 <= subscript[i] < bound, for both src
3404 // and dst.
3405 // FIXME: It may be better to record these sizes and add them as constraints
3406 // to the dependency checks.
3407 if (!DisableDelinearizationChecks)
3408 if (!validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: SrcSubscripts) ||
3409 !validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: DstSubscripts))
3410 return false;
3411
3412 return true;
3413}
3414
3415//===----------------------------------------------------------------------===//
3416
3417#ifndef NDEBUG
3418// For debugging purposes, dump a small bit vector to dbgs().
3419static void dumpSmallBitVector(SmallBitVector &BV) {
3420 dbgs() << "{";
3421 for (unsigned VI : BV.set_bits()) {
3422 dbgs() << VI;
3423 if (BV.find_next(VI) >= 0)
3424 dbgs() << ' ';
3425 }
3426 dbgs() << "}\n";
3427}
3428#endif
3429
3430bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
3431 FunctionAnalysisManager::Invalidator &Inv) {
3432 // Check if the analysis itself has been invalidated.
3433 auto PAC = PA.getChecker<DependenceAnalysis>();
3434 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3435 return true;
3436
3437 // Check transitive dependencies.
3438 return Inv.invalidate<AAManager>(IR&: F, PA) ||
3439 Inv.invalidate<ScalarEvolutionAnalysis>(IR&: F, PA) ||
3440 Inv.invalidate<LoopAnalysis>(IR&: F, PA);
3441}
3442
3443// depends -
3444// Returns NULL if there is no dependence.
3445// Otherwise, return a Dependence with as many details as possible.
3446// Corresponds to Section 3.1 in the paper
3447//
3448// Practical Dependence Testing
3449// Goff, Kennedy, Tseng
3450// PLDI 1991
3451//
3452std::unique_ptr<Dependence>
3453DependenceInfo::depends(Instruction *Src, Instruction *Dst,
3454 bool UnderRuntimeAssumptions) {
3455 SmallVector<const SCEVPredicate *, 4> Assume;
3456 bool PossiblyLoopIndependent = true;
3457 if (Src == Dst)
3458 PossiblyLoopIndependent = false;
3459
3460 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
3461 // if both instructions don't reference memory, there's no dependence
3462 return nullptr;
3463
3464 if (!isLoadOrStore(I: Src) || !isLoadOrStore(I: Dst)) {
3465 // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3466 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
3467 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3468 args: SCEVUnionPredicate(Assume, *SE));
3469 }
3470
3471 const MemoryLocation &DstLoc = MemoryLocation::get(Inst: Dst);
3472 const MemoryLocation &SrcLoc = MemoryLocation::get(Inst: Src);
3473
3474 switch (underlyingObjectsAlias(AA, DL: F->getDataLayout(), LocA: DstLoc, LocB: SrcLoc)) {
3475 case AliasResult::MayAlias:
3476 case AliasResult::PartialAlias:
3477 // cannot analyse objects if we don't understand their aliasing.
3478 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
3479 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3480 args: SCEVUnionPredicate(Assume, *SE));
3481 case AliasResult::NoAlias:
3482 // If the objects noalias, they are distinct, accesses are independent.
3483 LLVM_DEBUG(dbgs() << "no alias\n");
3484 return nullptr;
3485 case AliasResult::MustAlias:
3486 break; // The underlying objects alias; test accesses for dependence.
3487 }
3488
3489 if (DstLoc.Size != SrcLoc.Size || !DstLoc.Size.isPrecise() ||
3490 !SrcLoc.Size.isPrecise()) {
3491 // The dependence test gets confused if the size of the memory accesses
3492 // differ.
3493 LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
3494 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3495 args: SCEVUnionPredicate(Assume, *SE));
3496 }
3497
3498 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
3499 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
3500 const SCEV *SrcSCEV = SE->getSCEV(V: SrcPtr);
3501 const SCEV *DstSCEV = SE->getSCEV(V: DstPtr);
3502 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
3503 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
3504 const SCEV *SrcBase = SE->getPointerBase(V: SrcSCEV);
3505 const SCEV *DstBase = SE->getPointerBase(V: DstSCEV);
3506 if (SrcBase != DstBase) {
3507 // If two pointers have different bases, trying to analyze indexes won't
3508 // work; we can't compare them to each other. This can happen, for example,
3509 // if one is produced by an LCSSA PHI node.
3510 //
3511 // We check this upfront so we don't crash in cases where getMinusSCEV()
3512 // returns a SCEVCouldNotCompute.
3513 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
3514 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3515 args: SCEVUnionPredicate(Assume, *SE));
3516 }
3517
3518 // Even if the base pointers are the same, they may not be loop-invariant. It
3519 // could lead to incorrect results, as we're analyzing loop-carried
3520 // dependencies. Src and Dst can be in different loops, so we need to check
3521 // the base pointer is invariant in both loops.
3522 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
3523 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
3524 if (!isLoopInvariant(Expression: SrcBase, LoopNest: SrcLoop) ||
3525 !isLoopInvariant(Expression: DstBase, LoopNest: DstLoop)) {
3526 LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
3527 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3528 args: SCEVUnionPredicate(Assume, *SE));
3529 }
3530
3531 uint64_t EltSize = SrcLoc.Size.toRaw();
3532 const SCEV *SrcEv = SE->getMinusSCEV(LHS: SrcSCEV, RHS: SrcBase);
3533 const SCEV *DstEv = SE->getMinusSCEV(LHS: DstSCEV, RHS: DstBase);
3534
3535 // Check that memory access offsets are multiples of element sizes.
3536 if (!SE->isKnownMultipleOf(S: SrcEv, M: EltSize, Assumptions&: Assume) ||
3537 !SE->isKnownMultipleOf(S: DstEv, M: EltSize, Assumptions&: Assume)) {
3538 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
3539 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3540 args: SCEVUnionPredicate(Assume, *SE));
3541 }
3542
3543 // Runtime assumptions needed but not allowed.
3544 if (!Assume.empty() && !UnderRuntimeAssumptions)
3545 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3546 args: SCEVUnionPredicate(Assume, *SE));
3547
3548 unsigned Pairs = 1;
3549 SmallVector<Subscript, 2> Pair(Pairs);
3550 Pair[0].Src = SrcEv;
3551 Pair[0].Dst = DstEv;
3552
3553 SCEVMonotonicityChecker MonChecker(SE);
3554 const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
3555 if (EnableMonotonicityCheck)
3556 if (MonChecker.checkMonotonicity(Expr: Pair[0].Src, OutermostLoop).isUnknown() ||
3557 MonChecker.checkMonotonicity(Expr: Pair[0].Dst, OutermostLoop).isUnknown())
3558 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3559 args: SCEVUnionPredicate(Assume, *SE));
3560
3561 if (Delinearize) {
3562 if (tryDelinearize(Src, Dst, Pair)) {
3563 LLVM_DEBUG(dbgs() << " delinearized\n");
3564 Pairs = Pair.size();
3565 }
3566 }
3567
3568 // Establish loop nesting levels considering SameSD loops as common
3569 establishNestingLevels(Src, Dst);
3570
3571 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
3572 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
3573 LLVM_DEBUG(dbgs() << " SameSD nesting levels = " << SameSDLevels << "\n");
3574
3575 // Modify common levels to consider the SameSD levels in the tests
3576 CommonLevels += SameSDLevels;
3577 MaxLevels -= SameSDLevels;
3578 if (SameSDLevels > 0) {
3579 // Not all tests are handled yet over SameSD loops
3580 // Revoke if there are any tests other than ZIV, SIV or RDIV
3581 for (unsigned P = 0; P < Pairs; ++P) {
3582 SmallBitVector Loops;
3583 Subscript::ClassificationKind TestClass =
3584 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()),
3585 Dst: Pair[P].Dst, DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops);
3586
3587 if (TestClass != Subscript::ZIV && TestClass != Subscript::SIV &&
3588 TestClass != Subscript::RDIV) {
3589 // Revert the levels to not consider the SameSD levels
3590 CommonLevels -= SameSDLevels;
3591 MaxLevels += SameSDLevels;
3592 SameSDLevels = 0;
3593 break;
3594 }
3595 }
3596 }
3597
3598 if (SameSDLevels > 0)
3599 SameSDLoopsCount++;
3600
3601 FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
3602 PossiblyLoopIndependent, CommonLevels);
3603 ++TotalArrayPairs;
3604
3605 for (unsigned P = 0; P < Pairs; ++P) {
3606 assert(Pair[P].Src->getType()->isIntegerTy() && "Src must be an integer");
3607 assert(Pair[P].Dst->getType()->isIntegerTy() && "Dst must be an integer");
3608 Pair[P].Loops.resize(N: MaxLevels + 1);
3609 Pair[P].GroupLoops.resize(N: MaxLevels + 1);
3610 Pair[P].Group.resize(N: Pairs);
3611 removeMatchingExtensions(Pair: &Pair[P]);
3612 Pair[P].Classification =
3613 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()), Dst: Pair[P].Dst,
3614 DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops&: Pair[P].Loops);
3615 Pair[P].GroupLoops = Pair[P].Loops;
3616 Pair[P].Group.set(P);
3617 LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
3618 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3619 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3620 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3621 LLVM_DEBUG(dbgs() << "\tloops = ");
3622 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
3623 }
3624
3625 // Test each subscript individually
3626 for (unsigned SI = 0; SI < Pairs; ++SI) {
3627 LLVM_DEBUG(dbgs() << "testing subscript " << SI);
3628 switch (Pair[SI].Classification) {
3629 case Subscript::NonLinear:
3630 // ignore these, but collect loops for later
3631 ++NonlinearSubscriptPairs;
3632 collectCommonLoops(Expression: Pair[SI].Src, LoopNest: LI->getLoopFor(BB: Src->getParent()),
3633 Loops&: Pair[SI].Loops);
3634 collectCommonLoops(Expression: Pair[SI].Dst, LoopNest: LI->getLoopFor(BB: Dst->getParent()),
3635 Loops&: Pair[SI].Loops);
3636 Result.Consistent = false;
3637 break;
3638 case Subscript::ZIV:
3639 LLVM_DEBUG(dbgs() << ", ZIV\n");
3640 if (testZIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
3641 return nullptr;
3642 break;
3643 case Subscript::SIV: {
3644 LLVM_DEBUG(dbgs() << ", SIV\n");
3645 unsigned Level;
3646 if (testSIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Level, Result,
3647 UnderRuntimeAssumptions))
3648 return nullptr;
3649 break;
3650 }
3651 case Subscript::RDIV:
3652 LLVM_DEBUG(dbgs() << ", RDIV\n");
3653 if (testRDIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
3654 return nullptr;
3655 break;
3656 case Subscript::MIV:
3657 LLVM_DEBUG(dbgs() << ", MIV\n");
3658 if (testMIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Loops: Pair[SI].Loops, Result))
3659 return nullptr;
3660 break;
3661 }
3662 }
3663
3664 // Make sure the Scalar flags are set correctly.
3665 SmallBitVector CompleteLoops(MaxLevels + 1);
3666 for (unsigned SI = 0; SI < Pairs; ++SI)
3667 CompleteLoops |= Pair[SI].Loops;
3668 for (unsigned II = 1; II <= CommonLevels; ++II)
3669 if (CompleteLoops[II])
3670 Result.DV[II - 1].Scalar = false;
3671
3672 // Set the distance to zero if the direction is EQ.
3673 // TODO: Ideally, the distance should be set to 0 immediately simultaneously
3674 // with the corresponding direction being set to EQ.
3675 for (unsigned II = 1; II <= Result.getLevels(); ++II) {
3676 if (Result.getDirection(Level: II) == Dependence::DVEntry::EQ) {
3677 if (Result.DV[II - 1].Distance == nullptr)
3678 Result.DV[II - 1].Distance = SE->getZero(Ty: SrcSCEV->getType());
3679 else
3680 assert(Result.DV[II - 1].Distance->isZero() &&
3681 "Inconsistency between distance and direction");
3682 }
3683
3684#ifndef NDEBUG
3685 // Check that the converse (i.e., if the distance is zero, then the
3686 // direction is EQ) holds.
3687 const SCEV *Distance = Result.getDistance(II);
3688 if (Distance && Distance->isZero())
3689 assert(Result.getDirection(II) == Dependence::DVEntry::EQ &&
3690 "Distance is zero, but direction is not EQ");
3691#endif
3692 }
3693
3694 if (SameSDLevels > 0) {
3695 // Extracting SameSD levels from the common levels
3696 // Reverting CommonLevels and MaxLevels to their original values
3697 assert(CommonLevels >= SameSDLevels);
3698 CommonLevels -= SameSDLevels;
3699 MaxLevels += SameSDLevels;
3700 std::unique_ptr<FullDependence::DVEntry[]> DV, DVSameSD;
3701 DV = std::make_unique<FullDependence::DVEntry[]>(num: CommonLevels);
3702 DVSameSD = std::make_unique<FullDependence::DVEntry[]>(num: SameSDLevels);
3703 for (unsigned Level = 0; Level < CommonLevels; ++Level)
3704 DV[Level] = Result.DV[Level];
3705 for (unsigned Level = 0; Level < SameSDLevels; ++Level)
3706 DVSameSD[Level] = Result.DV[CommonLevels + Level];
3707 Result.DV = std::move(DV);
3708 Result.DVSameSD = std::move(DVSameSD);
3709 Result.Levels = CommonLevels;
3710 Result.SameSDLevels = SameSDLevels;
3711 // Result is not consistent if it considers SameSD levels
3712 Result.Consistent = false;
3713 }
3714
3715 if (PossiblyLoopIndependent) {
3716 // Make sure the LoopIndependent flag is set correctly.
3717 // All directions must include equal, otherwise no
3718 // loop-independent dependence is possible.
3719 for (unsigned II = 1; II <= CommonLevels; ++II) {
3720 if (!(Result.getDirection(Level: II) & Dependence::DVEntry::EQ)) {
3721 Result.LoopIndependent = false;
3722 break;
3723 }
3724 }
3725 } else {
3726 // On the other hand, if all directions are equal and there's no
3727 // loop-independent dependence possible, then no dependence exists.
3728 // However, if there are runtime assumptions, we must return the result.
3729 bool AllEqual = true;
3730 for (unsigned II = 1; II <= CommonLevels; ++II) {
3731 if (Result.getDirection(Level: II) != Dependence::DVEntry::EQ) {
3732 AllEqual = false;
3733 break;
3734 }
3735 }
3736 if (AllEqual && Result.Assumptions.getPredicates().empty())
3737 return nullptr;
3738 }
3739
3740 return std::make_unique<FullDependence>(args: std::move(Result));
3741}
3742