1//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DependenceAnalysis is an LLVM pass that analyses dependences between memory
10// accesses. Currently, it is an (incomplete) implementation of the approach
11// described in
12//
13// Practical Dependence Testing
14// Goff, Kennedy, Tseng
15// PLDI 1991
16//
17// There's a single entry point that analyzes the dependence between a pair
18// of memory references in a function, returning either NULL, for no dependence,
19// or a more-or-less detailed description of the dependence between them.
20//
21// Since Clang linearizes some array subscripts, the dependence
22// analysis is using SCEV->delinearize to recover the representation of multiple
23// subscripts, and thus avoid the more expensive and less precise MIV tests. The
24// delinearization is controlled by the flag -da-delinearize.
25//
26// We should pay some careful attention to the possibility of integer overflow
27// in the implementation of the various tests. This could happen with Add,
28// Subtract, or Multiply, with both APInt's and SCEV's.
29//
30// Some non-linear subscript pairs can be handled by the GCD test
31// (and perhaps other tests).
32// Should explore how often these things occur.
33//
34// Finally, it seems like certain test cases expose weaknesses in the SCEV
35// simplification, especially in the handling of sign and zero extensions.
36// It could be useful to spend time exploring these.
37//
38// Please note that this is work in progress and the interface is subject to
39// change.
40//
41//===----------------------------------------------------------------------===//
42// //
43// In memory of Ken Kennedy, 1945 - 2007 //
44// //
45//===----------------------------------------------------------------------===//
46
47#include "llvm/Analysis/DependenceAnalysis.h"
48#include "llvm/ADT/Statistic.h"
49#include "llvm/Analysis/AliasAnalysis.h"
50#include "llvm/Analysis/Delinearization.h"
51#include "llvm/Analysis/LoopInfo.h"
52#include "llvm/Analysis/ScalarEvolution.h"
53#include "llvm/Analysis/ScalarEvolutionExpressions.h"
54#include "llvm/Analysis/ValueTracking.h"
55#include "llvm/IR/InstIterator.h"
56#include "llvm/IR/Module.h"
57#include "llvm/InitializePasses.h"
58#include "llvm/Support/CommandLine.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/raw_ostream.h"
62
63using namespace llvm;
64
65#define DEBUG_TYPE "da"
66
67//===----------------------------------------------------------------------===//
68// statistics
69
70STATISTIC(TotalArrayPairs, "Array pairs tested");
71STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
72STATISTIC(ZIVapplications, "ZIV applications");
73STATISTIC(ZIVindependence, "ZIV independence");
74STATISTIC(StrongSIVapplications, "Strong SIV applications");
75STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
76STATISTIC(StrongSIVindependence, "Strong SIV independence");
77STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
78STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
79STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
80STATISTIC(ExactSIVapplications, "Exact SIV applications");
81STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
82STATISTIC(ExactSIVindependence, "Exact SIV independence");
83STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
84STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
85STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
86STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
87STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
88STATISTIC(GCDapplications, "GCD applications");
89STATISTIC(GCDsuccesses, "GCD successes");
90STATISTIC(GCDindependence, "GCD independence");
91STATISTIC(BanerjeeApplications, "Banerjee applications");
92STATISTIC(BanerjeeIndependence, "Banerjee independence");
93STATISTIC(BanerjeeSuccesses, "Banerjee successes");
94STATISTIC(SameSDLoopsCount, "Loops with Same iteration Space and Depth");
95
96static cl::opt<bool>
97 Delinearize("da-delinearize", cl::init(Val: true), cl::Hidden,
98 cl::desc("Try to delinearize array references."));
99static cl::opt<bool> DisableDelinearizationChecks(
100 "da-disable-delinearization-checks", cl::Hidden,
101 cl::desc(
102 "Disable checks that try to statically verify validity of "
103 "delinearized subscripts. Enabling this option may result in incorrect "
104 "dependence vectors for languages that allow the subscript of one "
105 "dimension to underflow or overflow into another dimension."));
106
107static cl::opt<unsigned> MIVMaxLevelThreshold(
108 "da-miv-max-level-threshold", cl::init(Val: 7), cl::Hidden,
109 cl::desc("Maximum depth allowed for the recursive algorithm used to "
110 "explore MIV direction vectors."));
111
112namespace {
113
114/// Types of dependence test routines.
115enum class DependenceTestType {
116 All,
117 StrongSIV,
118 WeakCrossingSIV,
119 ExactSIV,
120 WeakZeroSIV,
121 ExactRDIV,
122 GCDMIV,
123 BanerjeeMIV,
124};
125
126} // anonymous namespace
127
128static cl::opt<DependenceTestType> EnableDependenceTest(
129 "da-enable-dependence-test", cl::init(Val: DependenceTestType::All),
130 cl::ReallyHidden,
131 cl::desc("Run only specified dependence test routine and disable others. "
132 "The purpose is mainly to exclude the influence of other "
133 "dependence test routines in regression tests. If set to All, all "
134 "dependence test routines are enabled."),
135 cl::values(clEnumValN(DependenceTestType::All, "all",
136 "Enable all dependence test routines."),
137 clEnumValN(DependenceTestType::StrongSIV, "strong-siv",
138 "Enable only Strong SIV test."),
139 clEnumValN(DependenceTestType::WeakCrossingSIV,
140 "weak-crossing-siv",
141 "Enable only Weak-Crossing SIV test."),
142 clEnumValN(DependenceTestType::ExactSIV, "exact-siv",
143 "Enable only Exact SIV test."),
144 clEnumValN(DependenceTestType::WeakZeroSIV, "weak-zero-siv",
145 "Enable only Weak-Zero SIV test."),
146 clEnumValN(DependenceTestType::ExactRDIV, "exact-rdiv",
147 "Enable only Exact RDIV test."),
148 clEnumValN(DependenceTestType::GCDMIV, "gcd-miv",
149 "Enable only GCD MIV test."),
150 clEnumValN(DependenceTestType::BanerjeeMIV, "banerjee-miv",
151 "Enable only Banerjee MIV test.")));
152
153// TODO: This flag is disabled by default because it is still under development.
154// Enable it or delete this flag when the feature is ready.
155static cl::opt<bool> EnableMonotonicityCheck(
156 "da-enable-monotonicity-check", cl::init(Val: false), cl::Hidden,
157 cl::desc("Check if the subscripts are monotonic. If it's not, dependence "
158 "is reported as unknown."));
159
160static cl::opt<bool> DumpMonotonicityReport(
161 "da-dump-monotonicity-report", cl::init(Val: false), cl::Hidden,
162 cl::desc(
163 "When printing analysis, dump the results of monotonicity checks."));
164
165//===----------------------------------------------------------------------===//
166// basics
167
168DependenceAnalysis::Result
169DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
170 auto &AA = FAM.getResult<AAManager>(IR&: F);
171 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
172 auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
173 return DependenceInfo(&F, &AA, &SE, &LI);
174}
175
176AnalysisKey DependenceAnalysis::Key;
177
178INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
179 "Dependence Analysis", true, true)
180INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
181INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
182INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
183INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
184 true, true)
185
186char DependenceAnalysisWrapperPass::ID = 0;
187
188DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
189 : FunctionPass(ID) {}
190
191FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
192 return new DependenceAnalysisWrapperPass();
193}
194
195bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
196 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
197 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
198 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
199 info.reset(p: new DependenceInfo(&F, &AA, &SE, &LI));
200 return false;
201}
202
203DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
204
205void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
206
207void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
208 AU.setPreservesAll();
209 AU.addRequiredTransitive<AAResultsWrapperPass>();
210 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
211 AU.addRequiredTransitive<LoopInfoWrapperPass>();
212}
213
214namespace {
215
216/// The property of monotonicity of a SCEV. To define the monotonicity, assume
217/// a SCEV defined within N-nested loops. Let i_k denote the iteration number
218/// of the k-th loop. Then we can regard the SCEV as an N-ary function:
219///
220/// F(i_1, i_2, ..., i_N)
221///
222/// The domain of i_k is the closed range [0, BTC_k], where BTC_k is the
223/// backedge-taken count of the k-th loop.
224///
225/// A function F is said to be "monotonically increasing with respect to the
226/// k-th loop" if x <= y implies the following condition:
227///
228/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) <=
229/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
230///
231/// where i_1, ..., i_{k-1}, i_{k+1}, ..., i_N, x, and y are elements of their
232/// respective domains.
233///
234/// Likewise F is "monotonically decreasing with respect to the k-th loop"
235/// if x <= y implies
236///
237/// F(i_1, ..., i_{k-1}, x, i_{k+1}, ..., i_N) >=
238/// F(i_1, ..., i_{k-1}, y, i_{k+1}, ..., i_N)
239///
240/// A function F that is monotonically increasing or decreasing with respect to
241/// the k-th loop is simply called "monotonic with respect to k-th loop".
242///
243/// A function F is said to be "multivariate monotonic" when it is monotonic
244/// with respect to all of the N loops.
245///
246/// Since integer comparison can be either signed or unsigned, we need to
247/// distinguish monotonicity in the signed sense from that in the unsigned
248/// sense. Note that the inequality "x <= y" merely indicates loop progression
249/// and is not affected by the difference between signed and unsigned order.
250///
251/// Currently we only consider monotonicity in a signed sense.
252enum class SCEVMonotonicityType {
253 /// We don't know anything about the monotonicity of the SCEV.
254 Unknown,
255
256 /// The SCEV is loop-invariant with respect to the outermost loop. In other
257 /// words, the function F corresponding to the SCEV is a constant function.
258 Invariant,
259
260 /// The function F corresponding to the SCEV is multivariate monotonic in a
261 /// signed sense. Note that the multivariate monotonic function may also be a
262 /// constant function. The order employed in the definition of monotonicity
263 /// is not strict order.
264 MultivariateSignedMonotonic,
265};
266
267struct SCEVMonotonicity {
268 SCEVMonotonicity(SCEVMonotonicityType Type,
269 const SCEV *FailurePoint = nullptr);
270
271 SCEVMonotonicityType getType() const { return Type; }
272
273 const SCEV *getFailurePoint() const { return FailurePoint; }
274
275 bool isUnknown() const { return Type == SCEVMonotonicityType::Unknown; }
276
277 void print(raw_ostream &OS, unsigned Depth) const;
278
279private:
280 SCEVMonotonicityType Type;
281
282 /// The subexpression that caused Unknown. Mainly for debugging purpose.
283 const SCEV *FailurePoint;
284};
285
286/// Check the monotonicity of a SCEV. Since dependence tests (SIV, MIV, etc.)
287/// assume that subscript expressions are (multivariate) monotonic, we need to
288/// verify this property before applying those tests. Violating this assumption
289/// may cause them to produce incorrect results.
290struct SCEVMonotonicityChecker
291 : public SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity> {
292
293 SCEVMonotonicityChecker(ScalarEvolution *SE) : SE(SE) {}
294
295 /// Check the monotonicity of \p Expr. \p Expr must be integer type. If \p
296 /// OutermostLoop is not null, \p Expr must be defined in \p OutermostLoop or
297 /// one of its nested loops.
298 SCEVMonotonicity checkMonotonicity(const SCEV *Expr,
299 const Loop *OutermostLoop);
300
301private:
302 ScalarEvolution *SE;
303
304 /// The outermost loop that DA is analyzing.
305 const Loop *OutermostLoop;
306
307 /// A helper to classify \p Expr as either Invariant or Unknown.
308 SCEVMonotonicity invariantOrUnknown(const SCEV *Expr);
309
310 /// Return true if \p Expr is loop-invariant with respect to the outermost
311 /// loop.
312 bool isLoopInvariant(const SCEV *Expr) const;
313
314 /// A helper to create an Unknown SCEVMonotonicity.
315 SCEVMonotonicity createUnknown(const SCEV *FailurePoint) {
316 return SCEVMonotonicity(SCEVMonotonicityType::Unknown, FailurePoint);
317 }
318
319 SCEVMonotonicity visitAddRecExpr(const SCEVAddRecExpr *Expr);
320
321 SCEVMonotonicity visitConstant(const SCEVConstant *) {
322 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
323 }
324 SCEVMonotonicity visitVScale(const SCEVVScale *) {
325 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
326 }
327
328 // TODO: Handle more cases.
329 SCEVMonotonicity visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
330 return invariantOrUnknown(Expr);
331 }
332 SCEVMonotonicity visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
333 return invariantOrUnknown(Expr);
334 }
335 SCEVMonotonicity visitAddExpr(const SCEVAddExpr *Expr) {
336 return invariantOrUnknown(Expr);
337 }
338 SCEVMonotonicity visitMulExpr(const SCEVMulExpr *Expr) {
339 return invariantOrUnknown(Expr);
340 }
341 SCEVMonotonicity visitPtrToAddrExpr(const SCEVPtrToAddrExpr *Expr) {
342 return invariantOrUnknown(Expr);
343 }
344 SCEVMonotonicity visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) {
345 return invariantOrUnknown(Expr);
346 }
347 SCEVMonotonicity visitTruncateExpr(const SCEVTruncateExpr *Expr) {
348 return invariantOrUnknown(Expr);
349 }
350 SCEVMonotonicity visitUDivExpr(const SCEVUDivExpr *Expr) {
351 return invariantOrUnknown(Expr);
352 }
353 SCEVMonotonicity visitSMaxExpr(const SCEVSMaxExpr *Expr) {
354 return invariantOrUnknown(Expr);
355 }
356 SCEVMonotonicity visitUMaxExpr(const SCEVUMaxExpr *Expr) {
357 return invariantOrUnknown(Expr);
358 }
359 SCEVMonotonicity visitSMinExpr(const SCEVSMinExpr *Expr) {
360 return invariantOrUnknown(Expr);
361 }
362 SCEVMonotonicity visitUMinExpr(const SCEVUMinExpr *Expr) {
363 return invariantOrUnknown(Expr);
364 }
365 SCEVMonotonicity visitSequentialUMinExpr(const SCEVSequentialUMinExpr *Expr) {
366 return invariantOrUnknown(Expr);
367 }
368 SCEVMonotonicity visitUnknown(const SCEVUnknown *Expr) {
369 return invariantOrUnknown(Expr);
370 }
371 SCEVMonotonicity visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
372 return invariantOrUnknown(Expr);
373 }
374
375 friend struct SCEVVisitor<SCEVMonotonicityChecker, SCEVMonotonicity>;
376};
377
378/// A wrapper class for std::optional<APInt> that provides arithmetic operators
379/// with overflow checking in a signed sense. This allows us to omit inserting
380/// an overflow check at every arithmetic operation, which simplifies the code
381/// if the operations are chained like `a + b + c + ...`.
382///
383/// If an calculation overflows, the result becomes "invalid" which is
384/// internally represented by std::nullopt. If any operand of an arithmetic
385/// operation is "invalid", the result will also be "invalid".
386struct OverflowSafeSignedAPInt {
387 OverflowSafeSignedAPInt() : Value(std::nullopt) {}
388 OverflowSafeSignedAPInt(const APInt &V) : Value(V) {}
389 OverflowSafeSignedAPInt(const std::optional<APInt> &V) : Value(V) {}
390
391 OverflowSafeSignedAPInt operator+(const OverflowSafeSignedAPInt &RHS) const {
392 if (!Value || !RHS.Value)
393 return OverflowSafeSignedAPInt();
394 bool Overflow;
395 APInt Result = Value->sadd_ov(RHS: *RHS.Value, Overflow);
396 if (Overflow)
397 return OverflowSafeSignedAPInt();
398 return OverflowSafeSignedAPInt(Result);
399 }
400
401 OverflowSafeSignedAPInt operator+(int RHS) const {
402 if (!Value)
403 return OverflowSafeSignedAPInt();
404 return *this + fromInt(V: RHS);
405 }
406
407 OverflowSafeSignedAPInt operator-(const OverflowSafeSignedAPInt &RHS) const {
408 if (!Value || !RHS.Value)
409 return OverflowSafeSignedAPInt();
410 bool Overflow;
411 APInt Result = Value->ssub_ov(RHS: *RHS.Value, Overflow);
412 if (Overflow)
413 return OverflowSafeSignedAPInt();
414 return OverflowSafeSignedAPInt(Result);
415 }
416
417 OverflowSafeSignedAPInt operator-(int RHS) const {
418 if (!Value)
419 return OverflowSafeSignedAPInt();
420 return *this - fromInt(V: RHS);
421 }
422
423 OverflowSafeSignedAPInt operator*(const OverflowSafeSignedAPInt &RHS) const {
424 if (!Value || !RHS.Value)
425 return OverflowSafeSignedAPInt();
426 bool Overflow;
427 APInt Result = Value->smul_ov(RHS: *RHS.Value, Overflow);
428 if (Overflow)
429 return OverflowSafeSignedAPInt();
430 return OverflowSafeSignedAPInt(Result);
431 }
432
433 OverflowSafeSignedAPInt operator-() const {
434 if (!Value)
435 return OverflowSafeSignedAPInt();
436 if (Value->isMinSignedValue())
437 return OverflowSafeSignedAPInt();
438 return OverflowSafeSignedAPInt(-*Value);
439 }
440
441 operator bool() const { return Value.has_value(); }
442
443 bool operator!() const { return !Value.has_value(); }
444
445 const APInt &operator*() const {
446 assert(Value && "Value is not available.");
447 return *Value;
448 }
449
450 const APInt *operator->() const {
451 assert(Value && "Value is not available.");
452 return &*Value;
453 }
454
455private:
456 /// Underlying value. std::nullopt means "unknown". An arithmetic operation on
457 /// "unknown" always produces "unknown".
458 std::optional<APInt> Value;
459
460 OverflowSafeSignedAPInt fromInt(uint64_t V) const {
461 assert(Value && "Value is not available.");
462 return OverflowSafeSignedAPInt(
463 APInt(Value->getBitWidth(), V, /*isSigned=*/true));
464 }
465};
466
467} // anonymous namespace
468
469// Used to test the dependence analyzer.
470// Looks through the function, noting instructions that may access memory.
471// Calls depends() on every possible pair and prints out the result.
472// Ignores all other instructions.
473static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
474 ScalarEvolution &SE, LoopInfo &LI,
475 bool NormalizeResults) {
476 auto *F = DA->getFunction();
477
478 if (DumpMonotonicityReport) {
479 SCEVMonotonicityChecker Checker(&SE);
480 OS << "Monotonicity check:\n";
481 for (Instruction &Inst : instructions(F)) {
482 if (!isa<LoadInst>(Val: Inst) && !isa<StoreInst>(Val: Inst))
483 continue;
484 Value *Ptr = getLoadStorePointerOperand(V: &Inst);
485 const Loop *L = LI.getLoopFor(BB: Inst.getParent());
486 const Loop *OutermostLoop = L ? L->getOutermostLoop() : nullptr;
487 const SCEV *PtrSCEV = SE.getSCEVAtScope(V: Ptr, L);
488 const SCEV *AccessFn = SE.removePointerBase(S: PtrSCEV);
489 SCEVMonotonicity Mon = Checker.checkMonotonicity(Expr: AccessFn, OutermostLoop);
490 OS.indent(NumSpaces: 2) << "Inst: " << Inst << "\n";
491 OS.indent(NumSpaces: 4) << "Expr: " << *AccessFn << "\n";
492 Mon.print(OS, Depth: 4);
493 }
494 OS << "\n";
495 }
496
497 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
498 ++SrcI) {
499 if (SrcI->mayReadOrWriteMemory()) {
500 for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE;
501 ++DstI) {
502 if (DstI->mayReadOrWriteMemory()) {
503 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
504 OS << " da analyze - ";
505 if (auto D = DA->depends(Src: &*SrcI, Dst: &*DstI,
506 /*UnderRuntimeAssumptions=*/true)) {
507
508#ifndef NDEBUG
509 // Verify that the distance being zero is equivalent to the
510 // direction being EQ.
511 for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
512 const SCEV *Distance = D->getDistance(Level);
513 bool IsDistanceZero = Distance && Distance->isZero();
514 bool IsDirectionEQ =
515 D->getDirection(Level) == Dependence::DVEntry::EQ;
516 assert(IsDistanceZero == IsDirectionEQ &&
517 "Inconsistent distance and direction.");
518 }
519#endif
520
521 // Normalize negative direction vectors if required by clients.
522 if (NormalizeResults && D->normalize(SE: &SE))
523 OS << "normalized - ";
524 D->dump(OS);
525 } else
526 OS << "none!\n";
527 }
528 }
529 }
530 }
531}
532
533void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
534 const Module *) const {
535 dumpExampleDependence(
536 OS, DA: info.get(), SE&: getAnalysis<ScalarEvolutionWrapperPass>().getSE(),
537 LI&: getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), NormalizeResults: false);
538}
539
540PreservedAnalyses
541DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
542 OS << "Printing analysis 'Dependence Analysis' for function '" << F.getName()
543 << "':\n";
544 dumpExampleDependence(OS, DA: &FAM.getResult<DependenceAnalysis>(IR&: F),
545 SE&: FAM.getResult<ScalarEvolutionAnalysis>(IR&: F),
546 LI&: FAM.getResult<LoopAnalysis>(IR&: F), NormalizeResults);
547 return PreservedAnalyses::all();
548}
549
550//===----------------------------------------------------------------------===//
551// Dependence methods
552
553// Returns true if this is an input dependence.
554bool Dependence::isInput() const {
555 return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
556}
557
558// Returns true if this is an output dependence.
559bool Dependence::isOutput() const {
560 return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
561}
562
563// Returns true if this is an flow (aka true) dependence.
564bool Dependence::isFlow() const {
565 return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
566}
567
568// Returns true if this is an anti dependence.
569bool Dependence::isAnti() const {
570 return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
571}
572
573// Returns true if a particular level is scalar; that is,
574// if no subscript in the source or destination mention the induction
575// variable associated with the loop at this level.
576// Leave this out of line, so it will serve as a virtual method anchor
577bool Dependence::isScalar(unsigned level, bool IsSameSD) const { return false; }
578
579//===----------------------------------------------------------------------===//
580// FullDependence methods
581
582FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
583 const SCEVUnionPredicate &Assumes,
584 bool PossiblyLoopIndependent,
585 unsigned CommonLevels)
586 : Dependence(Source, Destination, Assumes), Levels(CommonLevels),
587 LoopIndependent(PossiblyLoopIndependent) {
588 SameSDLevels = 0;
589 if (CommonLevels)
590 DV = std::make_unique<DVEntry[]>(num: CommonLevels);
591}
592
593// FIXME: in some cases the meaning of a negative direction vector
594// may not be straightforward, e.g.,
595// for (int i = 0; i < 32; ++i) {
596// Src: A[i] = ...;
597// Dst: use(A[31 - i]);
598// }
599// The dependency is
600// flow { Src[i] -> Dst[31 - i] : when i >= 16 } and
601// anti { Dst[i] -> Src[31 - i] : when i < 16 },
602// -- hence a [<>].
603// As long as a dependence result contains '>' ('<>', '<=>', "*"), it
604// means that a reversed/normalized dependence needs to be considered
605// as well. Nevertheless, current isDirectionNegative() only returns
606// true with a '>' or '>=' dependency for ease of canonicalizing the
607// dependency vector, since the reverse of '<>', '<=>' and "*" is itself.
608bool FullDependence::isDirectionNegative() const {
609 for (unsigned Level = 1; Level <= Levels; ++Level) {
610 unsigned char Direction = DV[Level - 1].Direction;
611 if (Direction == Dependence::DVEntry::EQ)
612 continue;
613 if (Direction == Dependence::DVEntry::GT ||
614 Direction == Dependence::DVEntry::GE)
615 return true;
616 return false;
617 }
618 return false;
619}
620
621void FullDependence::negate(ScalarEvolution &SE) {
622 std::swap(a&: Src, b&: Dst);
623 for (unsigned Level = 1; Level <= Levels; ++Level) {
624 unsigned char Direction = DV[Level - 1].Direction;
625 // Reverse the direction vector, this means LT becomes GT
626 // and GT becomes LT.
627 unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
628 if (Direction & Dependence::DVEntry::LT)
629 RevDirection |= Dependence::DVEntry::GT;
630 if (Direction & Dependence::DVEntry::GT)
631 RevDirection |= Dependence::DVEntry::LT;
632 DV[Level - 1].Direction = RevDirection;
633 // Reverse the dependence distance as well.
634 if (DV[Level - 1].Distance != nullptr)
635 DV[Level - 1].Distance = SE.getNegativeSCEV(V: DV[Level - 1].Distance);
636 }
637}
638
639bool FullDependence::normalize(ScalarEvolution *SE) {
640 if (!isDirectionNegative())
641 return false;
642
643 LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
644 dump(dbgs()););
645 negate(SE&: *SE);
646 LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
647 dump(dbgs()););
648 return true;
649}
650
651// The rest are simple getters that hide the implementation.
652
653// getDirection - Returns the direction associated with a particular common or
654// SameSD level.
655unsigned FullDependence::getDirection(unsigned Level, bool IsSameSD) const {
656 return getDVEntry(Level, IsSameSD).Direction;
657}
658
659// Returns the distance (or NULL) associated with a particular common or
660// SameSD level.
661const SCEV *FullDependence::getDistance(unsigned Level, bool IsSameSD) const {
662 return getDVEntry(Level, IsSameSD).Distance;
663}
664
665// Returns true if a particular regular or SameSD level is scalar; that is,
666// if no subscript in the source or destination mention the induction variable
667// associated with the loop at this level.
668bool FullDependence::isScalar(unsigned Level, bool IsSameSD) const {
669 return getDVEntry(Level, IsSameSD).Scalar;
670}
671
672// inSameSDLoops - Returns true if this level is an SameSD level, i.e.,
673// performed across two separate loop nests that have the Same iteration space
674// and Depth.
675bool FullDependence::inSameSDLoops(unsigned Level) const {
676 assert(0 < Level && Level <= static_cast<unsigned>(Levels) + SameSDLevels &&
677 "Level out of range");
678 return Level > Levels;
679}
680
681//===----------------------------------------------------------------------===//
682// SCEVMonotonicity
683
684SCEVMonotonicity::SCEVMonotonicity(SCEVMonotonicityType Type,
685 const SCEV *FailurePoint)
686 : Type(Type), FailurePoint(FailurePoint) {
687 assert(
688 ((Type == SCEVMonotonicityType::Unknown) == (FailurePoint != nullptr)) &&
689 "FailurePoint must be provided iff Type is Unknown");
690}
691
692void SCEVMonotonicity::print(raw_ostream &OS, unsigned Depth) const {
693 OS.indent(NumSpaces: Depth) << "Monotonicity: ";
694 switch (Type) {
695 case SCEVMonotonicityType::Unknown:
696 assert(FailurePoint && "FailurePoint must be provided for Unknown");
697 OS << "Unknown\n";
698 OS.indent(NumSpaces: Depth) << "Reason: " << *FailurePoint << "\n";
699 break;
700 case SCEVMonotonicityType::Invariant:
701 OS << "Invariant\n";
702 break;
703 case SCEVMonotonicityType::MultivariateSignedMonotonic:
704 OS << "MultivariateSignedMonotonic\n";
705 break;
706 }
707}
708
709bool SCEVMonotonicityChecker::isLoopInvariant(const SCEV *Expr) const {
710 return !OutermostLoop || SE->isLoopInvariant(S: Expr, L: OutermostLoop);
711}
712
713SCEVMonotonicity SCEVMonotonicityChecker::invariantOrUnknown(const SCEV *Expr) {
714 if (isLoopInvariant(Expr))
715 return SCEVMonotonicity(SCEVMonotonicityType::Invariant);
716 return createUnknown(FailurePoint: Expr);
717}
718
719SCEVMonotonicity
720SCEVMonotonicityChecker::checkMonotonicity(const SCEV *Expr,
721 const Loop *OutermostLoop) {
722 assert((!OutermostLoop || OutermostLoop->isOutermost()) &&
723 "OutermostLoop must be outermost");
724 assert(Expr->getType()->isIntegerTy() && "Expr must be integer type");
725 this->OutermostLoop = OutermostLoop;
726 return visit(S: Expr);
727}
728
729/// We only care about an affine AddRec at the moment. For an affine AddRec,
730/// the monotonicity can be inferred from its nowrap property. For example, let
731/// X and Y be loop-invariant, and assume Y is non-negative. An AddRec
732/// {X,+.Y}<nsw> implies:
733///
734/// X <=s (X + Y) <=s ((X + Y) + Y) <=s ...
735///
736/// Thus, we can conclude that the AddRec is monotonically increasing with
737/// respect to the associated loop in a signed sense. The similar reasoning
738/// applies when Y is non-positive, leading to a monotonically decreasing
739/// AddRec.
740SCEVMonotonicity
741SCEVMonotonicityChecker::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
742 if (!Expr->isAffine() || !Expr->hasNoSignedWrap())
743 return createUnknown(FailurePoint: Expr);
744
745 const SCEV *Start = Expr->getStart();
746 const SCEV *Step = Expr->getStepRecurrence(SE&: *SE);
747
748 SCEVMonotonicity StartMon = visit(S: Start);
749 if (StartMon.isUnknown())
750 return StartMon;
751
752 if (!isLoopInvariant(Expr: Step))
753 return createUnknown(FailurePoint: Expr);
754
755 return SCEVMonotonicity(SCEVMonotonicityType::MultivariateSignedMonotonic);
756}
757
758//===----------------------------------------------------------------------===//
759// DependenceInfo methods
760
761// For debugging purposes. Dumps a dependence to OS.
762void Dependence::dump(raw_ostream &OS) const {
763 if (isConfused())
764 OS << "confused";
765 else {
766 if (isFlow())
767 OS << "flow";
768 else if (isOutput())
769 OS << "output";
770 else if (isAnti())
771 OS << "anti";
772 else if (isInput())
773 OS << "input";
774 dumpImp(OS);
775 unsigned SameSDLevels = getSameSDLevels();
776 if (SameSDLevels > 0) {
777 OS << " / assuming " << SameSDLevels << " loop level(s) fused: ";
778 dumpImp(OS, IsSameSD: true);
779 }
780 }
781 OS << "!\n";
782
783 SCEVUnionPredicate Assumptions = getRuntimeAssumptions();
784 if (!Assumptions.isAlwaysTrue()) {
785 OS << " Runtime Assumptions:\n";
786 Assumptions.print(OS, Depth: 2);
787 }
788}
789
790// For debugging purposes. Dumps a dependence to OS with or without considering
791// the SameSD levels.
792void Dependence::dumpImp(raw_ostream &OS, bool IsSameSD) const {
793 unsigned Levels = getLevels();
794 unsigned SameSDLevels = getSameSDLevels();
795 bool OnSameSD = false;
796 unsigned LevelNum = Levels;
797 if (IsSameSD)
798 LevelNum += SameSDLevels;
799 OS << " [";
800 for (unsigned II = 1; II <= LevelNum; ++II) {
801 if (!OnSameSD && inSameSDLoops(Level: II))
802 OnSameSD = true;
803 const SCEV *Distance = getDistance(Level: II, SameSD: OnSameSD);
804 if (Distance)
805 OS << *Distance;
806 else if (isScalar(level: II, IsSameSD: OnSameSD))
807 OS << "S";
808 else {
809 unsigned Direction = getDirection(Level: II, SameSD: OnSameSD);
810 if (Direction == DVEntry::ALL)
811 OS << "*";
812 else {
813 if (Direction & DVEntry::LT)
814 OS << "<";
815 if (Direction & DVEntry::EQ)
816 OS << "=";
817 if (Direction & DVEntry::GT)
818 OS << ">";
819 }
820 }
821 if (II < LevelNum)
822 OS << " ";
823 }
824 if (isLoopIndependent())
825 OS << "|<";
826 OS << "]";
827}
828
829// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
830// underlaying objects. If LocA and LocB are known to not alias (for any reason:
831// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
832// Otherwise the underlying objects are checked to see if they point to
833// different identifiable objects.
834static AliasResult underlyingObjectsAlias(AAResults *AA, const DataLayout &DL,
835 const MemoryLocation &LocA,
836 const MemoryLocation &LocB) {
837 // Check the original locations (minus size) for noalias, which can happen for
838 // tbaa, incompatible underlying object locations, etc.
839 MemoryLocation LocAS =
840 MemoryLocation::getBeforeOrAfter(Ptr: LocA.Ptr, AATags: LocA.AATags);
841 MemoryLocation LocBS =
842 MemoryLocation::getBeforeOrAfter(Ptr: LocB.Ptr, AATags: LocB.AATags);
843 BatchAAResults BAA(*AA);
844 BAA.enableCrossIterationMode();
845
846 if (BAA.isNoAlias(LocA: LocAS, LocB: LocBS))
847 return AliasResult::NoAlias;
848
849 // Check the underlying objects are the same
850 const Value *AObj = getUnderlyingObject(V: LocA.Ptr);
851 const Value *BObj = getUnderlyingObject(V: LocB.Ptr);
852
853 // If the underlying objects are the same, they must alias
854 if (AObj == BObj)
855 return AliasResult::MustAlias;
856
857 // We may have hit the recursion limit for underlying objects, or have
858 // underlying objects where we don't know they will alias.
859 if (!isIdentifiedObject(V: AObj) || !isIdentifiedObject(V: BObj))
860 return AliasResult::MayAlias;
861
862 // Otherwise we know the objects are different and both identified objects so
863 // must not alias.
864 return AliasResult::NoAlias;
865}
866
867// Returns true if the load or store can be analyzed. Atomic and volatile
868// operations have properties which this analysis does not understand.
869static bool isLoadOrStore(const Instruction *I) {
870 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
871 return LI->isUnordered();
872 else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
873 return SI->isUnordered();
874 return false;
875}
876
877// Returns true if two loops have the Same iteration Space and Depth. To be
878// more specific, two loops have SameSD if they are in the same nesting
879// depth and have the same backedge count. SameSD stands for Same iteration
880// Space and Depth.
881bool DependenceInfo::haveSameSD(const Loop *SrcLoop,
882 const Loop *DstLoop) const {
883 if (SrcLoop == DstLoop)
884 return true;
885
886 if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
887 return false;
888
889 if (!SrcLoop || !SrcLoop->getLoopLatch() || !DstLoop ||
890 !DstLoop->getLoopLatch())
891 return false;
892
893 const SCEV *SrcUB = SE->getBackedgeTakenCount(L: SrcLoop);
894 const SCEV *DstUB = SE->getBackedgeTakenCount(L: DstLoop);
895 if (isa<SCEVCouldNotCompute>(Val: SrcUB) || isa<SCEVCouldNotCompute>(Val: DstUB))
896 return false;
897
898 Type *WiderType = SE->getWiderType(Ty1: SrcUB->getType(), Ty2: DstUB->getType());
899 SrcUB = SE->getNoopOrZeroExtend(V: SrcUB, Ty: WiderType);
900 DstUB = SE->getNoopOrZeroExtend(V: DstUB, Ty: WiderType);
901
902 if (SrcUB == DstUB)
903 return true;
904
905 return false;
906}
907
908// Examines the loop nesting of the Src and Dst
909// instructions and establishes their shared loops. Sets the variables
910// CommonLevels, SrcLevels, and MaxLevels.
911// The source and destination instructions needn't be contained in the same
912// loop. The routine establishNestingLevels finds the level of most deeply
913// nested loop that contains them both, CommonLevels. An instruction that's
914// not contained in a loop is at level = 0. MaxLevels is equal to the level
915// of the source plus the level of the destination, minus CommonLevels.
916// This lets us allocate vectors MaxLevels in length, with room for every
917// distinct loop referenced in both the source and destination subscripts.
918// The variable SrcLevels is the nesting depth of the source instruction.
919// It's used to help calculate distinct loops referenced by the destination.
920// Here's the map from loops to levels:
921// 0 - unused
922// 1 - outermost common loop
923// ... - other common loops
924// CommonLevels - innermost common loop
925// ... - loops containing Src but not Dst
926// SrcLevels - innermost loop containing Src but not Dst
927// ... - loops containing Dst but not Src
928// MaxLevels - innermost loops containing Dst but not Src
929// Consider the follow code fragment:
930// for (a = ...) {
931// for (b = ...) {
932// for (c = ...) {
933// for (d = ...) {
934// A[] = ...;
935// }
936// }
937// for (e = ...) {
938// for (f = ...) {
939// for (g = ...) {
940// ... = A[];
941// }
942// }
943// }
944// }
945// }
946// If we're looking at the possibility of a dependence between the store
947// to A (the Src) and the load from A (the Dst), we'll note that they
948// have 2 loops in common, so CommonLevels will equal 2 and the direction
949// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
950// A map from loop names to loop numbers would look like
951// a - 1
952// b - 2 = CommonLevels
953// c - 3
954// d - 4 = SrcLevels
955// e - 5
956// f - 6
957// g - 7 = MaxLevels
958// SameSDLevels counts the number of levels after common levels that are
959// not common but have the same iteration space and depth. Internally this
960// is checked using haveSameSD. Currently we only need to check for SameSD
961// levels up to one level after the common levels, and therefore SameSDLevels
962// will be either 0 or 1.
963// 1. Assume that in this code fragment, levels c and e have the same iteration
964// space and depth, but levels d and f does not. Then SameSDLevels is set to 1.
965// In that case the level numbers for the previous code look like
966// a - 1
967// b - 2
968// c,e - 3 = CommonLevels
969// d - 4 = SrcLevels
970// f - 5
971// g - 6 = MaxLevels
972void DependenceInfo::establishNestingLevels(const Instruction *Src,
973 const Instruction *Dst) {
974 const BasicBlock *SrcBlock = Src->getParent();
975 const BasicBlock *DstBlock = Dst->getParent();
976 unsigned SrcLevel = LI->getLoopDepth(BB: SrcBlock);
977 unsigned DstLevel = LI->getLoopDepth(BB: DstBlock);
978 const Loop *SrcLoop = LI->getLoopFor(BB: SrcBlock);
979 const Loop *DstLoop = LI->getLoopFor(BB: DstBlock);
980 SrcLevels = SrcLevel;
981 MaxLevels = SrcLevel + DstLevel;
982 SameSDLevels = 0;
983 while (SrcLevel > DstLevel) {
984 SrcLoop = SrcLoop->getParentLoop();
985 SrcLevel--;
986 }
987 while (DstLevel > SrcLevel) {
988 DstLoop = DstLoop->getParentLoop();
989 DstLevel--;
990 }
991
992 const Loop *SrcUncommonFrontier = nullptr, *DstUncommonFrontier = nullptr;
993 // Find the first uncommon level pair and check if the associated levels have
994 // the SameSD.
995 while (SrcLoop != DstLoop) {
996 SrcUncommonFrontier = SrcLoop;
997 DstUncommonFrontier = DstLoop;
998 SrcLoop = SrcLoop->getParentLoop();
999 DstLoop = DstLoop->getParentLoop();
1000 SrcLevel--;
1001 }
1002 if (SrcUncommonFrontier && DstUncommonFrontier &&
1003 haveSameSD(SrcLoop: SrcUncommonFrontier, DstLoop: DstUncommonFrontier))
1004 SameSDLevels = 1;
1005 CommonLevels = SrcLevel;
1006 MaxLevels -= CommonLevels;
1007}
1008
1009// Given one of the loops containing the source, return
1010// its level index in our numbering scheme.
1011unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
1012 return SrcLoop->getLoopDepth();
1013}
1014
1015// Given one of the loops containing the destination,
1016// return its level index in our numbering scheme.
1017unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
1018 unsigned D = DstLoop->getLoopDepth();
1019 if (D > CommonLevels)
1020 // This tries to make sure that we assign unique numbers to src and dst when
1021 // the memory accesses reside in different loops that have the same depth.
1022 return D - CommonLevels + SrcLevels;
1023 else
1024 return D;
1025}
1026
1027// Returns true if Expression is loop invariant in LoopNest.
1028bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
1029 const Loop *LoopNest) const {
1030 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of
1031 // any loop as invariant, because we only consier expression evaluation at a
1032 // specific position (where the array access takes place), and not across the
1033 // entire function.
1034 if (!LoopNest)
1035 return true;
1036
1037 // If the expression is invariant in the outermost loop of the loop nest, it
1038 // is invariant anywhere in the loop nest.
1039 return SE->isLoopInvariant(S: Expression, L: LoopNest->getOutermostLoop());
1040}
1041
1042// Finds the set of loops from the LoopNest that
1043// have a level <= CommonLevels and are referred to by the SCEV Expression.
1044void DependenceInfo::collectCommonLoops(const SCEV *Expression,
1045 const Loop *LoopNest,
1046 SmallBitVector &Loops) const {
1047 while (LoopNest) {
1048 unsigned Level = LoopNest->getLoopDepth();
1049 if (Level <= CommonLevels && !SE->isLoopInvariant(S: Expression, L: LoopNest))
1050 Loops.set(Level);
1051 LoopNest = LoopNest->getParentLoop();
1052 }
1053}
1054
1055// Examine the scev and return true iff it's affine.
1056// Collect any loops mentioned in the set of "Loops".
1057bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
1058 SmallBitVector &Loops, bool IsSrc) {
1059 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
1060 if (!AddRec)
1061 return isLoopInvariant(Expression: Expr, LoopNest);
1062
1063 // The AddRec must depend on one of the containing loops. Otherwise,
1064 // mapSrcLoop and mapDstLoop return indices outside the intended range. This
1065 // can happen when a subscript in one loop references an IV from a sibling
1066 // loop that could not be replaced with a concrete exit value by
1067 // getSCEVAtScope.
1068 const Loop *L = LoopNest;
1069 while (L && AddRec->getLoop() != L)
1070 L = L->getParentLoop();
1071 if (!L)
1072 return false;
1073
1074 const SCEV *Start = AddRec->getStart();
1075 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
1076 if (!isLoopInvariant(Expression: Step, LoopNest))
1077 return false;
1078 if (IsSrc)
1079 Loops.set(mapSrcLoop(SrcLoop: AddRec->getLoop()));
1080 else
1081 Loops.set(mapDstLoop(DstLoop: AddRec->getLoop()));
1082 return checkSubscript(Expr: Start, LoopNest, Loops, IsSrc);
1083}
1084
1085// Examine the scev and return true iff it's linear.
1086// Collect any loops mentioned in the set of "Loops".
1087bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
1088 SmallBitVector &Loops) {
1089 return checkSubscript(Expr: Src, LoopNest, Loops, IsSrc: true);
1090}
1091
1092// Examine the scev and return true iff it's linear.
1093// Collect any loops mentioned in the set of "Loops".
1094bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
1095 SmallBitVector &Loops) {
1096 return checkSubscript(Expr: Dst, LoopNest, Loops, IsSrc: false);
1097}
1098
1099// Examines the subscript pair (the Src and Dst SCEVs)
1100// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
1101// Collects the associated loops in a set.
1102DependenceInfo::Subscript::ClassificationKind
1103DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
1104 const SCEV *Dst, const Loop *DstLoopNest,
1105 SmallBitVector &Loops) {
1106 SmallBitVector SrcLoops(MaxLevels + 1);
1107 SmallBitVector DstLoops(MaxLevels + 1);
1108 if (!checkSrcSubscript(Src, LoopNest: SrcLoopNest, Loops&: SrcLoops))
1109 return Subscript::NonLinear;
1110 if (!checkDstSubscript(Dst, LoopNest: DstLoopNest, Loops&: DstLoops))
1111 return Subscript::NonLinear;
1112 Loops = SrcLoops;
1113 Loops |= DstLoops;
1114 unsigned N = Loops.count();
1115 if (N == 0)
1116 return Subscript::ZIV;
1117 if (N == 1)
1118 return Subscript::SIV;
1119 if (N == 2 && SrcLoops.count() == 1 && DstLoops.count() == 1)
1120 return Subscript::RDIV;
1121 return Subscript::MIV;
1122}
1123
1124// All subscripts are all the same type.
1125// Loop bound may be smaller (e.g., a char).
1126// Should zero extend loop bound, since it's always >= 0.
1127// This routine collects upper bound and extends or truncates if needed.
1128// Truncating is safe when subscripts are known not to wrap. Cases without
1129// nowrap flags should have been rejected earlier.
1130// Return null if no bound available.
1131const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
1132 if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
1133 const SCEV *UB = SE->getBackedgeTakenCount(L);
1134 return SE->getTruncateOrZeroExtend(V: UB, Ty: T);
1135 }
1136 return nullptr;
1137}
1138
1139// Calls collectUpperBound(), then attempts to cast it to APInt.
1140// If the cast fails, returns std::nullopt.
1141std::optional<APInt>
1142DependenceInfo::collectNonNegativeConstantUpperBound(const Loop *L,
1143 Type *T) const {
1144 if (const SCEV *UB = collectUpperBound(L, T))
1145 if (auto *C = dyn_cast<SCEVConstant>(Val: UB)) {
1146 APInt Res = C->getAPInt();
1147 if (Res.isNonNegative())
1148 return Res;
1149 }
1150 return std::nullopt;
1151}
1152
1153/// Returns \p A - \p B if it guaranteed not to signed wrap. Otherwise returns
1154/// nullptr. \p A and \p B must have the same integer type.
1155static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
1156 ScalarEvolution &SE) {
1157 if (SE.willNotOverflow(BinOp: Instruction::Sub, /*Signed=*/true, LHS: A, RHS: B))
1158 return SE.getMinusSCEV(LHS: A, RHS: B);
1159 return nullptr;
1160}
1161
1162/// Returns true iff \p Test is enabled.
1163static bool isDependenceTestEnabled(DependenceTestType Test) {
1164 if (EnableDependenceTest == DependenceTestType::All)
1165 return true;
1166 return EnableDependenceTest == Test;
1167}
1168
1169// testZIV -
1170// When we have a pair of subscripts of the form [c1] and [c2],
1171// where c1 and c2 are both loop invariant, we attack it using
1172// the ZIV test. Basically, we test by comparing the two values,
1173// but there are actually three possible results:
1174// 1) the values are equal, so there's a dependence
1175// 2) the values are different, so there's no dependence
1176// 3) the values might be equal, so we have to assume a dependence.
1177//
1178// Return true if dependence disproved.
1179bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
1180 FullDependence &Result) const {
1181 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1182 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1183 ++ZIVapplications;
1184 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: Src, RHS: Dst)) {
1185 LLVM_DEBUG(dbgs() << " provably dependent\n");
1186 return false; // provably dependent
1187 }
1188 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_NE, LHS: Src, RHS: Dst)) {
1189 LLVM_DEBUG(dbgs() << " provably independent\n");
1190 ++ZIVindependence;
1191 return true; // provably independent
1192 }
1193 LLVM_DEBUG(dbgs() << " possibly dependent\n");
1194 return false; // possibly dependent
1195}
1196
1197// strongSIVtest -
1198// From the paper, Practical Dependence Testing, Section 4.2.1
1199//
1200// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1201// where i is an induction variable, c1 and c2 are loop invariant,
1202// and a is a constant, we can solve it exactly using the Strong SIV test.
1203//
1204// Can prove independence. Failing that, can compute distance (and direction).
1205// In the presence of symbolic terms, we can sometimes make progress.
1206//
1207// If there's a dependence,
1208//
1209// c1 + a*i = c2 + a*i'
1210//
1211// The dependence distance is
1212//
1213// d = i' - i = (c1 - c2)/a
1214//
1215// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1216// loop's upper bound. If a dependence exists, the dependence direction is
1217// defined as
1218//
1219// { < if d > 0
1220// direction = { = if d = 0
1221// { > if d < 0
1222//
1223// Return true if dependence disproved.
1224bool DependenceInfo::strongSIVtest(const SCEVAddRecExpr *Src,
1225 const SCEVAddRecExpr *Dst, unsigned Level,
1226 FullDependence &Result,
1227 bool UnderRuntimeAssumptions) {
1228 if (!isDependenceTestEnabled(Test: DependenceTestType::StrongSIV))
1229 return false;
1230
1231 const SCEV *Coeff = Src->getStepRecurrence(SE&: *SE);
1232 assert(Coeff == Dst->getStepRecurrence(*SE) &&
1233 "Expecting same coefficient in Strong SIV test");
1234 const SCEV *SrcConst = Src->getStart();
1235 const SCEV *DstConst = Dst->getStart();
1236 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1237 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
1238 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1239 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
1240 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1241 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
1242 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1243 ++StrongSIVapplications;
1244 assert(0 < Level && Level <= CommonLevels && "level out of range");
1245 Level--;
1246
1247 const SCEV *Delta = minusSCEVNoSignedOverflow(A: SrcConst, B: DstConst, SE&: *SE);
1248 if (!Delta)
1249 return false;
1250 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
1251 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1252
1253 // Can we compute distance?
1254 if (isa<SCEVConstant>(Val: Delta) && isa<SCEVConstant>(Val: Coeff)) {
1255 APInt ConstDelta = cast<SCEVConstant>(Val: Delta)->getAPInt();
1256 APInt ConstCoeff = cast<SCEVConstant>(Val: Coeff)->getAPInt();
1257 APInt Distance = ConstDelta; // these need to be initialized
1258 APInt Remainder = ConstDelta;
1259 APInt::sdivrem(LHS: ConstDelta, RHS: ConstCoeff, Quotient&: Distance, Remainder);
1260 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1261 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1262 // Make sure Coeff divides Delta exactly
1263 if (Remainder != 0) {
1264 // Coeff doesn't divide Distance, no dependence
1265 ++StrongSIVindependence;
1266 ++StrongSIVsuccesses;
1267 return true;
1268 }
1269 Result.DV[Level].Distance = SE->getConstant(Val: Distance);
1270 if (Distance.sgt(RHS: 0))
1271 Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1272 else if (Distance.slt(RHS: 0))
1273 Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1274 else
1275 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1276 ++StrongSIVsuccesses;
1277 } else if (Delta->isZero()) {
1278 // Check if coefficient could be zero. If so, 0/0 is undefined and we
1279 // cannot conclude that only same-iteration dependencies exist.
1280 // When coeff=0, all iterations access the same location.
1281 if (SE->isKnownNonZero(S: Coeff)) {
1282 LLVM_DEBUG(
1283 dbgs() << "\t Coefficient proven non-zero by SCEV analysis\n");
1284 } else {
1285 // Cannot prove at compile time, would need runtime assumption.
1286 if (UnderRuntimeAssumptions) {
1287 const SCEVPredicate *Pred = SE->getComparePredicate(
1288 Pred: ICmpInst::ICMP_NE, LHS: Coeff, RHS: SE->getZero(Ty: Coeff->getType()));
1289 Result.Assumptions = Result.Assumptions.getUnionWith(N: Pred, SE&: *SE);
1290 LLVM_DEBUG(dbgs() << "\t Added runtime assumption: " << *Coeff
1291 << " != 0\n");
1292 } else {
1293 // Cannot add runtime assumptions, this test cannot handle this case.
1294 // Let more complex tests try.
1295 LLVM_DEBUG(dbgs() << "\t Would need runtime assumption " << *Coeff
1296 << " != 0, but not allowed. Failing this test.\n");
1297 return false;
1298 }
1299 }
1300 // Since 0/X == 0 (where X is known non-zero or assumed non-zero).
1301 Result.DV[Level].Distance = Delta;
1302 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1303 ++StrongSIVsuccesses;
1304 } else {
1305 if (Coeff->isOne()) {
1306 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
1307 Result.DV[Level].Distance = Delta; // since X/1 == X
1308 }
1309
1310 // maybe we can get a useful direction
1311 bool DeltaMaybeZero = !SE->isKnownNonZero(S: Delta);
1312 bool DeltaMaybePositive = !SE->isKnownNonPositive(S: Delta);
1313 bool DeltaMaybeNegative = !SE->isKnownNonNegative(S: Delta);
1314 bool CoeffMaybePositive = !SE->isKnownNonPositive(S: Coeff);
1315 bool CoeffMaybeNegative = !SE->isKnownNonNegative(S: Coeff);
1316 // The double negatives above are confusing.
1317 // It helps to read !SE->isKnownNonZero(Delta)
1318 // as "Delta might be Zero"
1319 unsigned NewDirection = Dependence::DVEntry::NONE;
1320 if ((DeltaMaybePositive && CoeffMaybePositive) ||
1321 (DeltaMaybeNegative && CoeffMaybeNegative))
1322 NewDirection = Dependence::DVEntry::LT;
1323 if (DeltaMaybeZero)
1324 NewDirection |= Dependence::DVEntry::EQ;
1325 if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1326 (DeltaMaybePositive && CoeffMaybeNegative))
1327 NewDirection |= Dependence::DVEntry::GT;
1328 if (NewDirection < Result.DV[Level].Direction)
1329 ++StrongSIVsuccesses;
1330 Result.DV[Level].Direction &= NewDirection;
1331 }
1332 return false;
1333}
1334
1335// weakCrossingSIVtest -
1336// From the paper, Practical Dependence Testing, Section 4.2.2
1337//
1338// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1339// where i is an induction variable, c1 and c2 are loop invariant,
1340// and a is a constant, we can solve it exactly using the
1341// Weak-Crossing SIV test.
1342//
1343// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1344// the two lines, where i = i', yielding
1345//
1346// c1 + a*i = c2 - a*i
1347// 2a*i = c2 - c1
1348// i = (c2 - c1)/2a
1349//
1350// If i < 0, there is no dependence.
1351// If i > upperbound, there is no dependence.
1352// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1353// If i = upperbound, there's a dependence with distance = 0.
1354// If i is integral, there's a dependence (all directions).
1355// If the non-integer part = 1/2, there's a dependence (<> directions).
1356// Otherwise, there's no dependence.
1357//
1358// Can prove independence. Failing that,
1359// can sometimes refine the directions.
1360// Can determine iteration for splitting.
1361//
1362// Return true if dependence disproved.
1363bool DependenceInfo::weakCrossingSIVtest(const SCEVAddRecExpr *Src,
1364 const SCEVAddRecExpr *Dst,
1365 unsigned Level,
1366 FullDependence &Result) const {
1367 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakCrossingSIV))
1368 return false;
1369
1370 const SCEV *Coeff = Src->getStepRecurrence(SE&: *SE);
1371 const SCEV *SrcConst = Src->getStart();
1372 const SCEV *DstConst = Dst->getStart();
1373
1374 assert(Coeff == SE->getNegativeSCEV(Dst->getStepRecurrence(*SE)) &&
1375 "Unexpected input for weakCrossingSIVtest");
1376
1377 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1378 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
1379 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1380 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1381 ++WeakCrossingSIVapplications;
1382 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1383 Level--;
1384 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
1385 if (!Delta)
1386 return false;
1387
1388 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1389 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: Coeff);
1390 if (!ConstCoeff)
1391 return false;
1392
1393 if (SE->isKnownNegative(S: ConstCoeff)) {
1394 ConstCoeff = dyn_cast<SCEVConstant>(Val: SE->getNegativeSCEV(V: ConstCoeff));
1395 assert(ConstCoeff &&
1396 "dynamic cast of negative of ConstCoeff should yield constant");
1397 Delta = SE->getNegativeSCEV(V: Delta);
1398 }
1399 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1400
1401 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1402 if (!ConstDelta)
1403 return false;
1404
1405 ConstantRange SrcRange = SE->getSignedRange(S: Src);
1406 ConstantRange DstRange = SE->getSignedRange(S: Dst);
1407 LLVM_DEBUG(dbgs() << "\t SrcRange = " << SrcRange << "\n");
1408 LLVM_DEBUG(dbgs() << "\t DstRange = " << DstRange << "\n");
1409 if (SrcRange.intersectWith(CR: DstRange).isSingleElement()) {
1410 // The ranges touch at exactly one value (i = i' = 0 or i = i' = BTC).
1411 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1412 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1413 ++WeakCrossingSIVsuccesses;
1414 if (!Result.DV[Level].Direction) {
1415 ++WeakCrossingSIVindependence;
1416 return true;
1417 }
1418 Result.DV[Level].Distance = SE->getZero(Ty: Delta->getType());
1419 return false;
1420 }
1421
1422 // check that Coeff divides Delta
1423 APInt APDelta = ConstDelta->getAPInt();
1424 APInt APCoeff = ConstCoeff->getAPInt();
1425 APInt Distance = APDelta; // these need to be initialzed
1426 APInt Remainder = APDelta;
1427 APInt::sdivrem(LHS: APDelta, RHS: APCoeff, Quotient&: Distance, Remainder);
1428 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1429 if (Remainder != 0) {
1430 // Coeff doesn't divide Delta, no dependence
1431 ++WeakCrossingSIVindependence;
1432 ++WeakCrossingSIVsuccesses;
1433 return true;
1434 }
1435 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1436
1437 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1438 if (Distance[0]) {
1439 // Equal direction isn't possible
1440 Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ;
1441 ++WeakCrossingSIVsuccesses;
1442 }
1443 return false;
1444}
1445
1446// Kirch's algorithm, from
1447//
1448// Optimizing Supercompilers for Supercomputers
1449// Michael Wolfe
1450// MIT Press, 1989
1451//
1452// Program 2.1, page 29.
1453// Computes the GCD of AM and BM.
1454// Also finds a solution to the equation ax - by = gcd(a, b).
1455// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1456//
1457// We don't use OverflowSafeSignedAPInt here because it's known that this
1458// algorithm doesn't overflow.
1459static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1460 const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1461 LLVM_DEBUG(dbgs() << "\t AM = " << AM << "\n");
1462 LLVM_DEBUG(dbgs() << "\t BM = " << BM << "\n");
1463 LLVM_DEBUG(dbgs() << "\t Delta = " << Delta << "\n");
1464 APInt A0(Bits, 1, true), A1(Bits, 0, true);
1465 APInt B0(Bits, 0, true), B1(Bits, 1, true);
1466 APInt G0 = AM.abs();
1467 APInt G1 = BM.abs();
1468 APInt Q = G0; // these need to be initialized
1469 APInt R = G0;
1470 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1471 while (R != 0) {
1472 // clang-format off
1473 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1474 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1475 G0 = G1; G1 = R;
1476 // clang-format on
1477 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1478 }
1479 G = G1;
1480 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
1481 X = AM.slt(RHS: 0) ? -A1 : A1;
1482 Y = BM.slt(RHS: 0) ? B1 : -B1;
1483
1484 // make sure gcd divides Delta
1485 R = Delta.srem(RHS: G);
1486 if (R != 0)
1487 return true; // gcd doesn't divide Delta, no dependence
1488 Q = Delta.sdiv(RHS: G);
1489 return false;
1490}
1491
1492static OverflowSafeSignedAPInt
1493floorOfQuotient(const OverflowSafeSignedAPInt &OA,
1494 const OverflowSafeSignedAPInt &OB) {
1495 if (!OA || !OB)
1496 return OverflowSafeSignedAPInt();
1497
1498 APInt A = *OA;
1499 APInt B = *OB;
1500 APInt Q = A; // these need to be initialized
1501 APInt R = A;
1502 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1503 if (R == 0)
1504 return Q;
1505 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1506 return Q;
1507 return OverflowSafeSignedAPInt(Q) - 1;
1508}
1509
1510static OverflowSafeSignedAPInt
1511ceilingOfQuotient(const OverflowSafeSignedAPInt &OA,
1512 const OverflowSafeSignedAPInt &OB) {
1513 if (!OA || !OB)
1514 return OverflowSafeSignedAPInt();
1515
1516 APInt A = *OA;
1517 APInt B = *OB;
1518 APInt Q = A; // these need to be initialized
1519 APInt R = A;
1520 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1521 if (R == 0)
1522 return Q;
1523 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1524 return OverflowSafeSignedAPInt(Q) + 1;
1525 return Q;
1526}
1527
1528/// Given an affine expression of the form A*k + B, where k is an arbitrary
1529/// integer, infer the possible range of k based on the known range of the
1530/// affine expression. If we know A*k + B is non-negative, i.e.,
1531///
1532/// A*k + B >=s 0
1533///
1534/// we can derive the following inequalities for k when A is positive:
1535///
1536/// k >=s -B / A
1537///
1538/// Since k is an integer, it means k is greater than or equal to the
1539/// ceil(-B / A).
1540///
1541/// If the upper bound of the affine expression \p UB is passed, the following
1542/// inequality can be derived as well:
1543///
1544/// A*k + B <=s UB
1545///
1546/// which leads to:
1547///
1548/// k <=s (UB - B) / A
1549///
1550/// Again, as k is an integer, it means k is less than or equal to the
1551/// floor((UB - B) / A).
1552///
1553/// The similar logic applies when A is negative, but the inequalities sign flip
1554/// while working with them.
1555///
1556/// Preconditions: \p A is non-zero, and we know A*k + B and \p UB are
1557/// non-negative.
1558static std::pair<OverflowSafeSignedAPInt, OverflowSafeSignedAPInt>
1559inferDomainOfAffine(OverflowSafeSignedAPInt A, OverflowSafeSignedAPInt B,
1560 OverflowSafeSignedAPInt UB) {
1561 assert(A && B && "A and B must be available");
1562 assert(*A != 0 && "A must be non-zero");
1563 assert((!UB || UB->isNonNegative()) && "UB must be non-negative");
1564 OverflowSafeSignedAPInt TL, TU;
1565 if (A->sgt(RHS: 0)) {
1566 TL = ceilingOfQuotient(OA: -B, OB: A);
1567 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1568
1569 // New bound check - modification to Banerjee's e3 check
1570 TU = floorOfQuotient(OA: UB - B, OB: A);
1571 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1572 } else {
1573 TU = floorOfQuotient(OA: -B, OB: A);
1574 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1575
1576 // New bound check - modification to Banerjee's e3 check
1577 TL = ceilingOfQuotient(OA: UB - B, OB: A);
1578 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1579 }
1580 return std::make_pair(x&: TL, y&: TU);
1581}
1582
1583// exactSIVtest -
1584// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1585// where i is an induction variable, c1 and c2 are loop invariant, and a1
1586// and a2 are constant, we can solve it exactly using an algorithm developed
1587// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1588//
1589// Dependence Analysis for Supercomputing
1590// Utpal Banerjee
1591// Kluwer Academic Publishers, 1988
1592//
1593// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1594// so use them if possible. They're also a bit better with symbolics and,
1595// in the case of the strong SIV test, can compute Distances.
1596//
1597// Return true if dependence disproved.
1598//
1599// This is a modified version of the original Banerjee algorithm. The original
1600// only tested whether Dst depends on Src. This algorithm extends that and
1601// returns all the dependencies that exist between Dst and Src.
1602bool DependenceInfo::exactSIVtest(const SCEVAddRecExpr *Src,
1603 const SCEVAddRecExpr *Dst, unsigned Level,
1604 FullDependence &Result) const {
1605 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactSIV))
1606 return false;
1607
1608 LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1609 ++ExactSIVapplications;
1610 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1611 Level--;
1612 bool Res = exactTestImpl(Src, Dst, Result, Level);
1613 if (Res) {
1614 ++ExactSIVsuccesses;
1615 ++ExactSIVindependence;
1616 }
1617 return Res;
1618}
1619
1620// Return true if the divisor evenly divides the dividend.
1621static bool isRemainderZero(const SCEVConstant *Dividend,
1622 const SCEVConstant *Divisor) {
1623 const APInt &ConstDividend = Dividend->getAPInt();
1624 const APInt &ConstDivisor = Divisor->getAPInt();
1625 return ConstDividend.srem(RHS: ConstDivisor) == 0;
1626}
1627
1628bool DependenceInfo::weakZeroSIVtestImpl(const SCEVAddRecExpr *AR,
1629 const SCEV *Const, unsigned Level,
1630 FullDependence &Result) const {
1631 const SCEV *ARCoeff = AR->getStepRecurrence(SE&: *SE);
1632 const SCEV *ARConst = AR->getStart();
1633
1634 if (Const == ARConst && SE->isKnownNonZero(S: ARCoeff)) {
1635 if (Level < CommonLevels) {
1636 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1637 ++WeakZeroSIVsuccesses;
1638 }
1639 return false; // dependences caused by first iteration
1640 }
1641
1642 const SCEV *Delta = minusSCEVNoSignedOverflow(A: Const, B: ARConst, SE&: *SE);
1643 if (!Delta)
1644 return false;
1645 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: ARCoeff);
1646 if (!ConstCoeff)
1647 return false;
1648
1649 if (const SCEV *UpperBound =
1650 collectUpperBound(L: AR->getLoop(), T: Delta->getType())) {
1651 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1652 bool OverlapAtLast = [&] {
1653 if (!SE->isKnownNonZero(S: ConstCoeff))
1654 return false;
1655 const SCEV *Last = AR->evaluateAtIteration(It: UpperBound, SE&: *SE);
1656 return Last == Const;
1657 }();
1658 if (OverlapAtLast) {
1659 // dependences caused by last iteration
1660 if (Level < CommonLevels) {
1661 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1662 ++WeakZeroSIVsuccesses;
1663 }
1664 return false;
1665 }
1666 }
1667
1668 // if ARCoeff doesn't divide Delta, then no dependence
1669 if (isa<SCEVConstant>(Val: Delta) &&
1670 !isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
1671 ++WeakZeroSIVindependence;
1672 ++WeakZeroSIVsuccesses;
1673 return true;
1674 }
1675 return false;
1676}
1677
1678// weakZeroSrcSIVtest -
1679// From the paper, Practical Dependence Testing, Section 4.2.2
1680//
1681// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1682// where i is an induction variable, c1 and c2 are loop invariant,
1683// and a is a constant, we can solve it exactly using the
1684// Weak-Zero SIV test.
1685//
1686// Given
1687//
1688// c1 = c2 + a*i
1689//
1690// we get
1691//
1692// (c1 - c2)/a = i
1693//
1694// If i is not an integer, there's no dependence.
1695// If i < 0 or > UB, there's no dependence.
1696// If i = 0, the direction is >=.
1697// If i = UB, the direction is <=.
1698// Otherwise, the direction is *.
1699//
1700// Can prove independence. Failing that, we can sometimes refine
1701// the directions. Can sometimes show that first or last
1702// iteration carries all the dependences (so worth peeling).
1703//
1704// (see also weakZeroDstSIVtest)
1705//
1706// Return true if dependence disproved.
1707bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *SrcConst,
1708 const SCEVAddRecExpr *Dst,
1709 unsigned Level,
1710 FullDependence &Result) const {
1711 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
1712 return false;
1713
1714 // For the WeakSIV test, it's possible the loop isn't common to
1715 // the Src and Dst loops. If it isn't, then there's no need to
1716 // record a direction.
1717 [[maybe_unused]] const SCEV *DstCoeff = Dst->getStepRecurrence(SE&: *SE);
1718 [[maybe_unused]] const SCEV *DstConst = Dst->getStart();
1719 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1720 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1721 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1722 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1723 ++WeakZeroSIVapplications;
1724 assert(0 < Level && Level <= MaxLevels && "Level out of range");
1725 Level--;
1726
1727 // We have analyzed a dependence from Src to Dst, so \c Result may represent a
1728 // dependence in that direction. However, \c weakZeroSIVtestImpl will analyze
1729 // a dependence from \c Dst to \c SrcConst. To keep the consistency, we need
1730 // to negate the current result before passing it to \c weakZeroSIVtestImpl,
1731 // and negate it back after that.
1732 Result.negate(SE&: *SE);
1733 bool Res = weakZeroSIVtestImpl(AR: Dst, Const: SrcConst, Level, Result);
1734 Result.negate(SE&: *SE);
1735 return Res;
1736}
1737
1738// weakZeroDstSIVtest -
1739// From the paper, Practical Dependence Testing, Section 4.2.2
1740//
1741// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1742// where i is an induction variable, c1 and c2 are loop invariant,
1743// and a is a constant, we can solve it exactly using the
1744// Weak-Zero SIV test.
1745//
1746// Given
1747//
1748// c1 + a*i = c2
1749//
1750// we get
1751//
1752// i = (c2 - c1)/a
1753//
1754// If i is not an integer, there's no dependence.
1755// If i < 0 or > UB, there's no dependence.
1756// If i = 0, the direction is <=.
1757// If i = UB, the direction is >=.
1758// Otherwise, the direction is *.
1759//
1760// Can prove independence. Failing that, we can sometimes refine
1761// the directions. Can sometimes show that first or last
1762// iteration carries all the dependences (so worth peeling).
1763//
1764// (see also weakZeroSrcSIVtest)
1765//
1766// Return true if dependence disproved.
1767bool DependenceInfo::weakZeroDstSIVtest(const SCEVAddRecExpr *Src,
1768 const SCEV *DstConst, unsigned Level,
1769 FullDependence &Result) const {
1770 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
1771 return false;
1772
1773 // For the WeakSIV test, it's possible the loop isn't common to the
1774 // Src and Dst loops. If it isn't, then there's no need to record a direction.
1775 [[maybe_unused]] const SCEV *SrcCoeff = Src->getStepRecurrence(SE&: *SE);
1776 [[maybe_unused]] const SCEV *SrcConst = Src->getStart();
1777 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1778 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
1779 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1780 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1781 ++WeakZeroSIVapplications;
1782 assert(0 < Level && Level <= SrcLevels && "Level out of range");
1783 Level--;
1784
1785 return weakZeroSIVtestImpl(AR: Src, Const: DstConst, Level, Result);
1786}
1787
1788// exactRDIVtest - Tests the RDIV subscript pair for dependence.
1789// Things of the form [c1 + a*i] and [c2 + b*j],
1790// where i and j are induction variable, c1 and c2 are loop invariant,
1791// and a and b are constants.
1792// Returns true if any possible dependence is disproved.
1793// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1794bool DependenceInfo::exactRDIVtest(const SCEVAddRecExpr *Src,
1795 const SCEVAddRecExpr *Dst,
1796 FullDependence &Result) const {
1797 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactRDIV))
1798 return false;
1799
1800 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
1801 ++ExactRDIVapplications;
1802 bool Res = exactTestImpl(Src, Dst, Result, Level: std::nullopt);
1803 if (Res)
1804 ++ExactRDIVindependence;
1805 return Res;
1806}
1807
1808bool DependenceInfo::exactTestImpl(const SCEVAddRecExpr *Src,
1809 const SCEVAddRecExpr *Dst,
1810 FullDependence &Result,
1811 std::optional<unsigned> Level) const {
1812 const SCEV *SrcCoeff = Src->getStepRecurrence(SE&: *SE);
1813 const SCEV *SrcConst = Src->getStart();
1814 const SCEV *DstCoeff = Dst->getStepRecurrence(SE&: *SE);
1815 const SCEV *DstConst = Dst->getStart();
1816 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
1817 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1818 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1819 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1820
1821 if (!Src->hasNoSignedWrap() || !Dst->hasNoSignedWrap())
1822 return false;
1823
1824 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
1825 if (!Delta)
1826 return false;
1827 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1828 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1829 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1830 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1831 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1832 return false;
1833
1834 // find gcd
1835 APInt G, X, Y;
1836 APInt AM = ConstSrcCoeff->getAPInt();
1837 APInt BM = ConstDstCoeff->getAPInt();
1838 APInt CM = ConstDelta->getAPInt();
1839 unsigned Bits = AM.getBitWidth();
1840 if (findGCD(Bits, AM, BM, Delta: CM, G, X, Y)) {
1841 // gcd doesn't divide Delta, no dependence
1842 return true;
1843 }
1844
1845 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1846
1847 // since SCEV construction seems to normalize, LM = 0
1848 std::optional<APInt> SrcUM =
1849 collectNonNegativeConstantUpperBound(L: Src->getLoop(), T: Delta->getType());
1850 if (SrcUM)
1851 LLVM_DEBUG(dbgs() << "\t SrcUM = " << *SrcUM << "\n");
1852
1853 std::optional<APInt> DstUM =
1854 collectNonNegativeConstantUpperBound(L: Dst->getLoop(), T: Delta->getType());
1855 if (DstUM)
1856 LLVM_DEBUG(dbgs() << "\t DstUM = " << *DstUM << "\n");
1857
1858 APInt TU(APInt::getSignedMaxValue(numBits: Bits));
1859 APInt TL(APInt::getSignedMinValue(numBits: Bits));
1860 APInt TC = CM.sdiv(RHS: G);
1861 APInt TX = X * TC;
1862 APInt TY = Y * TC;
1863 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
1864 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
1865 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
1866
1867 APInt TB = BM.sdiv(RHS: G);
1868 APInt TA = AM.sdiv(RHS: G);
1869
1870 // At this point, we have the following equations:
1871 //
1872 // TA*i - TB*j = TC
1873 //
1874 // Also, we know that the all pairs of (i, j) can be expressed as:
1875 //
1876 // (TX + k*TB, TY + k*TA)
1877 //
1878 // where k is an arbitrary integer.
1879 auto [TL0, TU0] = inferDomainOfAffine(A: TB, B: TX, UB: SrcUM);
1880 auto [TL1, TU1] = inferDomainOfAffine(A: TA, B: TY, UB: DstUM);
1881
1882 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
1883 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
1884
1885 auto GetMaxOrMin = [](const OverflowSafeSignedAPInt &V0,
1886 const OverflowSafeSignedAPInt &V1,
1887 bool IsMin) -> std::optional<APInt> {
1888 if (V0 && V1)
1889 return IsMin ? APIntOps::smin(A: *V0, B: *V1) : APIntOps::smax(A: *V0, B: *V1);
1890 if (V0)
1891 return *V0;
1892 if (V1)
1893 return *V1;
1894 return std::nullopt;
1895 };
1896
1897 std::optional<APInt> OptTL = GetMaxOrMin(TL0, TL1, false);
1898 std::optional<APInt> OptTU = GetMaxOrMin(TU0, TU1, true);
1899 if (!OptTL || !OptTU)
1900 return false;
1901
1902 TL = std::move(*OptTL);
1903 TU = std::move(*OptTU);
1904 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
1905 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
1906
1907 if (TL.sgt(RHS: TU))
1908 return true;
1909
1910 if (!Level)
1911 return false;
1912 assert(SrcUM == DstUM && "Expecting same upper bound for Src and Dst");
1913
1914 // explore directions
1915 unsigned NewDirection = Dependence::DVEntry::NONE;
1916 OverflowSafeSignedAPInt LowerDistance, UpperDistance;
1917 OverflowSafeSignedAPInt OTY(TY), OTX(TX), OTA(TA), OTB(TB), OTL(TL), OTU(TU);
1918 // NOTE: It's unclear whether these calculations can overflow. At the moment,
1919 // we conservatively assume they can.
1920 if (TA.sgt(RHS: TB)) {
1921 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1922 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1923 } else {
1924 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1925 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1926 }
1927
1928 if (!LowerDistance || !UpperDistance)
1929 return false;
1930
1931 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << *LowerDistance << "\n");
1932 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << *UpperDistance << "\n");
1933
1934 if (LowerDistance->sle(RHS: 0) && UpperDistance->sge(RHS: 0))
1935 NewDirection |= Dependence::DVEntry::EQ;
1936 if (LowerDistance->slt(RHS: 0))
1937 NewDirection |= Dependence::DVEntry::GT;
1938 if (UpperDistance->sgt(RHS: 0))
1939 NewDirection |= Dependence::DVEntry::LT;
1940
1941 // finished
1942 Result.DV[*Level].Direction &= NewDirection;
1943 LLVM_DEBUG(dbgs() << "\t Result = ");
1944 LLVM_DEBUG(Result.dump(dbgs()));
1945 return Result.DV[*Level].Direction == Dependence::DVEntry::NONE;
1946}
1947
1948// testSIV -
1949// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
1950// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
1951// a2 are constant, we attack it with an SIV test. While they can all be
1952// solved with the Exact SIV test, it's worthwhile to use simpler tests when
1953// they apply; they're cheaper and sometimes more precise.
1954//
1955// Return true if dependence disproved.
1956bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
1957 FullDependence &Result,
1958 bool UnderRuntimeAssumptions) {
1959 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1960 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1961 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
1962 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
1963 bool SrcAnalyzable = SrcAddRec != nullptr && SrcAddRec->hasNoSignedWrap();
1964 bool DstAnalyzable = DstAddRec != nullptr && DstAddRec->hasNoSignedWrap();
1965 if (SrcAnalyzable && DstAnalyzable) {
1966 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
1967 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
1968 const Loop *CurSrcLoop = SrcAddRec->getLoop();
1969 [[maybe_unused]] const Loop *CurDstLoop = DstAddRec->getLoop();
1970 assert(haveSameSD(CurSrcLoop, CurDstLoop) &&
1971 "Loops in the SIV test should have the same iteration space and "
1972 "depth");
1973 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
1974 bool disproven = false;
1975 if (SrcCoeff == DstCoeff)
1976 disproven = strongSIVtest(Src: SrcAddRec, Dst: DstAddRec, Level, Result,
1977 UnderRuntimeAssumptions);
1978 else if (SrcCoeff == SE->getNegativeSCEV(V: DstCoeff))
1979 disproven = weakCrossingSIVtest(Src: SrcAddRec, Dst: DstAddRec, Level, Result);
1980 return disproven || exactSIVtest(Src: SrcAddRec, Dst: DstAddRec, Level, Result);
1981 }
1982 if (SrcAnalyzable && DstAddRec == nullptr) {
1983 const Loop *CurSrcLoop = SrcAddRec->getLoop();
1984 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
1985 return weakZeroDstSIVtest(Src: SrcAddRec, DstConst: Dst, Level, Result);
1986 }
1987 if (DstAnalyzable && SrcAddRec == nullptr) {
1988 const Loop *CurDstLoop = DstAddRec->getLoop();
1989 Level = mapDstLoop(DstLoop: CurDstLoop);
1990 return weakZeroSrcSIVtest(SrcConst: Src, Dst: DstAddRec, Level, Result);
1991 }
1992 assert((SrcAddRec != nullptr || DstAddRec != nullptr) &&
1993 "SIV test expected at least one AddRec");
1994 return false;
1995}
1996
1997// testRDIV -
1998// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
1999// where i and j are induction variables, c1 and c2 are loop invariant,
2000// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2001// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2002// It doesn't make sense to talk about distance or direction in this case,
2003// so there's no point in making special versions of the Strong SIV test or
2004// the Weak-crossing SIV test.
2005//
2006// Return true if dependence disproved.
2007bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
2008 FullDependence &Result) const {
2009 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2010 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2011 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
2012 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
2013 assert(SrcAddRec && DstAddRec && "Unexpected non-addrec input");
2014 return exactRDIVtest(Src: SrcAddRec, Dst: DstAddRec, Result) ||
2015 gcdMIVtest(Src, Dst, Result);
2016}
2017
2018// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2019// Return true if dependence disproved.
2020// Can sometimes refine direction vectors.
2021bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
2022 const SmallBitVector &Loops,
2023 FullDependence &Result) const {
2024 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2025 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2026 return gcdMIVtest(Src, Dst, Result) ||
2027 banerjeeMIVtest(Src, Dst, Loops, Result);
2028}
2029
2030/// Given a SCEVMulExpr, returns its first operand if its first operand is a
2031/// constant and the product doesn't overflow in a signed sense. Otherwise,
2032/// returns std::nullopt. For example, given (10 * X * Y)<nsw>, it returns 10.
2033/// Notably, if it doesn't have nsw, the multiplication may overflow, and if
2034/// so, it may not a multiple of 10.
2035static std::optional<APInt> getConstantCoefficient(const SCEV *Expr) {
2036 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Expr))
2037 return Constant->getAPInt();
2038 if (const auto *Product = dyn_cast<SCEVMulExpr>(Val: Expr))
2039 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Product->getOperand(i: 0)))
2040 if (Product->hasNoSignedWrap())
2041 return Constant->getAPInt();
2042 return std::nullopt;
2043}
2044
2045bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr,
2046 const Loop *CurLoop,
2047 const SCEV *&CurLoopCoeff,
2048 APInt &RunningGCD) const {
2049 // If RunningGCD is already 1, exit early.
2050 // TODO: It might be better to continue the recursion to find CurLoopCoeff.
2051 if (RunningGCD == 1)
2052 return true;
2053
2054 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
2055 if (!AddRec) {
2056 assert(isLoopInvariant(Expr, CurLoop) &&
2057 "Expected loop invariant expression");
2058 return true;
2059 }
2060
2061 assert(AddRec->isAffine() && "Unexpected Expr");
2062 const SCEV *Start = AddRec->getStart();
2063 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
2064 if (AddRec->getLoop() == CurLoop) {
2065 CurLoopCoeff = Step;
2066 } else {
2067 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Step);
2068
2069 // If the coefficient is the product of a constant and other stuff, we can
2070 // use the constant in the GCD computation.
2071 if (!ConstCoeff)
2072 return false;
2073
2074 // TODO: What happens if ConstCoeff is the "most negative" signed number
2075 // (e.g. -128 for 8 bit wide APInt)?
2076 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2077 }
2078
2079 return accumulateCoefficientsGCD(Expr: Start, CurLoop, CurLoopCoeff, RunningGCD);
2080}
2081
2082/// Compute \p RunningGCD and return the start value of the innermost
2083/// \p SCEVAddRecExpr. In order to calculate the return value we do not
2084/// return immediately if it is proved that \p RunningGCD = 1.
2085static const SCEV *analyzeCoefficientsForGCD(const SCEV *Coefficients,
2086 APInt &RunningGCD,
2087 ScalarEvolution *SE) {
2088 while (const SCEVAddRecExpr *AddRec =
2089 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2090 const SCEV *Coeff = AddRec->getStepRecurrence(SE&: *SE);
2091 // If the coefficient is the product of a constant and other stuff,
2092 // we can use the constant in the GCD computation.
2093 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Coeff);
2094 if (!ConstCoeff)
2095 return nullptr;
2096 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2097 Coefficients = AddRec->getStart();
2098 }
2099 return Coefficients;
2100}
2101
2102//===----------------------------------------------------------------------===//
2103// gcdMIVtest -
2104// Tests an MIV subscript pair for dependence.
2105// Returns true if any possible dependence is disproved.
2106// Can sometimes disprove the equal direction for 1 or more loops,
2107// as discussed in Michael Wolfe's book,
2108// High Performance Compilers for Parallel Computing, page 235.
2109//
2110// We spend some effort (code!) to handle cases like
2111// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2112// but M and N are just loop-invariant variables.
2113// This should help us handle linearized subscripts;
2114// also makes this test a useful backup to the various SIV tests.
2115//
2116// It occurs to me that the presence of loop-invariant variables
2117// changes the nature of the test from "greatest common divisor"
2118// to "a common divisor".
2119bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
2120 FullDependence &Result) const {
2121 if (!isDependenceTestEnabled(Test: DependenceTestType::GCDMIV))
2122 return false;
2123
2124 LLVM_DEBUG(dbgs() << "starting gcd\n");
2125 ++GCDapplications;
2126 unsigned BitWidth = SE->getTypeSizeInBits(Ty: Src->getType());
2127 APInt RunningGCD = APInt::getZero(numBits: BitWidth);
2128
2129 // Examine Src and dst coefficients.
2130 const SCEV *SrcConst = analyzeCoefficientsForGCD(Coefficients: Src, RunningGCD, SE);
2131 if (!SrcConst)
2132 return false;
2133 const SCEV *DstConst = analyzeCoefficientsForGCD(Coefficients: Dst, RunningGCD, SE);
2134 if (!DstConst)
2135 return false;
2136
2137 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
2138 if (!Delta)
2139 return false;
2140 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
2141 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Val: Delta);
2142 if (!Constant)
2143 return false;
2144 APInt ConstDelta = cast<SCEVConstant>(Val: Constant)->getAPInt();
2145 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
2146 if (ConstDelta == 0)
2147 return false;
2148 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
2149 APInt Remainder = ConstDelta.srem(RHS: RunningGCD);
2150 if (Remainder != 0) {
2151 ++GCDindependence;
2152 return true;
2153 }
2154
2155 // Try to disprove equal directions.
2156 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2157 // the code above can't disprove the dependence because the GCD = 1.
2158 // So we consider what happen if i = i' and what happens if j = j'.
2159 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2160 // which is infeasible, so we can disallow the = direction for the i level.
2161 // Setting j = j' doesn't help matters, so we end up with a direction vector
2162 // of [<>, *]
2163
2164 bool Improved = false;
2165 const SCEV *Coefficients = Src;
2166 while (const SCEVAddRecExpr *AddRec =
2167 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
2168 Coefficients = AddRec->getStart();
2169 const Loop *CurLoop = AddRec->getLoop();
2170 RunningGCD = 0;
2171 const SCEV *SrcCoeff = AddRec->getStepRecurrence(SE&: *SE);
2172 const SCEV *DstCoeff = SE->getMinusSCEV(LHS: SrcCoeff, RHS: SrcCoeff);
2173
2174 if (!accumulateCoefficientsGCD(Expr: Src, CurLoop, CurLoopCoeff&: SrcCoeff, RunningGCD) ||
2175 !accumulateCoefficientsGCD(Expr: Dst, CurLoop, CurLoopCoeff&: DstCoeff, RunningGCD))
2176 return false;
2177
2178 Delta = minusSCEVNoSignedOverflow(A: DstCoeff, B: SrcCoeff, SE&: *SE);
2179 if (!Delta)
2180 continue;
2181 // If the coefficient is the product of a constant and other stuff,
2182 // we can use the constant in the GCD computation.
2183 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Delta);
2184 if (!ConstCoeff)
2185 // The difference of the two coefficients might not be a product
2186 // or constant, in which case we give up on this direction.
2187 continue;
2188 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
2189 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2190 if (RunningGCD != 0) {
2191 Remainder = ConstDelta.srem(RHS: RunningGCD);
2192 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2193 if (Remainder != 0) {
2194 unsigned Level = mapSrcLoop(SrcLoop: CurLoop);
2195 Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ;
2196 Improved = true;
2197 }
2198 }
2199 }
2200 if (Improved)
2201 ++GCDsuccesses;
2202 LLVM_DEBUG(dbgs() << "all done\n");
2203 return false;
2204}
2205
2206//===----------------------------------------------------------------------===//
2207// banerjeeMIVtest -
2208// Use Banerjee's Inequalities to test an MIV subscript pair.
2209// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2210// Generally follows the discussion in Section 2.5.2 of
2211//
2212// Optimizing Supercompilers for Supercomputers
2213// Michael Wolfe
2214//
2215// The inequalities given on page 25 are simplified in that loops are
2216// normalized so that the lower bound is always 0 and the stride is always 1.
2217// For example, Wolfe gives
2218//
2219// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2220//
2221// where A_k is the coefficient of the kth index in the source subscript,
2222// B_k is the coefficient of the kth index in the destination subscript,
2223// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2224// index, and N_k is the stride of the kth index. Since all loops are normalized
2225// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2226// equation to
2227//
2228// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2229// = (A^-_k - B_k)^- (U_k - 1) - B_k
2230//
2231// Similar simplifications are possible for the other equations.
2232//
2233// When we can't determine the number of iterations for a loop,
2234// we use NULL as an indicator for the worst case, infinity.
2235// When computing the upper bound, NULL denotes +inf;
2236// for the lower bound, NULL denotes -inf.
2237//
2238// Return true if dependence disproved.
2239bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
2240 const SmallBitVector &Loops,
2241 FullDependence &Result) const {
2242 if (!isDependenceTestEnabled(Test: DependenceTestType::BanerjeeMIV))
2243 return false;
2244
2245 LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2246 ++BanerjeeApplications;
2247 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
2248 const SCEV *A0;
2249 CoefficientInfo *A = collectCoeffInfo(Subscript: Src, SrcFlag: true, Constant&: A0);
2250 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');
2251 const SCEV *B0;
2252 CoefficientInfo *B = collectCoeffInfo(Subscript: Dst, SrcFlag: false, Constant&: B0);
2253 BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2254 const SCEV *Delta = SE->getMinusSCEV(LHS: B0, RHS: A0);
2255 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2256
2257 // Compute bounds for all the * directions.
2258 LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2259 for (unsigned K = 1; K <= MaxLevels; ++K) {
2260 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
2261 Bound[K].Direction = Dependence::DVEntry::ALL;
2262 Bound[K].DirSet = Dependence::DVEntry::NONE;
2263 findBoundsALL(A, B, Bound, K);
2264#ifndef NDEBUG
2265 LLVM_DEBUG(dbgs() << "\t " << K << '\t');
2266 if (Bound[K].Lower[Dependence::DVEntry::ALL])
2267 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2268 else
2269 LLVM_DEBUG(dbgs() << "-inf\t");
2270 if (Bound[K].Upper[Dependence::DVEntry::ALL])
2271 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2272 else
2273 LLVM_DEBUG(dbgs() << "+inf\n");
2274#endif
2275 }
2276
2277 // Test the *, *, *, ... case.
2278 bool Disproved = false;
2279 if (testBounds(DirKind: Dependence::DVEntry::ALL, Level: 0, Bound, Delta)) {
2280 // Explore the direction vector hierarchy.
2281 unsigned DepthExpanded = 0;
2282 unsigned NewDeps =
2283 exploreDirections(Level: 1, A, B, Bound, Loops, DepthExpanded, Delta);
2284 if (NewDeps > 0) {
2285 bool Improved = false;
2286 for (unsigned K = 1; K <= CommonLevels; ++K) {
2287 if (Loops[K]) {
2288 unsigned Old = Result.DV[K - 1].Direction;
2289 Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2290 Improved |= Old != Result.DV[K - 1].Direction;
2291 if (!Result.DV[K - 1].Direction) {
2292 Improved = false;
2293 Disproved = true;
2294 break;
2295 }
2296 }
2297 }
2298 if (Improved)
2299 ++BanerjeeSuccesses;
2300 } else {
2301 ++BanerjeeIndependence;
2302 Disproved = true;
2303 }
2304 } else {
2305 ++BanerjeeIndependence;
2306 Disproved = true;
2307 }
2308 delete[] Bound;
2309 delete[] A;
2310 delete[] B;
2311 return Disproved;
2312}
2313
2314// Hierarchically expands the direction vector
2315// search space, combining the directions of discovered dependences
2316// in the DirSet field of Bound. Returns the number of distinct
2317// dependences discovered. If the dependence is disproved,
2318// it will return 0.
2319unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2320 CoefficientInfo *B, BoundInfo *Bound,
2321 const SmallBitVector &Loops,
2322 unsigned &DepthExpanded,
2323 const SCEV *Delta) const {
2324 // This algorithm has worst case complexity of O(3^n), where 'n' is the number
2325 // of common loop levels. To avoid excessive compile-time, pessimize all the
2326 // results and immediately return when the number of common levels is beyond
2327 // the given threshold.
2328 if (CommonLevels > MIVMaxLevelThreshold) {
2329 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2330 "direction exploration is terminated.\n");
2331 for (unsigned K = 1; K <= CommonLevels; ++K)
2332 if (Loops[K])
2333 Bound[K].DirSet = Dependence::DVEntry::ALL;
2334 return 1;
2335 }
2336
2337 if (Level > CommonLevels) {
2338 // record result
2339 LLVM_DEBUG(dbgs() << "\t[");
2340 for (unsigned K = 1; K <= CommonLevels; ++K) {
2341 if (Loops[K]) {
2342 Bound[K].DirSet |= Bound[K].Direction;
2343#ifndef NDEBUG
2344 switch (Bound[K].Direction) {
2345 case Dependence::DVEntry::LT:
2346 LLVM_DEBUG(dbgs() << " <");
2347 break;
2348 case Dependence::DVEntry::EQ:
2349 LLVM_DEBUG(dbgs() << " =");
2350 break;
2351 case Dependence::DVEntry::GT:
2352 LLVM_DEBUG(dbgs() << " >");
2353 break;
2354 case Dependence::DVEntry::ALL:
2355 LLVM_DEBUG(dbgs() << " *");
2356 break;
2357 default:
2358 llvm_unreachable("unexpected Bound[K].Direction");
2359 }
2360#endif
2361 }
2362 }
2363 LLVM_DEBUG(dbgs() << " ]\n");
2364 return 1;
2365 }
2366 if (Loops[Level]) {
2367 if (Level > DepthExpanded) {
2368 DepthExpanded = Level;
2369 // compute bounds for <, =, > at current level
2370 findBoundsLT(A, B, Bound, K: Level);
2371 findBoundsGT(A, B, Bound, K: Level);
2372 findBoundsEQ(A, B, Bound, K: Level);
2373#ifndef NDEBUG
2374 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2375 LLVM_DEBUG(dbgs() << "\t <\t");
2376 if (Bound[Level].Lower[Dependence::DVEntry::LT])
2377 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2378 << '\t');
2379 else
2380 LLVM_DEBUG(dbgs() << "-inf\t");
2381 if (Bound[Level].Upper[Dependence::DVEntry::LT])
2382 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2383 << '\n');
2384 else
2385 LLVM_DEBUG(dbgs() << "+inf\n");
2386 LLVM_DEBUG(dbgs() << "\t =\t");
2387 if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2388 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2389 << '\t');
2390 else
2391 LLVM_DEBUG(dbgs() << "-inf\t");
2392 if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2393 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2394 << '\n');
2395 else
2396 LLVM_DEBUG(dbgs() << "+inf\n");
2397 LLVM_DEBUG(dbgs() << "\t >\t");
2398 if (Bound[Level].Lower[Dependence::DVEntry::GT])
2399 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2400 << '\t');
2401 else
2402 LLVM_DEBUG(dbgs() << "-inf\t");
2403 if (Bound[Level].Upper[Dependence::DVEntry::GT])
2404 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2405 << '\n');
2406 else
2407 LLVM_DEBUG(dbgs() << "+inf\n");
2408#endif
2409 }
2410
2411 unsigned NewDeps = 0;
2412
2413 // test bounds for <, *, *, ...
2414 if (testBounds(DirKind: Dependence::DVEntry::LT, Level, Bound, Delta))
2415 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2416 Delta);
2417
2418 // Test bounds for =, *, *, ...
2419 if (testBounds(DirKind: Dependence::DVEntry::EQ, Level, Bound, Delta))
2420 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2421 Delta);
2422
2423 // test bounds for >, *, *, ...
2424 if (testBounds(DirKind: Dependence::DVEntry::GT, Level, Bound, Delta))
2425 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2426 Delta);
2427
2428 Bound[Level].Direction = Dependence::DVEntry::ALL;
2429 return NewDeps;
2430 } else
2431 return exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2432 Delta);
2433}
2434
2435// Returns true iff the current bounds are plausible.
2436bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2437 BoundInfo *Bound, const SCEV *Delta) const {
2438 Bound[Level].Direction = DirKind;
2439 if (const SCEV *LowerBound = getLowerBound(Bound))
2440 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: LowerBound, RHS: Delta))
2441 return false;
2442 if (const SCEV *UpperBound = getUpperBound(Bound))
2443 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: Delta, RHS: UpperBound))
2444 return false;
2445 return true;
2446}
2447
2448// Computes the upper and lower bounds for level K
2449// using the * direction. Records them in Bound.
2450// Wolfe gives the equations
2451//
2452// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2453// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2454//
2455// Since we normalize loops, we can simplify these equations to
2456//
2457// LB^*_k = (A^-_k - B^+_k)U_k
2458// UB^*_k = (A^+_k - B^-_k)U_k
2459//
2460// We must be careful to handle the case where the upper bound is unknown.
2461// Note that the lower bound is always <= 0
2462// and the upper bound is always >= 0.
2463void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
2464 BoundInfo *Bound, unsigned K) const {
2465 Bound[K].Lower[Dependence::DVEntry::ALL] =
2466 nullptr; // Default value = -infinity.
2467 Bound[K].Upper[Dependence::DVEntry::ALL] =
2468 nullptr; // Default value = +infinity.
2469 if (Bound[K].Iterations) {
2470 Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getMulExpr(
2471 LHS: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].PosPart), RHS: Bound[K].Iterations);
2472 Bound[K].Upper[Dependence::DVEntry::ALL] = SE->getMulExpr(
2473 LHS: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].NegPart), RHS: Bound[K].Iterations);
2474 } else {
2475 // If the difference is 0, we won't need to know the number of iterations.
2476 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].NegPart, RHS: B[K].PosPart))
2477 Bound[K].Lower[Dependence::DVEntry::ALL] =
2478 SE->getZero(Ty: A[K].Coeff->getType());
2479 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].PosPart, RHS: B[K].NegPart))
2480 Bound[K].Upper[Dependence::DVEntry::ALL] =
2481 SE->getZero(Ty: A[K].Coeff->getType());
2482 }
2483}
2484
2485// Computes the upper and lower bounds for level K
2486// using the = direction. Records them in Bound.
2487// Wolfe gives the equations
2488//
2489// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2490// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2491//
2492// Since we normalize loops, we can simplify these equations to
2493//
2494// LB^=_k = (A_k - B_k)^- U_k
2495// UB^=_k = (A_k - B_k)^+ U_k
2496//
2497// We must be careful to handle the case where the upper bound is unknown.
2498// Note that the lower bound is always <= 0
2499// and the upper bound is always >= 0.
2500void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
2501 BoundInfo *Bound, unsigned K) const {
2502 Bound[K].Lower[Dependence::DVEntry::EQ] =
2503 nullptr; // Default value = -infinity.
2504 Bound[K].Upper[Dependence::DVEntry::EQ] =
2505 nullptr; // Default value = +infinity.
2506 if (Bound[K].Iterations) {
2507 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2508 const SCEV *NegativePart = getNegativePart(X: Delta);
2509 Bound[K].Lower[Dependence::DVEntry::EQ] =
2510 SE->getMulExpr(LHS: NegativePart, RHS: Bound[K].Iterations);
2511 const SCEV *PositivePart = getPositivePart(X: Delta);
2512 Bound[K].Upper[Dependence::DVEntry::EQ] =
2513 SE->getMulExpr(LHS: PositivePart, RHS: Bound[K].Iterations);
2514 } else {
2515 // If the positive/negative part of the difference is 0,
2516 // we won't need to know the number of iterations.
2517 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2518 const SCEV *NegativePart = getNegativePart(X: Delta);
2519 if (NegativePart->isZero())
2520 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2521 const SCEV *PositivePart = getPositivePart(X: Delta);
2522 if (PositivePart->isZero())
2523 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2524 }
2525}
2526
2527// Computes the upper and lower bounds for level K
2528// using the < direction. Records them in Bound.
2529// Wolfe gives the equations
2530//
2531// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2532// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2533//
2534// Since we normalize loops, we can simplify these equations to
2535//
2536// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2537// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2538//
2539// We must be careful to handle the case where the upper bound is unknown.
2540void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
2541 BoundInfo *Bound, unsigned K) const {
2542 Bound[K].Lower[Dependence::DVEntry::LT] =
2543 nullptr; // Default value = -infinity.
2544 Bound[K].Upper[Dependence::DVEntry::LT] =
2545 nullptr; // Default value = +infinity.
2546 if (Bound[K].Iterations) {
2547 const SCEV *Iter_1 = SE->getMinusSCEV(
2548 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2549 const SCEV *NegPart =
2550 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2551 Bound[K].Lower[Dependence::DVEntry::LT] =
2552 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: B[K].Coeff);
2553 const SCEV *PosPart =
2554 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2555 Bound[K].Upper[Dependence::DVEntry::LT] =
2556 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: B[K].Coeff);
2557 } else {
2558 // If the positive/negative part of the difference is 0,
2559 // we won't need to know the number of iterations.
2560 const SCEV *NegPart =
2561 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2562 if (NegPart->isZero())
2563 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2564 const SCEV *PosPart =
2565 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2566 if (PosPart->isZero())
2567 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2568 }
2569}
2570
2571// Computes the upper and lower bounds for level K
2572// using the > direction. Records them in Bound.
2573// Wolfe gives the equations
2574//
2575// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2576// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2577//
2578// Since we normalize loops, we can simplify these equations to
2579//
2580// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2581// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2582//
2583// We must be careful to handle the case where the upper bound is unknown.
2584void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
2585 BoundInfo *Bound, unsigned K) const {
2586 Bound[K].Lower[Dependence::DVEntry::GT] =
2587 nullptr; // Default value = -infinity.
2588 Bound[K].Upper[Dependence::DVEntry::GT] =
2589 nullptr; // Default value = +infinity.
2590 if (Bound[K].Iterations) {
2591 const SCEV *Iter_1 = SE->getMinusSCEV(
2592 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2593 const SCEV *NegPart =
2594 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2595 Bound[K].Lower[Dependence::DVEntry::GT] =
2596 SE->getAddExpr(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: A[K].Coeff);
2597 const SCEV *PosPart =
2598 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
2599 Bound[K].Upper[Dependence::DVEntry::GT] =
2600 SE->getAddExpr(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: A[K].Coeff);
2601 } else {
2602 // If the positive/negative part of the difference is 0,
2603 // we won't need to know the number of iterations.
2604 const SCEV *NegPart =
2605 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2606 if (NegPart->isZero())
2607 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2608 const SCEV *PosPart =
2609 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
2610 if (PosPart->isZero())
2611 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2612 }
2613}
2614
2615// X^+ = max(X, 0)
2616const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
2617 return SE->getSMaxExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
2618}
2619
2620// X^- = min(X, 0)
2621const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
2622 return SE->getSMinExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
2623}
2624
2625// Walks through the subscript,
2626// collecting each coefficient, the associated loop bounds,
2627// and recording its positive and negative parts for later use.
2628DependenceInfo::CoefficientInfo *
2629DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
2630 const SCEV *&Constant) const {
2631 const SCEV *Zero = SE->getZero(Ty: Subscript->getType());
2632 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
2633 for (unsigned K = 1; K <= MaxLevels; ++K) {
2634 CI[K].Coeff = Zero;
2635 CI[K].PosPart = Zero;
2636 CI[K].NegPart = Zero;
2637 CI[K].Iterations = nullptr;
2638 }
2639 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Subscript)) {
2640 const Loop *L = AddRec->getLoop();
2641 unsigned K = SrcFlag ? mapSrcLoop(SrcLoop: L) : mapDstLoop(DstLoop: L);
2642 CI[K].Coeff = AddRec->getStepRecurrence(SE&: *SE);
2643 CI[K].PosPart = getPositivePart(X: CI[K].Coeff);
2644 CI[K].NegPart = getNegativePart(X: CI[K].Coeff);
2645 CI[K].Iterations = collectUpperBound(L, T: Subscript->getType());
2646 Subscript = AddRec->getStart();
2647 }
2648 Constant = Subscript;
2649#ifndef NDEBUG
2650 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
2651 for (unsigned K = 1; K <= MaxLevels; ++K) {
2652 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
2653 LLVM_DEBUG(dbgs() << "\tPos Part = ");
2654 LLVM_DEBUG(dbgs() << *CI[K].PosPart);
2655 LLVM_DEBUG(dbgs() << "\tNeg Part = ");
2656 LLVM_DEBUG(dbgs() << *CI[K].NegPart);
2657 LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
2658 if (CI[K].Iterations)
2659 LLVM_DEBUG(dbgs() << *CI[K].Iterations);
2660 else
2661 LLVM_DEBUG(dbgs() << "+inf");
2662 LLVM_DEBUG(dbgs() << '\n');
2663 }
2664 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
2665#endif
2666 return CI;
2667}
2668
2669// Looks through all the bounds info and
2670// computes the lower bound given the current direction settings
2671// at each level. If the lower bound for any level is -inf,
2672// the result is -inf.
2673const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
2674 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
2675 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
2676 if (Bound[K].Lower[Bound[K].Direction])
2677 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Lower[Bound[K].Direction]);
2678 else
2679 Sum = nullptr;
2680 }
2681 return Sum;
2682}
2683
2684// Looks through all the bounds info and
2685// computes the upper bound given the current direction settings
2686// at each level. If the upper bound at any level is +inf,
2687// the result is +inf.
2688const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
2689 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
2690 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
2691 if (Bound[K].Upper[Bound[K].Direction])
2692 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Upper[Bound[K].Direction]);
2693 else
2694 Sum = nullptr;
2695 }
2696 return Sum;
2697}
2698
2699/// Check if we can delinearize the subscripts. If the SCEVs representing the
2700/// source and destination array references are recurrences on a nested loop,
2701/// this function flattens the nested recurrences into separate recurrences
2702/// for each loop level.
2703bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
2704 SmallVectorImpl<Subscript> &Pair) {
2705 assert(isLoadOrStore(Src) && "instruction is not load or store");
2706 assert(isLoadOrStore(Dst) && "instruction is not load or store");
2707 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
2708 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
2709 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
2710 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
2711 const SCEV *SrcAccessFn = SE->getSCEVAtScope(V: SrcPtr, L: SrcLoop);
2712 const SCEV *DstAccessFn = SE->getSCEVAtScope(V: DstPtr, L: DstLoop);
2713 const SCEVUnknown *SrcBase =
2714 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
2715 const SCEVUnknown *DstBase =
2716 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
2717
2718 if (!SrcBase || !DstBase || SrcBase != DstBase)
2719 return false;
2720
2721 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
2722
2723 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
2724 SrcSubscripts, DstSubscripts) &&
2725 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
2726 SrcSubscripts, DstSubscripts))
2727 return false;
2728
2729 assert(isLoopInvariant(SrcBase, SrcLoop) &&
2730 isLoopInvariant(DstBase, DstLoop) &&
2731 "Expected SrcBase and DstBase to be loop invariant");
2732
2733 int Size = SrcSubscripts.size();
2734 LLVM_DEBUG({
2735 dbgs() << "\nSrcSubscripts: ";
2736 for (int I = 0; I < Size; I++)
2737 dbgs() << *SrcSubscripts[I];
2738 dbgs() << "\nDstSubscripts: ";
2739 for (int I = 0; I < Size; I++)
2740 dbgs() << *DstSubscripts[I];
2741 dbgs() << "\n";
2742 });
2743
2744 // The delinearization transforms a single-subscript MIV dependence test into
2745 // a multi-subscript SIV dependence test that is easier to compute. So we
2746 // resize Pair to contain as many pairs of subscripts as the delinearization
2747 // has found, and then initialize the pairs following the delinearization.
2748 Pair.resize(N: Size);
2749 SCEVMonotonicityChecker MonChecker(SE);
2750 const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
2751 for (int I = 0; I < Size; ++I) {
2752 Pair[I].Src = SrcSubscripts[I];
2753 Pair[I].Dst = DstSubscripts[I];
2754
2755 assert(Pair[I].Src->getType() == Pair[I].Dst->getType() &&
2756 "Unexpected different types for the subscripts");
2757
2758 if (EnableMonotonicityCheck) {
2759 if (MonChecker.checkMonotonicity(Expr: Pair[I].Src, OutermostLoop).isUnknown())
2760 return false;
2761 if (MonChecker.checkMonotonicity(Expr: Pair[I].Dst, OutermostLoop).isUnknown())
2762 return false;
2763 }
2764 }
2765
2766 return true;
2767}
2768
2769/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
2770/// arrays accessed are fixed-size arrays. Return true if delinearization was
2771/// successful.
2772bool DependenceInfo::tryDelinearizeFixedSize(
2773 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
2774 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
2775 SmallVectorImpl<const SCEV *> &DstSubscripts) {
2776 LLVM_DEBUG({
2777 const SCEVUnknown *SrcBase =
2778 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
2779 const SCEVUnknown *DstBase =
2780 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
2781 assert(SrcBase && DstBase && SrcBase == DstBase &&
2782 "expected src and dst scev unknowns to be equal");
2783 });
2784
2785 const SCEV *ElemSize = SE->getElementSize(Inst: Src);
2786 assert(ElemSize == SE->getElementSize(Dst) && "Different element sizes");
2787 SmallVector<const SCEV *, 4> SrcSizes, DstSizes;
2788 if (!delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: SrcAccessFn),
2789 Subscripts&: SrcSubscripts, Sizes&: SrcSizes, ElementSize: ElemSize) ||
2790 !delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: DstAccessFn),
2791 Subscripts&: DstSubscripts, Sizes&: DstSizes, ElementSize: ElemSize))
2792 return false;
2793
2794 // Check that the two size arrays are non-empty and equal in length and
2795 // value. SCEV expressions are uniqued, so we can compare pointers.
2796 if (SrcSizes.size() != DstSizes.size() ||
2797 !std::equal(first1: SrcSizes.begin(), last1: SrcSizes.end(), first2: DstSizes.begin())) {
2798 SrcSubscripts.clear();
2799 DstSubscripts.clear();
2800 return false;
2801 }
2802
2803 assert(SrcSubscripts.size() == DstSubscripts.size() &&
2804 "Expected equal number of entries in the list of SrcSubscripts and "
2805 "DstSubscripts.");
2806
2807 // In general we cannot safely assume that the subscripts recovered from GEPs
2808 // are in the range of values defined for their corresponding array
2809 // dimensions. For example some C language usage/interpretation make it
2810 // impossible to verify this at compile-time. As such we can only delinearize
2811 // iff the subscripts are positive and are less than the range of the
2812 // dimension.
2813 if (!DisableDelinearizationChecks) {
2814 if (!validateDelinearizationResult(SE&: *SE, Sizes: SrcSizes, Subscripts: SrcSubscripts) ||
2815 !validateDelinearizationResult(SE&: *SE, Sizes: DstSizes, Subscripts: DstSubscripts)) {
2816 SrcSubscripts.clear();
2817 DstSubscripts.clear();
2818 return false;
2819 }
2820 }
2821 LLVM_DEBUG({
2822 dbgs() << "Delinearized subscripts of fixed-size array\n"
2823 << "SrcGEP:" << *getLoadStorePointerOperand(Src) << "\n"
2824 << "DstGEP:" << *getLoadStorePointerOperand(Dst) << "\n";
2825 });
2826 return true;
2827}
2828
2829bool DependenceInfo::tryDelinearizeParametricSize(
2830 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
2831 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
2832 SmallVectorImpl<const SCEV *> &DstSubscripts) {
2833
2834 const SCEVUnknown *SrcBase =
2835 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
2836 const SCEVUnknown *DstBase =
2837 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
2838 assert(SrcBase && DstBase && SrcBase == DstBase &&
2839 "expected src and dst scev unknowns to be equal");
2840
2841 const SCEV *ElementSize = SE->getElementSize(Inst: Src);
2842 if (ElementSize != SE->getElementSize(Inst: Dst))
2843 return false;
2844
2845 const SCEV *SrcSCEV = SE->getMinusSCEV(LHS: SrcAccessFn, RHS: SrcBase);
2846 const SCEV *DstSCEV = SE->getMinusSCEV(LHS: DstAccessFn, RHS: DstBase);
2847
2848 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(Val: SrcSCEV);
2849 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(Val: DstSCEV);
2850 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
2851 return false;
2852
2853 // First step: collect parametric terms in both array references.
2854 SmallVector<const SCEV *, 4> Terms;
2855 collectParametricTerms(SE&: *SE, Expr: SrcAR, Terms);
2856 collectParametricTerms(SE&: *SE, Expr: DstAR, Terms);
2857
2858 // Second step: find subscript sizes.
2859 SmallVector<const SCEV *, 4> Sizes;
2860 findArrayDimensions(SE&: *SE, Terms, Sizes, ElementSize);
2861
2862 // Third step: compute the access functions for each subscript.
2863 computeAccessFunctions(SE&: *SE, Expr: SrcAR, Subscripts&: SrcSubscripts, Sizes);
2864 computeAccessFunctions(SE&: *SE, Expr: DstAR, Subscripts&: DstSubscripts, Sizes);
2865
2866 // Fail when there is only a subscript: that's a linearized access function.
2867 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
2868 SrcSubscripts.size() != DstSubscripts.size())
2869 return false;
2870
2871 // Statically check that the array bounds are in-range. The first subscript we
2872 // don't have a size for and it cannot overflow into another subscript, so is
2873 // always safe. The others need to be 0 <= subscript[i] < bound, for both src
2874 // and dst.
2875 // FIXME: It may be better to record these sizes and add them as constraints
2876 // to the dependency checks.
2877 if (!DisableDelinearizationChecks)
2878 if (!validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: SrcSubscripts) ||
2879 !validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: DstSubscripts))
2880 return false;
2881
2882 return true;
2883}
2884
2885//===----------------------------------------------------------------------===//
2886
2887#ifndef NDEBUG
2888// For debugging purposes, dump a small bit vector to dbgs().
2889static void dumpSmallBitVector(SmallBitVector &BV) {
2890 dbgs() << "{";
2891 for (unsigned VI : BV.set_bits()) {
2892 dbgs() << VI;
2893 if (BV.find_next(VI) >= 0)
2894 dbgs() << ' ';
2895 }
2896 dbgs() << "}\n";
2897}
2898#endif
2899
2900bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
2901 FunctionAnalysisManager::Invalidator &Inv) {
2902 // Check if the analysis itself has been invalidated.
2903 auto PAC = PA.getChecker<DependenceAnalysis>();
2904 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
2905 return true;
2906
2907 // Check transitive dependencies.
2908 return Inv.invalidate<AAManager>(IR&: F, PA) ||
2909 Inv.invalidate<ScalarEvolutionAnalysis>(IR&: F, PA) ||
2910 Inv.invalidate<LoopAnalysis>(IR&: F, PA);
2911}
2912
2913// depends -
2914// Returns NULL if there is no dependence.
2915// Otherwise, return a Dependence with as many details as possible.
2916// Corresponds to Section 3.1 in the paper
2917//
2918// Practical Dependence Testing
2919// Goff, Kennedy, Tseng
2920// PLDI 1991
2921//
2922std::unique_ptr<Dependence>
2923DependenceInfo::depends(Instruction *Src, Instruction *Dst,
2924 bool UnderRuntimeAssumptions) {
2925 SmallVector<const SCEVPredicate *, 4> Assume;
2926 bool PossiblyLoopIndependent = true;
2927 if (Src == Dst)
2928 PossiblyLoopIndependent = false;
2929
2930 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
2931 // if both instructions don't reference memory, there's no dependence
2932 return nullptr;
2933
2934 if (!isLoadOrStore(I: Src) || !isLoadOrStore(I: Dst)) {
2935 // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
2936 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
2937 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2938 args: SCEVUnionPredicate(Assume, *SE));
2939 }
2940
2941 const MemoryLocation &DstLoc = MemoryLocation::get(Inst: Dst);
2942 const MemoryLocation &SrcLoc = MemoryLocation::get(Inst: Src);
2943
2944 switch (underlyingObjectsAlias(AA, DL: F->getDataLayout(), LocA: DstLoc, LocB: SrcLoc)) {
2945 case AliasResult::MayAlias:
2946 case AliasResult::PartialAlias:
2947 // cannot analyse objects if we don't understand their aliasing.
2948 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
2949 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2950 args: SCEVUnionPredicate(Assume, *SE));
2951 case AliasResult::NoAlias:
2952 // If the objects noalias, they are distinct, accesses are independent.
2953 LLVM_DEBUG(dbgs() << "no alias\n");
2954 return nullptr;
2955 case AliasResult::MustAlias:
2956 break; // The underlying objects alias; test accesses for dependence.
2957 }
2958
2959 if (DstLoc.Size != SrcLoc.Size || !DstLoc.Size.isPrecise() ||
2960 !SrcLoc.Size.isPrecise()) {
2961 // The dependence test gets confused if the size of the memory accesses
2962 // differ.
2963 LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
2964 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2965 args: SCEVUnionPredicate(Assume, *SE));
2966 }
2967
2968 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
2969 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
2970 const SCEV *SrcSCEV = SE->getSCEV(V: SrcPtr);
2971 const SCEV *DstSCEV = SE->getSCEV(V: DstPtr);
2972 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
2973 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
2974 const SCEV *SrcBase = SE->getPointerBase(V: SrcSCEV);
2975 const SCEV *DstBase = SE->getPointerBase(V: DstSCEV);
2976 if (SrcBase != DstBase) {
2977 // If two pointers have different bases, trying to analyze indexes won't
2978 // work; we can't compare them to each other. This can happen, for example,
2979 // if one is produced by an LCSSA PHI node.
2980 //
2981 // We check this upfront so we don't crash in cases where getMinusSCEV()
2982 // returns a SCEVCouldNotCompute.
2983 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
2984 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2985 args: SCEVUnionPredicate(Assume, *SE));
2986 }
2987
2988 // Even if the base pointers are the same, they may not be loop-invariant. It
2989 // could lead to incorrect results, as we're analyzing loop-carried
2990 // dependencies. Src and Dst can be in different loops, so we need to check
2991 // the base pointer is invariant in both loops.
2992 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
2993 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
2994 if (!isLoopInvariant(Expression: SrcBase, LoopNest: SrcLoop) ||
2995 !isLoopInvariant(Expression: DstBase, LoopNest: DstLoop)) {
2996 LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
2997 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2998 args: SCEVUnionPredicate(Assume, *SE));
2999 }
3000
3001 uint64_t EltSize = SrcLoc.Size.toRaw();
3002 const SCEV *SrcEv = SE->getMinusSCEV(LHS: SrcSCEV, RHS: SrcBase);
3003 const SCEV *DstEv = SE->getMinusSCEV(LHS: DstSCEV, RHS: DstBase);
3004
3005 // Check that memory access offsets are multiples of element sizes.
3006 if (!SE->isKnownMultipleOf(S: SrcEv, M: EltSize, Assumptions&: Assume) ||
3007 !SE->isKnownMultipleOf(S: DstEv, M: EltSize, Assumptions&: Assume)) {
3008 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
3009 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3010 args: SCEVUnionPredicate(Assume, *SE));
3011 }
3012
3013 // Runtime assumptions needed but not allowed.
3014 if (!Assume.empty() && !UnderRuntimeAssumptions)
3015 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3016 args: SCEVUnionPredicate(Assume, *SE));
3017
3018 unsigned Pairs = 1;
3019 SmallVector<Subscript, 2> Pair(Pairs);
3020 Pair[0].Src = SrcEv;
3021 Pair[0].Dst = DstEv;
3022
3023 SCEVMonotonicityChecker MonChecker(SE);
3024 const Loop *OutermostLoop = SrcLoop ? SrcLoop->getOutermostLoop() : nullptr;
3025 if (EnableMonotonicityCheck)
3026 if (MonChecker.checkMonotonicity(Expr: Pair[0].Src, OutermostLoop).isUnknown() ||
3027 MonChecker.checkMonotonicity(Expr: Pair[0].Dst, OutermostLoop).isUnknown())
3028 return std::make_unique<Dependence>(args&: Src, args&: Dst,
3029 args: SCEVUnionPredicate(Assume, *SE));
3030
3031 if (Delinearize) {
3032 if (tryDelinearize(Src, Dst, Pair)) {
3033 LLVM_DEBUG(dbgs() << " delinearized\n");
3034 Pairs = Pair.size();
3035 }
3036 }
3037
3038 // Establish loop nesting levels considering SameSD loops as common
3039 establishNestingLevels(Src, Dst);
3040
3041 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
3042 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
3043 LLVM_DEBUG(dbgs() << " SameSD nesting levels = " << SameSDLevels << "\n");
3044
3045 // Modify common levels to consider the SameSD levels in the tests
3046 CommonLevels += SameSDLevels;
3047 MaxLevels -= SameSDLevels;
3048 if (SameSDLevels > 0) {
3049 // Not all tests are handled yet over SameSD loops
3050 // Revoke if there are any tests other than ZIV, SIV or RDIV
3051 for (unsigned P = 0; P < Pairs; ++P) {
3052 SmallBitVector Loops;
3053 Subscript::ClassificationKind TestClass =
3054 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()),
3055 Dst: Pair[P].Dst, DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops);
3056
3057 if (TestClass != Subscript::ZIV && TestClass != Subscript::SIV &&
3058 TestClass != Subscript::RDIV) {
3059 // Revert the levels to not consider the SameSD levels
3060 CommonLevels -= SameSDLevels;
3061 MaxLevels += SameSDLevels;
3062 SameSDLevels = 0;
3063 break;
3064 }
3065 }
3066 }
3067
3068 if (SameSDLevels > 0)
3069 SameSDLoopsCount++;
3070
3071 FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
3072 PossiblyLoopIndependent, CommonLevels);
3073 ++TotalArrayPairs;
3074
3075 for (unsigned P = 0; P < Pairs; ++P) {
3076 assert(Pair[P].Src->getType()->isIntegerTy() && "Src must be an integer");
3077 assert(Pair[P].Dst->getType()->isIntegerTy() && "Dst must be an integer");
3078 Pair[P].Loops.resize(N: MaxLevels + 1);
3079 Pair[P].GroupLoops.resize(N: MaxLevels + 1);
3080 Pair[P].Group.resize(N: Pairs);
3081 Pair[P].Classification =
3082 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()), Dst: Pair[P].Dst,
3083 DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops&: Pair[P].Loops);
3084 Pair[P].GroupLoops = Pair[P].Loops;
3085 Pair[P].Group.set(P);
3086 LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
3087 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3088 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3089 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3090 LLVM_DEBUG(dbgs() << "\tloops = ");
3091 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
3092 }
3093
3094 // Test each subscript individually
3095 for (unsigned SI = 0; SI < Pairs; ++SI) {
3096 LLVM_DEBUG(dbgs() << "testing subscript " << SI);
3097
3098 // Attempt signed range test first.
3099 ConstantRange SrcRange = SE->getSignedRange(S: Pair[SI].Src);
3100 ConstantRange DstRange = SE->getSignedRange(S: Pair[SI].Dst);
3101 if (SrcRange.intersectWith(CR: DstRange).isEmptySet())
3102 return nullptr;
3103
3104 switch (Pair[SI].Classification) {
3105 case Subscript::NonLinear:
3106 // ignore these, but collect loops for later
3107 ++NonlinearSubscriptPairs;
3108 collectCommonLoops(Expression: Pair[SI].Src, LoopNest: LI->getLoopFor(BB: Src->getParent()),
3109 Loops&: Pair[SI].Loops);
3110 collectCommonLoops(Expression: Pair[SI].Dst, LoopNest: LI->getLoopFor(BB: Dst->getParent()),
3111 Loops&: Pair[SI].Loops);
3112 break;
3113 case Subscript::ZIV:
3114 LLVM_DEBUG(dbgs() << ", ZIV\n");
3115 if (testZIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
3116 return nullptr;
3117 break;
3118 case Subscript::SIV: {
3119 LLVM_DEBUG(dbgs() << ", SIV\n");
3120 unsigned Level;
3121 if (testSIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Level, Result,
3122 UnderRuntimeAssumptions))
3123 return nullptr;
3124 break;
3125 }
3126 case Subscript::RDIV:
3127 LLVM_DEBUG(dbgs() << ", RDIV\n");
3128 if (testRDIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
3129 return nullptr;
3130 break;
3131 case Subscript::MIV:
3132 LLVM_DEBUG(dbgs() << ", MIV\n");
3133 if (testMIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Loops: Pair[SI].Loops, Result))
3134 return nullptr;
3135 break;
3136 }
3137 }
3138
3139 // Make sure the Scalar flags are set correctly.
3140 SmallBitVector CompleteLoops(MaxLevels + 1);
3141 for (unsigned SI = 0; SI < Pairs; ++SI)
3142 CompleteLoops |= Pair[SI].Loops;
3143 for (unsigned II = 1; II <= CommonLevels; ++II)
3144 if (CompleteLoops[II])
3145 Result.DV[II - 1].Scalar = false;
3146
3147 // Set the distance to zero if the direction is EQ.
3148 // TODO: Ideally, the distance should be set to 0 immediately simultaneously
3149 // with the corresponding direction being set to EQ.
3150 for (unsigned II = 1; II <= Result.getLevels(); ++II) {
3151 if (Result.getDirection(Level: II) == Dependence::DVEntry::EQ) {
3152 if (Result.DV[II - 1].Distance == nullptr)
3153 Result.DV[II - 1].Distance = SE->getZero(Ty: SrcSCEV->getType());
3154 else
3155 assert(Result.DV[II - 1].Distance->isZero() &&
3156 "Inconsistency between distance and direction");
3157 }
3158
3159#ifndef NDEBUG
3160 // Check that the converse (i.e., if the distance is zero, then the
3161 // direction is EQ) holds.
3162 const SCEV *Distance = Result.getDistance(II);
3163 if (Distance && Distance->isZero())
3164 assert(Result.getDirection(II) == Dependence::DVEntry::EQ &&
3165 "Distance is zero, but direction is not EQ");
3166#endif
3167 }
3168
3169 if (SameSDLevels > 0) {
3170 // Extracting SameSD levels from the common levels
3171 // Reverting CommonLevels and MaxLevels to their original values
3172 assert(CommonLevels >= SameSDLevels);
3173 CommonLevels -= SameSDLevels;
3174 MaxLevels += SameSDLevels;
3175 std::unique_ptr<FullDependence::DVEntry[]> DV, DVSameSD;
3176 DV = std::make_unique<FullDependence::DVEntry[]>(num: CommonLevels);
3177 DVSameSD = std::make_unique<FullDependence::DVEntry[]>(num: SameSDLevels);
3178 for (unsigned Level = 0; Level < CommonLevels; ++Level)
3179 DV[Level] = Result.DV[Level];
3180 for (unsigned Level = 0; Level < SameSDLevels; ++Level)
3181 DVSameSD[Level] = Result.DV[CommonLevels + Level];
3182 Result.DV = std::move(DV);
3183 Result.DVSameSD = std::move(DVSameSD);
3184 Result.Levels = CommonLevels;
3185 Result.SameSDLevels = SameSDLevels;
3186 }
3187
3188 if (PossiblyLoopIndependent) {
3189 // Make sure the LoopIndependent flag is set correctly.
3190 // All directions must include equal, otherwise no
3191 // loop-independent dependence is possible.
3192 for (unsigned II = 1; II <= CommonLevels; ++II) {
3193 if (!(Result.getDirection(Level: II) & Dependence::DVEntry::EQ)) {
3194 Result.LoopIndependent = false;
3195 break;
3196 }
3197 }
3198 } else {
3199 // On the other hand, if all directions are equal and there's no
3200 // loop-independent dependence possible, then no dependence exists.
3201 // However, if there are runtime assumptions, we must return the result.
3202 bool AllEqual = true;
3203 for (unsigned II = 1; II <= CommonLevels; ++II) {
3204 if (Result.getDirection(Level: II) != Dependence::DVEntry::EQ) {
3205 AllEqual = false;
3206 break;
3207 }
3208 }
3209 if (AllEqual && Result.Assumptions.getPredicates().empty())
3210 return nullptr;
3211 }
3212
3213 return std::make_unique<FullDependence>(args: std::move(Result));
3214}
3215