1//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DependenceAnalysis is an LLVM pass that analyses dependences between memory
10// accesses. Currently, it is an (incomplete) implementation of the approach
11// described in
12//
13// Practical Dependence Testing
14// Goff, Kennedy, Tseng
15// PLDI 1991
16//
17// There's a single entry point that analyzes the dependence between a pair
18// of memory references in a function, returning either NULL, for no dependence,
19// or a more-or-less detailed description of the dependence between them.
20//
21// Since Clang linearizes some array subscripts, the dependence
22// analysis is using SCEV->delinearize to recover the representation of multiple
23// subscripts, and thus avoid the more expensive and less precise MIV tests. The
24// delinearization is controlled by the flag -da-delinearize.
25//
26// We should pay some careful attention to the possibility of integer overflow
27// in the implementation of the various tests. This could happen with Add,
28// Subtract, or Multiply, with both APInt's and SCEV's.
29//
30// Some non-linear subscript pairs can be handled by the GCD test
31// (and perhaps other tests).
32// Should explore how often these things occur.
33//
34// Finally, it seems like certain test cases expose weaknesses in the SCEV
35// simplification, especially in the handling of sign and zero extensions.
36// It could be useful to spend time exploring these.
37//
38// Please note that this is work in progress and the interface is subject to
39// change.
40//
41//===----------------------------------------------------------------------===//
42// //
43// In memory of Ken Kennedy, 1945 - 2007 //
44// //
45//===----------------------------------------------------------------------===//
46
47#include "llvm/Analysis/DependenceAnalysis.h"
48#include "llvm/ADT/Statistic.h"
49#include "llvm/Analysis/AliasAnalysis.h"
50#include "llvm/Analysis/Delinearization.h"
51#include "llvm/Analysis/LoopInfo.h"
52#include "llvm/Analysis/ScalarEvolution.h"
53#include "llvm/Analysis/ScalarEvolutionExpressions.h"
54#include "llvm/Analysis/ValueTracking.h"
55#include "llvm/IR/InstIterator.h"
56#include "llvm/IR/Module.h"
57#include "llvm/InitializePasses.h"
58#include "llvm/Support/CommandLine.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/raw_ostream.h"
62
63using namespace llvm;
64
65#define DEBUG_TYPE "da"
66
67//===----------------------------------------------------------------------===//
68// statistics
69
70STATISTIC(TotalArrayPairs, "Array pairs tested");
71STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
72STATISTIC(ZIVapplications, "ZIV applications");
73STATISTIC(ZIVindependence, "ZIV independence");
74STATISTIC(StrongSIVapplications, "Strong SIV applications");
75STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
76STATISTIC(StrongSIVindependence, "Strong SIV independence");
77STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
78STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
79STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
80STATISTIC(ExactSIVapplications, "Exact SIV applications");
81STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
82STATISTIC(ExactSIVindependence, "Exact SIV independence");
83STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
84STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
85STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
86STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
87STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
88STATISTIC(GCDapplications, "GCD applications");
89STATISTIC(GCDsuccesses, "GCD successes");
90STATISTIC(GCDindependence, "GCD independence");
91STATISTIC(BanerjeeApplications, "Banerjee applications");
92STATISTIC(BanerjeeIndependence, "Banerjee independence");
93STATISTIC(BanerjeeSuccesses, "Banerjee successes");
94STATISTIC(SameSDLoopsCount, "Loops with Same iteration Space and Depth");
95
96static cl::opt<bool>
97 Delinearize("da-delinearize", cl::init(Val: true), cl::Hidden,
98 cl::desc("Try to delinearize array references."));
99static cl::opt<bool> DisableDelinearizationChecks(
100 "da-disable-delinearization-checks", cl::Hidden,
101 cl::desc(
102 "Disable checks that try to statically verify validity of "
103 "delinearized subscripts. Enabling this option may result in incorrect "
104 "dependence vectors for languages that allow the subscript of one "
105 "dimension to underflow or overflow into another dimension."));
106
107static cl::opt<unsigned> MIVMaxLevelThreshold(
108 "da-miv-max-level-threshold", cl::init(Val: 7), cl::Hidden,
109 cl::desc("Maximum depth allowed for the recursive algorithm used to "
110 "explore MIV direction vectors."));
111
112namespace {
113
114/// Types of dependence test routines.
115enum class DependenceTestType {
116 Default, ///< All tests except BanerjeeMIV
117 All,
118 StrongSIV,
119 WeakCrossingSIV,
120 ExactSIV,
121 WeakZeroSIV,
122 ExactRDIV,
123 GCDMIV,
124 BanerjeeMIV,
125};
126
127} // anonymous namespace
128
129static cl::opt<DependenceTestType> EnableDependenceTest(
130 "da-enable-dependence-test", cl::init(Val: DependenceTestType::Default),
131 cl::ReallyHidden,
132 cl::desc("Run only specified dependence test routine and disable others. "
133 "The purpose is mainly to exclude the influence of other "
134 "dependence test routines in regression tests. If set to All, all "
135 "dependence test routines are enabled."),
136 cl::values(clEnumValN(DependenceTestType::Default, "default",
137 "Enable all dependence test routines except "
138 "Banerjee MIV (default)."),
139 clEnumValN(DependenceTestType::All, "all",
140 "Enable all dependence test routines."),
141 clEnumValN(DependenceTestType::StrongSIV, "strong-siv",
142 "Enable only Strong SIV test."),
143 clEnumValN(DependenceTestType::WeakCrossingSIV,
144 "weak-crossing-siv",
145 "Enable only Weak-Crossing SIV test."),
146 clEnumValN(DependenceTestType::ExactSIV, "exact-siv",
147 "Enable only Exact SIV test."),
148 clEnumValN(DependenceTestType::WeakZeroSIV, "weak-zero-siv",
149 "Enable only Weak-Zero SIV test."),
150 clEnumValN(DependenceTestType::ExactRDIV, "exact-rdiv",
151 "Enable only Exact RDIV test."),
152 clEnumValN(DependenceTestType::GCDMIV, "gcd-miv",
153 "Enable only GCD MIV test."),
154 clEnumValN(DependenceTestType::BanerjeeMIV, "banerjee-miv",
155 "Enable only Banerjee MIV test.")));
156
157//===----------------------------------------------------------------------===//
158// basics
159
160DependenceAnalysis::Result
161DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
162 auto &AA = FAM.getResult<AAManager>(IR&: F);
163 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
164 auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
165 return DependenceInfo(&F, &AA, &SE, &LI);
166}
167
168AnalysisKey DependenceAnalysis::Key;
169
170INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
171 "Dependence Analysis", true, true)
172INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
173INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
174INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
175INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",
176 true, true)
177
178char DependenceAnalysisWrapperPass::ID = 0;
179
180DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()
181 : FunctionPass(ID) {}
182
183FunctionPass *llvm::createDependenceAnalysisWrapperPass() {
184 return new DependenceAnalysisWrapperPass();
185}
186
187bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {
188 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
189 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
190 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
191 info.reset(p: new DependenceInfo(&F, &AA, &SE, &LI));
192 return false;
193}
194
195DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }
196
197void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }
198
199void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
200 AU.setPreservesAll();
201 AU.addRequiredTransitive<AAResultsWrapperPass>();
202 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
203 AU.addRequiredTransitive<LoopInfoWrapperPass>();
204}
205
206namespace {
207
208/// A wrapper class for std::optional<APInt> that provides arithmetic operators
209/// with overflow checking in a signed sense. This allows us to omit inserting
210/// an overflow check at every arithmetic operation, which simplifies the code
211/// if the operations are chained like `a + b + c + ...`.
212///
213/// If an calculation overflows, the result becomes "invalid" which is
214/// internally represented by std::nullopt. If any operand of an arithmetic
215/// operation is "invalid", the result will also be "invalid".
216struct OverflowSafeSignedAPInt {
217 OverflowSafeSignedAPInt() : Value(std::nullopt) {}
218 OverflowSafeSignedAPInt(const APInt &V) : Value(V) {}
219 OverflowSafeSignedAPInt(const std::optional<APInt> &V) : Value(V) {}
220
221 OverflowSafeSignedAPInt operator+(const OverflowSafeSignedAPInt &RHS) const {
222 if (!Value || !RHS.Value)
223 return OverflowSafeSignedAPInt();
224 bool Overflow;
225 APInt Result = Value->sadd_ov(RHS: *RHS.Value, Overflow);
226 if (Overflow)
227 return OverflowSafeSignedAPInt();
228 return OverflowSafeSignedAPInt(Result);
229 }
230
231 OverflowSafeSignedAPInt operator+(int RHS) const {
232 if (!Value)
233 return OverflowSafeSignedAPInt();
234 return *this + fromInt(V: RHS);
235 }
236
237 OverflowSafeSignedAPInt operator-(const OverflowSafeSignedAPInt &RHS) const {
238 if (!Value || !RHS.Value)
239 return OverflowSafeSignedAPInt();
240 bool Overflow;
241 APInt Result = Value->ssub_ov(RHS: *RHS.Value, Overflow);
242 if (Overflow)
243 return OverflowSafeSignedAPInt();
244 return OverflowSafeSignedAPInt(Result);
245 }
246
247 OverflowSafeSignedAPInt operator-(int RHS) const {
248 if (!Value)
249 return OverflowSafeSignedAPInt();
250 return *this - fromInt(V: RHS);
251 }
252
253 OverflowSafeSignedAPInt operator*(const OverflowSafeSignedAPInt &RHS) const {
254 if (!Value || !RHS.Value)
255 return OverflowSafeSignedAPInt();
256 bool Overflow;
257 APInt Result = Value->smul_ov(RHS: *RHS.Value, Overflow);
258 if (Overflow)
259 return OverflowSafeSignedAPInt();
260 return OverflowSafeSignedAPInt(Result);
261 }
262
263 OverflowSafeSignedAPInt operator-() const {
264 if (!Value)
265 return OverflowSafeSignedAPInt();
266 if (Value->isMinSignedValue())
267 return OverflowSafeSignedAPInt();
268 return OverflowSafeSignedAPInt(-*Value);
269 }
270
271 operator bool() const { return Value.has_value(); }
272
273 bool operator!() const { return !Value.has_value(); }
274
275 const APInt &operator*() const {
276 assert(Value && "Value is not available.");
277 return *Value;
278 }
279
280 const APInt *operator->() const {
281 assert(Value && "Value is not available.");
282 return &*Value;
283 }
284
285private:
286 /// Underlying value. std::nullopt means "unknown". An arithmetic operation on
287 /// "unknown" always produces "unknown".
288 std::optional<APInt> Value;
289
290 OverflowSafeSignedAPInt fromInt(uint64_t V) const {
291 assert(Value && "Value is not available.");
292 return OverflowSafeSignedAPInt(
293 APInt(Value->getBitWidth(), V, /*isSigned=*/true));
294 }
295};
296
297} // anonymous namespace
298
299// Used to test the dependence analyzer.
300// Looks through the function, noting instructions that may access memory.
301// Calls depends() on every possible pair and prints out the result.
302// Ignores all other instructions.
303static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
304 ScalarEvolution &SE, LoopInfo &LI,
305 bool NormalizeResults) {
306 auto *F = DA->getFunction();
307
308 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
309 ++SrcI) {
310 if (SrcI->mayReadOrWriteMemory()) {
311 for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE;
312 ++DstI) {
313 if (DstI->mayReadOrWriteMemory()) {
314 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
315 OS << " da analyze - ";
316 if (auto D = DA->depends(Src: &*SrcI, Dst: &*DstI,
317 /*UnderRuntimeAssumptions=*/true)) {
318
319#ifndef NDEBUG
320 // Verify that the distance being zero is equivalent to the
321 // direction being EQ.
322 for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
323 const SCEV *Distance = D->getDistance(Level);
324 bool IsDistanceZero = Distance && Distance->isZero();
325 bool IsDirectionEQ =
326 D->getDirection(Level) == Dependence::DVEntry::EQ;
327 assert(IsDistanceZero == IsDirectionEQ &&
328 "Inconsistent distance and direction.");
329 }
330#endif
331
332 // Normalize negative direction vectors if required by clients.
333 if (NormalizeResults && D->normalize(SE: &SE))
334 OS << "normalized - ";
335 D->dump(OS);
336 } else
337 OS << "none!\n";
338 }
339 }
340 }
341 }
342}
343
344void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
345 const Module *) const {
346 dumpExampleDependence(
347 OS, DA: info.get(), SE&: getAnalysis<ScalarEvolutionWrapperPass>().getSE(),
348 LI&: getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), NormalizeResults: false);
349}
350
351PreservedAnalyses
352DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
353 OS << "Printing analysis 'Dependence Analysis' for function '" << F.getName()
354 << "':\n";
355 dumpExampleDependence(OS, DA: &FAM.getResult<DependenceAnalysis>(IR&: F),
356 SE&: FAM.getResult<ScalarEvolutionAnalysis>(IR&: F),
357 LI&: FAM.getResult<LoopAnalysis>(IR&: F), NormalizeResults);
358 return PreservedAnalyses::all();
359}
360
361//===----------------------------------------------------------------------===//
362// Dependence methods
363
364// Returns true if this is an input dependence.
365bool Dependence::isInput() const {
366 return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
367}
368
369// Returns true if this is an output dependence.
370bool Dependence::isOutput() const {
371 return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
372}
373
374// Returns true if this is an flow (aka true) dependence.
375bool Dependence::isFlow() const {
376 return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
377}
378
379// Returns true if this is an anti dependence.
380bool Dependence::isAnti() const {
381 return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
382}
383
384// Returns true if a particular level is scalar; that is,
385// if no subscript in the source or destination mention the induction
386// variable associated with the loop at this level.
387// Leave this out of line, so it will serve as a virtual method anchor
388bool Dependence::isScalar(unsigned level, bool IsSameSD) const { return false; }
389
390//===----------------------------------------------------------------------===//
391// FullDependence methods
392
393FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
394 const SCEVUnionPredicate &Assumes,
395 bool PossiblyLoopIndependent,
396 unsigned CommonLevels)
397 : Dependence(Source, Destination, Assumes), Levels(CommonLevels),
398 LoopIndependent(PossiblyLoopIndependent) {
399 SameSDLevels = 0;
400 if (CommonLevels)
401 DV = std::make_unique<DVEntry[]>(num: CommonLevels);
402}
403
404// FIXME: in some cases the meaning of a negative direction vector
405// may not be straightforward, e.g.,
406// for (int i = 0; i < 32; ++i) {
407// Src: A[i] = ...;
408// Dst: use(A[31 - i]);
409// }
410// The dependency is
411// flow { Src[i] -> Dst[31 - i] : when i >= 16 } and
412// anti { Dst[i] -> Src[31 - i] : when i < 16 },
413// -- hence a [<>].
414// As long as a dependence result contains '>' ('<>', '<=>', "*"), it
415// means that a reversed/normalized dependence needs to be considered
416// as well. Nevertheless, current isDirectionNegative() only returns
417// true with a '>' or '>=' dependency for ease of canonicalizing the
418// dependency vector, since the reverse of '<>', '<=>' and "*" is itself.
419bool FullDependence::isDirectionNegative() const {
420 for (unsigned Level = 1; Level <= Levels; ++Level) {
421 unsigned char Direction = DV[Level - 1].Direction;
422 if (Direction == Dependence::DVEntry::EQ)
423 continue;
424 if (Direction == Dependence::DVEntry::GT ||
425 Direction == Dependence::DVEntry::GE)
426 return true;
427 return false;
428 }
429 return false;
430}
431
432void FullDependence::negate(ScalarEvolution &SE) {
433 std::swap(a&: Src, b&: Dst);
434 for (unsigned Level = 1; Level <= Levels; ++Level) {
435 unsigned char Direction = DV[Level - 1].Direction;
436 // Reverse the direction vector, this means LT becomes GT
437 // and GT becomes LT.
438 unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
439 if (Direction & Dependence::DVEntry::LT)
440 RevDirection |= Dependence::DVEntry::GT;
441 if (Direction & Dependence::DVEntry::GT)
442 RevDirection |= Dependence::DVEntry::LT;
443 DV[Level - 1].Direction = RevDirection;
444 // Reverse the dependence distance as well.
445 if (DV[Level - 1].Distance != nullptr)
446 DV[Level - 1].Distance = SE.getNegativeSCEV(V: DV[Level - 1].Distance);
447 }
448}
449
450bool FullDependence::normalize(ScalarEvolution *SE) {
451 if (!isDirectionNegative())
452 return false;
453
454 LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
455 dump(dbgs()););
456 negate(SE&: *SE);
457 LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
458 dump(dbgs()););
459 return true;
460}
461
462// The rest are simple getters that hide the implementation.
463
464// getDirection - Returns the direction associated with a particular common or
465// SameSD level.
466unsigned FullDependence::getDirection(unsigned Level, bool IsSameSD) const {
467 return getDVEntry(Level, IsSameSD).Direction;
468}
469
470// Returns the distance (or NULL) associated with a particular common or
471// SameSD level.
472const SCEV *FullDependence::getDistance(unsigned Level, bool IsSameSD) const {
473 return getDVEntry(Level, IsSameSD).Distance;
474}
475
476// Returns true if a particular regular or SameSD level is scalar; that is,
477// if no subscript in the source or destination mention the induction variable
478// associated with the loop at this level.
479bool FullDependence::isScalar(unsigned Level, bool IsSameSD) const {
480 return getDVEntry(Level, IsSameSD).Scalar;
481}
482
483// inSameSDLoops - Returns true if this level is an SameSD level, i.e.,
484// performed across two separate loop nests that have the Same iteration space
485// and Depth.
486bool FullDependence::inSameSDLoops(unsigned Level) const {
487 assert(0 < Level && Level <= static_cast<unsigned>(Levels) + SameSDLevels &&
488 "Level out of range");
489 return Level > Levels;
490}
491
492//===----------------------------------------------------------------------===//
493// DependenceInfo methods
494
495// For debugging purposes. Dumps a dependence to OS.
496void Dependence::dump(raw_ostream &OS) const {
497 if (isConfused())
498 OS << "confused";
499 else {
500 if (isFlow())
501 OS << "flow";
502 else if (isOutput())
503 OS << "output";
504 else if (isAnti())
505 OS << "anti";
506 else if (isInput())
507 OS << "input";
508 dumpImp(OS);
509 unsigned SameSDLevels = getSameSDLevels();
510 if (SameSDLevels > 0) {
511 OS << " / assuming " << SameSDLevels << " loop level(s) fused: ";
512 dumpImp(OS, IsSameSD: true);
513 }
514 }
515 OS << "!\n";
516
517 SCEVUnionPredicate Assumptions = getRuntimeAssumptions();
518 if (!Assumptions.isAlwaysTrue()) {
519 OS << " Runtime Assumptions:\n";
520 Assumptions.print(OS, Depth: 2);
521 }
522}
523
524// For debugging purposes. Dumps a dependence to OS with or without considering
525// the SameSD levels.
526void Dependence::dumpImp(raw_ostream &OS, bool IsSameSD) const {
527 unsigned Levels = getLevels();
528 unsigned SameSDLevels = getSameSDLevels();
529 bool OnSameSD = false;
530 unsigned LevelNum = Levels;
531 if (IsSameSD)
532 LevelNum += SameSDLevels;
533 OS << " [";
534 for (unsigned II = 1; II <= LevelNum; ++II) {
535 if (!OnSameSD && inSameSDLoops(Level: II))
536 OnSameSD = true;
537 const SCEV *Distance = getDistance(Level: II, SameSD: OnSameSD);
538 if (Distance)
539 OS << *Distance;
540 else if (isScalar(level: II, IsSameSD: OnSameSD))
541 OS << "S";
542 else {
543 unsigned Direction = getDirection(Level: II, SameSD: OnSameSD);
544 if (Direction == DVEntry::ALL)
545 OS << "*";
546 else {
547 if (Direction & DVEntry::LT)
548 OS << "<";
549 if (Direction & DVEntry::EQ)
550 OS << "=";
551 if (Direction & DVEntry::GT)
552 OS << ">";
553 }
554 }
555 if (II < LevelNum)
556 OS << " ";
557 }
558 if (isLoopIndependent())
559 OS << "|<";
560 OS << "]";
561}
562
563// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
564// underlaying objects. If LocA and LocB are known to not alias (for any reason:
565// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
566// Otherwise the underlying objects are checked to see if they point to
567// different identifiable objects.
568static AliasResult underlyingObjectsAlias(AAResults *AA, const DataLayout &DL,
569 const MemoryLocation &LocA,
570 const MemoryLocation &LocB) {
571 // Check the original locations (minus size) for noalias, which can happen for
572 // tbaa, incompatible underlying object locations, etc.
573 MemoryLocation LocAS =
574 MemoryLocation::getBeforeOrAfter(Ptr: LocA.Ptr, AATags: LocA.AATags);
575 MemoryLocation LocBS =
576 MemoryLocation::getBeforeOrAfter(Ptr: LocB.Ptr, AATags: LocB.AATags);
577 BatchAAResults BAA(*AA);
578 BAA.enableCrossIterationMode();
579
580 if (BAA.isNoAlias(LocA: LocAS, LocB: LocBS))
581 return AliasResult::NoAlias;
582
583 // Check the underlying objects are the same
584 const Value *AObj = getUnderlyingObject(V: LocA.Ptr);
585 const Value *BObj = getUnderlyingObject(V: LocB.Ptr);
586
587 // If the underlying objects are the same, they must alias
588 if (AObj == BObj)
589 return AliasResult::MustAlias;
590
591 // We may have hit the recursion limit for underlying objects, or have
592 // underlying objects where we don't know they will alias.
593 if (!isIdentifiedObject(V: AObj) || !isIdentifiedObject(V: BObj))
594 return AliasResult::MayAlias;
595
596 // Otherwise we know the objects are different and both identified objects so
597 // must not alias.
598 return AliasResult::NoAlias;
599}
600
601// Returns true if the load or store can be analyzed. Atomic and volatile
602// operations have properties which this analysis does not understand.
603static bool isLoadOrStore(const Instruction *I) {
604 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I))
605 return LI->isUnordered();
606 else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I))
607 return SI->isUnordered();
608 return false;
609}
610
611// Returns true if two loops have the Same iteration Space and Depth. To be
612// more specific, two loops have SameSD if they are in the same nesting
613// depth and have the same backedge count. SameSD stands for Same iteration
614// Space and Depth.
615bool DependenceInfo::haveSameSD(const Loop *SrcLoop,
616 const Loop *DstLoop) const {
617 if (SrcLoop == DstLoop)
618 return true;
619
620 if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
621 return false;
622
623 if (!SrcLoop || !SrcLoop->getLoopLatch() || !DstLoop ||
624 !DstLoop->getLoopLatch())
625 return false;
626
627 const SCEV *SrcUB = SE->getBackedgeTakenCount(L: SrcLoop);
628 const SCEV *DstUB = SE->getBackedgeTakenCount(L: DstLoop);
629 if (isa<SCEVCouldNotCompute>(Val: SrcUB) || isa<SCEVCouldNotCompute>(Val: DstUB))
630 return false;
631
632 Type *WiderType = SE->getWiderType(Ty1: SrcUB->getType(), Ty2: DstUB->getType());
633 SrcUB = SE->getNoopOrZeroExtend(V: SrcUB, Ty: WiderType);
634 DstUB = SE->getNoopOrZeroExtend(V: DstUB, Ty: WiderType);
635
636 if (SrcUB == DstUB)
637 return true;
638
639 return false;
640}
641
642// Examines the loop nesting of the Src and Dst
643// instructions and establishes their shared loops. Sets the variables
644// CommonLevels, SrcLevels, and MaxLevels.
645// The source and destination instructions needn't be contained in the same
646// loop. The routine establishNestingLevels finds the level of most deeply
647// nested loop that contains them both, CommonLevels. An instruction that's
648// not contained in a loop is at level = 0. MaxLevels is equal to the level
649// of the source plus the level of the destination, minus CommonLevels.
650// This lets us allocate vectors MaxLevels in length, with room for every
651// distinct loop referenced in both the source and destination subscripts.
652// The variable SrcLevels is the nesting depth of the source instruction.
653// It's used to help calculate distinct loops referenced by the destination.
654// Here's the map from loops to levels:
655// 0 - unused
656// 1 - outermost common loop
657// ... - other common loops
658// CommonLevels - innermost common loop
659// ... - loops containing Src but not Dst
660// SrcLevels - innermost loop containing Src but not Dst
661// ... - loops containing Dst but not Src
662// MaxLevels - innermost loops containing Dst but not Src
663// Consider the follow code fragment:
664// for (a = ...) {
665// for (b = ...) {
666// for (c = ...) {
667// for (d = ...) {
668// A[] = ...;
669// }
670// }
671// for (e = ...) {
672// for (f = ...) {
673// for (g = ...) {
674// ... = A[];
675// }
676// }
677// }
678// }
679// }
680// If we're looking at the possibility of a dependence between the store
681// to A (the Src) and the load from A (the Dst), we'll note that they
682// have 2 loops in common, so CommonLevels will equal 2 and the direction
683// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
684// A map from loop names to loop numbers would look like
685// a - 1
686// b - 2 = CommonLevels
687// c - 3
688// d - 4 = SrcLevels
689// e - 5
690// f - 6
691// g - 7 = MaxLevels
692// SameSDLevels counts the number of levels after common levels that are
693// not common but have the same iteration space and depth. Internally this
694// is checked using haveSameSD. Currently we only need to check for SameSD
695// levels up to one level after the common levels, and therefore SameSDLevels
696// will be either 0 or 1.
697// 1. Assume that in this code fragment, levels c and e have the same iteration
698// space and depth, but levels d and f does not. Then SameSDLevels is set to 1.
699// In that case the level numbers for the previous code look like
700// a - 1
701// b - 2
702// c,e - 3 = CommonLevels
703// d - 4 = SrcLevels
704// f - 5
705// g - 6 = MaxLevels
706void DependenceInfo::establishNestingLevels(const Instruction *Src,
707 const Instruction *Dst) {
708 const BasicBlock *SrcBlock = Src->getParent();
709 const BasicBlock *DstBlock = Dst->getParent();
710 unsigned SrcLevel = LI->getLoopDepth(BB: SrcBlock);
711 unsigned DstLevel = LI->getLoopDepth(BB: DstBlock);
712 const Loop *SrcLoop = LI->getLoopFor(BB: SrcBlock);
713 const Loop *DstLoop = LI->getLoopFor(BB: DstBlock);
714 SrcLevels = SrcLevel;
715 MaxLevels = SrcLevel + DstLevel;
716 SameSDLevels = 0;
717 while (SrcLevel > DstLevel) {
718 SrcLoop = SrcLoop->getParentLoop();
719 SrcLevel--;
720 }
721 while (DstLevel > SrcLevel) {
722 DstLoop = DstLoop->getParentLoop();
723 DstLevel--;
724 }
725
726 const Loop *SrcUncommonFrontier = nullptr, *DstUncommonFrontier = nullptr;
727 // Find the first uncommon level pair and check if the associated levels have
728 // the SameSD.
729 while (SrcLoop != DstLoop) {
730 SrcUncommonFrontier = SrcLoop;
731 DstUncommonFrontier = DstLoop;
732 SrcLoop = SrcLoop->getParentLoop();
733 DstLoop = DstLoop->getParentLoop();
734 SrcLevel--;
735 }
736 if (SrcUncommonFrontier && DstUncommonFrontier &&
737 haveSameSD(SrcLoop: SrcUncommonFrontier, DstLoop: DstUncommonFrontier))
738 SameSDLevels = 1;
739 CommonLevels = SrcLevel;
740 MaxLevels -= CommonLevels;
741}
742
743// Given one of the loops containing the source, return
744// its level index in our numbering scheme.
745unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
746 return SrcLoop->getLoopDepth();
747}
748
749// Given one of the loops containing the destination,
750// return its level index in our numbering scheme.
751unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
752 unsigned D = DstLoop->getLoopDepth();
753 if (D > CommonLevels)
754 // This tries to make sure that we assign unique numbers to src and dst when
755 // the memory accesses reside in different loops that have the same depth.
756 return D - CommonLevels + SrcLevels;
757 else
758 return D;
759}
760
761// Returns true if Expression is loop invariant in LoopNest.
762bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
763 const Loop *LoopNest) const {
764 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of
765 // any loop as invariant, because we only consier expression evaluation at a
766 // specific position (where the array access takes place), and not across the
767 // entire function.
768 if (!LoopNest)
769 return true;
770
771 // If the expression is invariant in the outermost loop of the loop nest, it
772 // is invariant anywhere in the loop nest.
773 return SE->isLoopInvariant(S: Expression, L: LoopNest->getOutermostLoop());
774}
775
776// Finds the set of loops from the LoopNest that
777// have a level <= CommonLevels and are referred to by the SCEV Expression.
778void DependenceInfo::collectCommonLoops(const SCEV *Expression,
779 const Loop *LoopNest,
780 SmallBitVector &Loops) const {
781 while (LoopNest) {
782 unsigned Level = LoopNest->getLoopDepth();
783 if (Level <= CommonLevels && !SE->isLoopInvariant(S: Expression, L: LoopNest))
784 Loops.set(Level);
785 LoopNest = LoopNest->getParentLoop();
786 }
787}
788
789// Examine the scev and return true iff it's affine.
790// Collect any loops mentioned in the set of "Loops".
791bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
792 SmallBitVector &Loops, bool IsSrc) {
793 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
794 if (!AddRec)
795 return isLoopInvariant(Expression: Expr, LoopNest);
796
797 // The AddRec must depend on one of the containing loops. Otherwise,
798 // mapSrcLoop and mapDstLoop return indices outside the intended range. This
799 // can happen when a subscript in one loop references an IV from a sibling
800 // loop that could not be replaced with a concrete exit value by
801 // getSCEVAtScope.
802 const Loop *L = LoopNest;
803 while (L && AddRec->getLoop() != L)
804 L = L->getParentLoop();
805 if (!L)
806 return false;
807
808 if (!AddRec->hasNoSignedWrap())
809 return false;
810
811 const SCEV *Start = AddRec->getStart();
812 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
813 if (!isLoopInvariant(Expression: Step, LoopNest))
814 return false;
815 if (IsSrc)
816 Loops.set(mapSrcLoop(SrcLoop: AddRec->getLoop()));
817 else
818 Loops.set(mapDstLoop(DstLoop: AddRec->getLoop()));
819 return checkSubscript(Expr: Start, LoopNest, Loops, IsSrc);
820}
821
822// Examine the scev and return true iff it's linear.
823// Collect any loops mentioned in the set of "Loops".
824bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
825 SmallBitVector &Loops) {
826 return checkSubscript(Expr: Src, LoopNest, Loops, IsSrc: true);
827}
828
829// Examine the scev and return true iff it's linear.
830// Collect any loops mentioned in the set of "Loops".
831bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
832 SmallBitVector &Loops) {
833 return checkSubscript(Expr: Dst, LoopNest, Loops, IsSrc: false);
834}
835
836// Examines the subscript pair (the Src and Dst SCEVs)
837// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
838// Collects the associated loops in a set.
839DependenceInfo::Subscript::ClassificationKind
840DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
841 const SCEV *Dst, const Loop *DstLoopNest,
842 SmallBitVector &Loops) {
843 SmallBitVector SrcLoops(MaxLevels + 1);
844 SmallBitVector DstLoops(MaxLevels + 1);
845 if (!checkSrcSubscript(Src, LoopNest: SrcLoopNest, Loops&: SrcLoops))
846 return Subscript::NonLinear;
847 if (!checkDstSubscript(Dst, LoopNest: DstLoopNest, Loops&: DstLoops))
848 return Subscript::NonLinear;
849 Loops = SrcLoops;
850 Loops |= DstLoops;
851 unsigned N = Loops.count();
852 if (N == 0)
853 return Subscript::ZIV;
854 if (N == 1)
855 return Subscript::SIV;
856 if (N == 2 && SrcLoops.count() == 1 && DstLoops.count() == 1)
857 return Subscript::RDIV;
858 return Subscript::MIV;
859}
860
861// All subscripts are all the same type.
862// Loop bound may be smaller (e.g., a char).
863// Should zero extend loop bound, since it's always >= 0.
864// This routine collects upper bound and extends or truncates if needed.
865// Truncating is safe when subscripts are known not to wrap. Cases without
866// nowrap flags should have been rejected earlier.
867// Return null if no bound available.
868const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
869 if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
870 const SCEV *UB = SE->getBackedgeTakenCount(L);
871 return SE->getTruncateOrZeroExtend(V: UB, Ty: T);
872 }
873 return nullptr;
874}
875
876// Calls collectUpperBound(), then attempts to cast it to APInt.
877// If the cast fails, returns std::nullopt.
878std::optional<APInt>
879DependenceInfo::collectNonNegativeConstantUpperBound(const Loop *L,
880 Type *T) const {
881 if (const SCEV *UB = collectUpperBound(L, T))
882 if (auto *C = dyn_cast<SCEVConstant>(Val: UB)) {
883 APInt Res = C->getAPInt();
884 if (Res.isNonNegative())
885 return Res;
886 }
887 return std::nullopt;
888}
889
890/// Returns \p A - \p B if it guaranteed not to signed wrap. Otherwise returns
891/// nullptr. \p A and \p B must have the same integer type.
892static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
893 ScalarEvolution &SE) {
894 if (SE.willNotOverflow(BinOp: Instruction::Sub, /*Signed=*/true, LHS: A, RHS: B))
895 return SE.getMinusSCEV(LHS: A, RHS: B);
896 return nullptr;
897}
898
899/// Returns true iff \p Test is enabled.
900static bool isDependenceTestEnabled(DependenceTestType Test) {
901 if (EnableDependenceTest == DependenceTestType::All)
902 return true;
903 // The Banerjee test is disabled by default because of correctness issues,
904 // but can be enabled with -da-enable-dependence-test=banerjee-miv or
905 // -da-enable-dependence-test=all.
906 if (EnableDependenceTest == DependenceTestType::Default)
907 return Test != DependenceTestType::BanerjeeMIV;
908 return EnableDependenceTest == Test;
909}
910
911// testZIV -
912// When we have a pair of subscripts of the form [c1] and [c2],
913// where c1 and c2 are both loop invariant, we attack it using
914// the ZIV test. Basically, we test by comparing the two values,
915// but there are actually three possible results:
916// 1) the values are equal, so there's a dependence
917// 2) the values are different, so there's no dependence
918// 3) the values might be equal, so we have to assume a dependence.
919//
920// Return true if dependence disproved.
921bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
922 FullDependence &Result) const {
923 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
924 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
925 ++ZIVapplications;
926 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: Src, RHS: Dst)) {
927 LLVM_DEBUG(dbgs() << " provably dependent\n");
928 return false; // provably dependent
929 }
930 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_NE, LHS: Src, RHS: Dst)) {
931 LLVM_DEBUG(dbgs() << " provably independent\n");
932 ++ZIVindependence;
933 return true; // provably independent
934 }
935 LLVM_DEBUG(dbgs() << " possibly dependent\n");
936 return false; // possibly dependent
937}
938
939// strongSIVtest -
940// From the paper, Practical Dependence Testing, Section 4.2.1
941//
942// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
943// where i is an induction variable, c1 and c2 are loop invariant,
944// and a is a constant, we can solve it exactly using the Strong SIV test.
945//
946// Can prove independence. Failing that, can compute distance (and direction).
947// In the presence of symbolic terms, we can sometimes make progress.
948//
949// If there's a dependence,
950//
951// c1 + a*i = c2 + a*i'
952//
953// The dependence distance is
954//
955// d = i' - i = (c1 - c2)/a
956//
957// A dependence only exists if d is an integer and abs(d) <= U, where U is the
958// loop's upper bound. If a dependence exists, the dependence direction is
959// defined as
960//
961// { < if d > 0
962// direction = { = if d = 0
963// { > if d < 0
964//
965// Return true if dependence disproved.
966bool DependenceInfo::strongSIVtest(const SCEVAddRecExpr *Src,
967 const SCEVAddRecExpr *Dst, unsigned Level,
968 FullDependence &Result,
969 bool UnderRuntimeAssumptions) {
970 if (!isDependenceTestEnabled(Test: DependenceTestType::StrongSIV))
971 return false;
972
973 const SCEV *Coeff = Src->getStepRecurrence(SE&: *SE);
974 assert(Coeff == Dst->getStepRecurrence(*SE) &&
975 "Expecting same coefficient in Strong SIV test");
976 const SCEV *SrcConst = Src->getStart();
977 const SCEV *DstConst = Dst->getStart();
978 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
979 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
980 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
981 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
982 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
983 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
984 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
985 ++StrongSIVapplications;
986 assert(0 < Level && Level <= CommonLevels && "level out of range");
987 Level--;
988
989 const SCEV *Delta = minusSCEVNoSignedOverflow(A: SrcConst, B: DstConst, SE&: *SE);
990 if (!Delta)
991 return false;
992 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
993 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
994
995 // Can we compute distance?
996 if (isa<SCEVConstant>(Val: Delta) && isa<SCEVConstant>(Val: Coeff)) {
997 APInt ConstDelta = cast<SCEVConstant>(Val: Delta)->getAPInt();
998 APInt ConstCoeff = cast<SCEVConstant>(Val: Coeff)->getAPInt();
999 APInt Distance = ConstDelta; // these need to be initialized
1000 APInt Remainder = ConstDelta;
1001 APInt::sdivrem(LHS: ConstDelta, RHS: ConstCoeff, Quotient&: Distance, Remainder);
1002 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1003 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1004 // Make sure Coeff divides Delta exactly
1005 if (Remainder != 0) {
1006 // Coeff doesn't divide Distance, no dependence
1007 ++StrongSIVindependence;
1008 ++StrongSIVsuccesses;
1009 return true;
1010 }
1011 Result.DV[Level].Distance = SE->getConstant(Val: Distance);
1012 if (Distance.sgt(RHS: 0))
1013 Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1014 else if (Distance.slt(RHS: 0))
1015 Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1016 else
1017 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1018 ++StrongSIVsuccesses;
1019 } else if (Delta->isZero()) {
1020 // Check if coefficient could be zero. If so, 0/0 is undefined and we
1021 // cannot conclude that only same-iteration dependencies exist.
1022 // When coeff=0, all iterations access the same location.
1023 if (SE->isKnownNonZero(S: Coeff)) {
1024 LLVM_DEBUG(
1025 dbgs() << "\t Coefficient proven non-zero by SCEV analysis\n");
1026 } else {
1027 // Cannot prove at compile time, would need runtime assumption.
1028 if (UnderRuntimeAssumptions) {
1029 const SCEVPredicate *Pred = SE->getComparePredicate(
1030 Pred: ICmpInst::ICMP_NE, LHS: Coeff, RHS: SE->getZero(Ty: Coeff->getType()));
1031 Result.Assumptions = Result.Assumptions.getUnionWith(N: Pred, SE&: *SE);
1032 LLVM_DEBUG(dbgs() << "\t Added runtime assumption: " << *Coeff
1033 << " != 0\n");
1034 } else {
1035 // Cannot add runtime assumptions, this test cannot handle this case.
1036 // Let more complex tests try.
1037 LLVM_DEBUG(dbgs() << "\t Would need runtime assumption " << *Coeff
1038 << " != 0, but not allowed. Failing this test.\n");
1039 return false;
1040 }
1041 }
1042 // Since 0/X == 0 (where X is known non-zero or assumed non-zero).
1043 Result.DV[Level].Distance = Delta;
1044 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1045 ++StrongSIVsuccesses;
1046 } else {
1047 if (Coeff->isOne()) {
1048 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
1049 Result.DV[Level].Distance = Delta; // since X/1 == X
1050 }
1051
1052 // maybe we can get a useful direction
1053 bool DeltaMaybeZero = !SE->isKnownNonZero(S: Delta);
1054 bool DeltaMaybePositive = !SE->isKnownNonPositive(S: Delta);
1055 bool DeltaMaybeNegative = !SE->isKnownNonNegative(S: Delta);
1056 bool CoeffMaybePositive = !SE->isKnownNonPositive(S: Coeff);
1057 bool CoeffMaybeNegative = !SE->isKnownNonNegative(S: Coeff);
1058 // The double negatives above are confusing.
1059 // It helps to read !SE->isKnownNonZero(Delta)
1060 // as "Delta might be Zero"
1061 unsigned NewDirection = Dependence::DVEntry::NONE;
1062 if ((DeltaMaybePositive && CoeffMaybePositive) ||
1063 (DeltaMaybeNegative && CoeffMaybeNegative))
1064 NewDirection = Dependence::DVEntry::LT;
1065 if (DeltaMaybeZero)
1066 NewDirection |= Dependence::DVEntry::EQ;
1067 if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1068 (DeltaMaybePositive && CoeffMaybeNegative))
1069 NewDirection |= Dependence::DVEntry::GT;
1070 if (NewDirection < Result.DV[Level].Direction)
1071 ++StrongSIVsuccesses;
1072 Result.DV[Level].Direction &= NewDirection;
1073 }
1074 return false;
1075}
1076
1077// weakCrossingSIVtest -
1078// From the paper, Practical Dependence Testing, Section 4.2.2
1079//
1080// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1081// where i is an induction variable, c1 and c2 are loop invariant,
1082// and a is a constant, we can solve it exactly using the
1083// Weak-Crossing SIV test.
1084//
1085// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1086// the two lines, where i = i', yielding
1087//
1088// c1 + a*i = c2 - a*i
1089// 2a*i = c2 - c1
1090// i = (c2 - c1)/2a
1091//
1092// If i < 0, there is no dependence.
1093// If i > upperbound, there is no dependence.
1094// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1095// If i = upperbound, there's a dependence with distance = 0.
1096// If i is integral, there's a dependence (all directions).
1097// If the non-integer part = 1/2, there's a dependence (<> directions).
1098// Otherwise, there's no dependence.
1099//
1100// Can prove independence. Failing that,
1101// can sometimes refine the directions.
1102// Can determine iteration for splitting.
1103//
1104// Return true if dependence disproved.
1105bool DependenceInfo::weakCrossingSIVtest(const SCEVAddRecExpr *Src,
1106 const SCEVAddRecExpr *Dst,
1107 unsigned Level,
1108 FullDependence &Result) const {
1109 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakCrossingSIV))
1110 return false;
1111
1112 const SCEV *Coeff = Src->getStepRecurrence(SE&: *SE);
1113 const SCEV *SrcConst = Src->getStart();
1114 const SCEV *DstConst = Dst->getStart();
1115
1116 assert(Coeff == SE->getNegativeSCEV(Dst->getStepRecurrence(*SE)) &&
1117 "Unexpected input for weakCrossingSIVtest");
1118
1119 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1120 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
1121 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1122 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1123 ++WeakCrossingSIVapplications;
1124 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1125 Level--;
1126 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
1127 if (!Delta)
1128 return false;
1129
1130 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1131 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: Coeff);
1132 if (!ConstCoeff)
1133 return false;
1134
1135 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1136 if (!ConstDelta)
1137 return false;
1138
1139 ConstantRange SrcRange = SE->getSignedRange(S: Src);
1140 ConstantRange DstRange = SE->getSignedRange(S: Dst);
1141 LLVM_DEBUG(dbgs() << "\t SrcRange = " << SrcRange << "\n");
1142 LLVM_DEBUG(dbgs() << "\t DstRange = " << DstRange << "\n");
1143 if (SrcRange.intersectWith(CR: DstRange).isSingleElement()) {
1144 // The ranges touch at exactly one value (i = i' = 0 or i = i' = BTC).
1145 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1146 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1147 ++WeakCrossingSIVsuccesses;
1148 if (!Result.DV[Level].Direction) {
1149 ++WeakCrossingSIVindependence;
1150 return true;
1151 }
1152 Result.DV[Level].Distance = SE->getZero(Ty: Delta->getType());
1153 return false;
1154 }
1155
1156 // check that Coeff divides Delta
1157 APInt APDelta = ConstDelta->getAPInt();
1158 APInt APCoeff = ConstCoeff->getAPInt();
1159 APInt Distance = APDelta; // these need to be initialzed
1160 APInt Remainder = APDelta;
1161 APInt::sdivrem(LHS: APDelta, RHS: APCoeff, Quotient&: Distance, Remainder);
1162 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1163 if (Remainder != 0) {
1164 // Coeff doesn't divide Delta, no dependence
1165 ++WeakCrossingSIVindependence;
1166 ++WeakCrossingSIVsuccesses;
1167 return true;
1168 }
1169 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1170
1171 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1172 if (Distance[0]) {
1173 // Equal direction isn't possible
1174 Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ;
1175 ++WeakCrossingSIVsuccesses;
1176 }
1177 return false;
1178}
1179
1180// Kirch's algorithm, from
1181//
1182// Optimizing Supercompilers for Supercomputers
1183// Michael Wolfe
1184// MIT Press, 1989
1185//
1186// Program 2.1, page 29.
1187// Computes the GCD of AM and BM.
1188// Also finds a solution to the equation ax - by = gcd(a, b).
1189// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1190//
1191// We don't use OverflowSafeSignedAPInt here because it's known that this
1192// algorithm doesn't overflow.
1193static std::optional<bool> findGCD(unsigned Bits, const APInt &AM,
1194 const APInt &BM, const APInt &Delta,
1195 APInt &G, APInt &X, APInt &Y) {
1196 LLVM_DEBUG(dbgs() << "\t AM = " << AM << "\n");
1197 LLVM_DEBUG(dbgs() << "\t BM = " << BM << "\n");
1198 LLVM_DEBUG(dbgs() << "\t Delta = " << Delta << "\n");
1199 APInt A0(Bits, 1, true), A1(Bits, 0, true);
1200 APInt B0(Bits, 0, true), B1(Bits, 1, true);
1201
1202 // APInt::abs will overflow. In that case, bail out early.
1203 if (AM.isMinSignedValue() || BM.isMinSignedValue())
1204 return std::nullopt;
1205
1206 APInt G0 = AM.abs();
1207 APInt G1 = BM.abs();
1208 APInt Q = G0; // these need to be initialized
1209 APInt R = G0;
1210 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1211 while (R != 0) {
1212 // clang-format off
1213 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1214 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1215 G0 = G1; G1 = R;
1216 // clang-format on
1217 APInt::sdivrem(LHS: G0, RHS: G1, Quotient&: Q, Remainder&: R);
1218 }
1219 G = G1;
1220 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
1221 X = AM.slt(RHS: 0) ? -A1 : A1;
1222 Y = BM.slt(RHS: 0) ? B1 : -B1;
1223
1224 // make sure gcd divides Delta
1225 R = Delta.srem(RHS: G);
1226 if (R != 0)
1227 return true; // gcd doesn't divide Delta, no dependence
1228 Q = Delta.sdiv(RHS: G);
1229 return false;
1230}
1231
1232static OverflowSafeSignedAPInt
1233floorOfQuotient(const OverflowSafeSignedAPInt &OA,
1234 const OverflowSafeSignedAPInt &OB) {
1235 if (!OA || !OB)
1236 return OverflowSafeSignedAPInt();
1237
1238 APInt A = *OA;
1239 APInt B = *OB;
1240 APInt Q = A; // these need to be initialized
1241 APInt R = A;
1242 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1243 if (R == 0)
1244 return Q;
1245 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1246 return Q;
1247 return OverflowSafeSignedAPInt(Q) - 1;
1248}
1249
1250static OverflowSafeSignedAPInt
1251ceilingOfQuotient(const OverflowSafeSignedAPInt &OA,
1252 const OverflowSafeSignedAPInt &OB) {
1253 if (!OA || !OB)
1254 return OverflowSafeSignedAPInt();
1255
1256 APInt A = *OA;
1257 APInt B = *OB;
1258 APInt Q = A; // these need to be initialized
1259 APInt R = A;
1260 APInt::sdivrem(LHS: A, RHS: B, Quotient&: Q, Remainder&: R);
1261 if (R == 0)
1262 return Q;
1263 if ((A.sgt(RHS: 0) && B.sgt(RHS: 0)) || (A.slt(RHS: 0) && B.slt(RHS: 0)))
1264 return OverflowSafeSignedAPInt(Q) + 1;
1265 return Q;
1266}
1267
1268/// Given an affine expression of the form A*k + B, where k is an arbitrary
1269/// integer, infer the possible range of k based on the known range of the
1270/// affine expression. If we know A*k + B is non-negative, i.e.,
1271///
1272/// A*k + B >=s 0
1273///
1274/// we can derive the following inequalities for k when A is positive:
1275///
1276/// k >=s -B / A
1277///
1278/// Since k is an integer, it means k is greater than or equal to the
1279/// ceil(-B / A).
1280///
1281/// If the upper bound of the affine expression \p UB is passed, the following
1282/// inequality can be derived as well:
1283///
1284/// A*k + B <=s UB
1285///
1286/// which leads to:
1287///
1288/// k <=s (UB - B) / A
1289///
1290/// Again, as k is an integer, it means k is less than or equal to the
1291/// floor((UB - B) / A).
1292///
1293/// The similar logic applies when A is negative, but the inequalities sign flip
1294/// while working with them.
1295///
1296/// Preconditions: \p A is non-zero, and we know A*k + B and \p UB are
1297/// non-negative.
1298static std::pair<OverflowSafeSignedAPInt, OverflowSafeSignedAPInt>
1299inferDomainOfAffine(OverflowSafeSignedAPInt A, OverflowSafeSignedAPInt B,
1300 OverflowSafeSignedAPInt UB) {
1301 assert(A && B && "A and B must be available");
1302 assert(*A != 0 && "A must be non-zero");
1303 assert((!UB || UB->isNonNegative()) && "UB must be non-negative");
1304 OverflowSafeSignedAPInt TL, TU;
1305 if (A->sgt(RHS: 0)) {
1306 TL = ceilingOfQuotient(OA: -B, OB: A);
1307 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1308
1309 // New bound check - modification to Banerjee's e3 check
1310 TU = floorOfQuotient(OA: UB - B, OB: A);
1311 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1312 } else {
1313 TU = floorOfQuotient(OA: -B, OB: A);
1314 LLVM_DEBUG(if (TU) dbgs() << "\t Possible TU = " << *TU << "\n");
1315
1316 // New bound check - modification to Banerjee's e3 check
1317 TL = ceilingOfQuotient(OA: UB - B, OB: A);
1318 LLVM_DEBUG(if (TL) dbgs() << "\t Possible TL = " << *TL << "\n");
1319 }
1320 return std::make_pair(x&: TL, y&: TU);
1321}
1322
1323// exactSIVtest -
1324// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1325// where i is an induction variable, c1 and c2 are loop invariant, and a1
1326// and a2 are constant, we can solve it exactly using an algorithm developed
1327// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1328//
1329// Dependence Analysis for Supercomputing
1330// Utpal Banerjee
1331// Kluwer Academic Publishers, 1988
1332//
1333// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1334// so use them if possible. They're also a bit better with symbolics and,
1335// in the case of the strong SIV test, can compute Distances.
1336//
1337// Return true if dependence disproved.
1338//
1339// This is a modified version of the original Banerjee algorithm. The original
1340// only tested whether Dst depends on Src. This algorithm extends that and
1341// returns all the dependencies that exist between Dst and Src.
1342bool DependenceInfo::exactSIVtest(const SCEVAddRecExpr *Src,
1343 const SCEVAddRecExpr *Dst, unsigned Level,
1344 FullDependence &Result) const {
1345 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactSIV))
1346 return false;
1347
1348 LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1349 ++ExactSIVapplications;
1350 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1351 Level--;
1352 bool Res = exactTestImpl(Src, Dst, Result, Level);
1353 if (Res) {
1354 ++ExactSIVsuccesses;
1355 ++ExactSIVindependence;
1356 }
1357 return Res;
1358}
1359
1360// Return true if the divisor evenly divides the dividend.
1361static bool isRemainderZero(const SCEVConstant *Dividend,
1362 const SCEVConstant *Divisor) {
1363 const APInt &ConstDividend = Dividend->getAPInt();
1364 const APInt &ConstDivisor = Divisor->getAPInt();
1365 return ConstDividend.srem(RHS: ConstDivisor) == 0;
1366}
1367
1368bool DependenceInfo::weakZeroSIVtestImpl(const SCEVAddRecExpr *AR,
1369 const SCEV *Const, unsigned Level,
1370 FullDependence &Result) const {
1371 const SCEV *ARCoeff = AR->getStepRecurrence(SE&: *SE);
1372 const SCEV *ARConst = AR->getStart();
1373
1374 if (Const == ARConst && SE->isKnownNonZero(S: ARCoeff)) {
1375 if (Level < CommonLevels) {
1376 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1377 ++WeakZeroSIVsuccesses;
1378 }
1379 return false; // dependences caused by first iteration
1380 }
1381
1382 const SCEV *Delta = minusSCEVNoSignedOverflow(A: Const, B: ARConst, SE&: *SE);
1383 if (!Delta)
1384 return false;
1385 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Val: ARCoeff);
1386 if (!ConstCoeff)
1387 return false;
1388
1389 if (const SCEV *UpperBound =
1390 collectUpperBound(L: AR->getLoop(), T: Delta->getType())) {
1391 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1392 bool OverlapAtLast = [&] {
1393 if (!SE->isKnownNonZero(S: ConstCoeff))
1394 return false;
1395 const SCEV *Last = AR->evaluateAtIteration(It: UpperBound, SE&: *SE);
1396 return Last == Const;
1397 }();
1398 if (OverlapAtLast) {
1399 // dependences caused by last iteration
1400 if (Level < CommonLevels) {
1401 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1402 ++WeakZeroSIVsuccesses;
1403 }
1404 return false;
1405 }
1406 }
1407
1408 // if ARCoeff doesn't divide Delta, then no dependence
1409 if (isa<SCEVConstant>(Val: Delta) &&
1410 !isRemainderZero(Dividend: cast<SCEVConstant>(Val: Delta), Divisor: ConstCoeff)) {
1411 ++WeakZeroSIVindependence;
1412 ++WeakZeroSIVsuccesses;
1413 return true;
1414 }
1415 return false;
1416}
1417
1418// weakZeroSrcSIVtest -
1419// From the paper, Practical Dependence Testing, Section 4.2.2
1420//
1421// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1422// where i is an induction variable, c1 and c2 are loop invariant,
1423// and a is a constant, we can solve it exactly using the
1424// Weak-Zero SIV test.
1425//
1426// Given
1427//
1428// c1 = c2 + a*i
1429//
1430// we get
1431//
1432// (c1 - c2)/a = i
1433//
1434// If i is not an integer, there's no dependence.
1435// If i < 0 or > UB, there's no dependence.
1436// If i = 0, the direction is >=.
1437// If i = UB, the direction is <=.
1438// Otherwise, the direction is *.
1439//
1440// Can prove independence. Failing that, we can sometimes refine
1441// the directions. Can sometimes show that first or last
1442// iteration carries all the dependences (so worth peeling).
1443//
1444// (see also weakZeroDstSIVtest)
1445//
1446// Return true if dependence disproved.
1447bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *SrcConst,
1448 const SCEVAddRecExpr *Dst,
1449 unsigned Level,
1450 FullDependence &Result) const {
1451 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
1452 return false;
1453
1454 // For the WeakSIV test, it's possible the loop isn't common to
1455 // the Src and Dst loops. If it isn't, then there's no need to
1456 // record a direction.
1457 [[maybe_unused]] const SCEV *DstCoeff = Dst->getStepRecurrence(SE&: *SE);
1458 [[maybe_unused]] const SCEV *DstConst = Dst->getStart();
1459 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1460 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1461 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1462 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1463 ++WeakZeroSIVapplications;
1464 assert(0 < Level && Level <= MaxLevels && "Level out of range");
1465 Level--;
1466
1467 // We have analyzed a dependence from Src to Dst, so \c Result may represent a
1468 // dependence in that direction. However, \c weakZeroSIVtestImpl will analyze
1469 // a dependence from \c Dst to \c SrcConst. To keep the consistency, we need
1470 // to negate the current result before passing it to \c weakZeroSIVtestImpl,
1471 // and negate it back after that.
1472 Result.negate(SE&: *SE);
1473 bool Res = weakZeroSIVtestImpl(AR: Dst, Const: SrcConst, Level, Result);
1474 Result.negate(SE&: *SE);
1475 return Res;
1476}
1477
1478// weakZeroDstSIVtest -
1479// From the paper, Practical Dependence Testing, Section 4.2.2
1480//
1481// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1482// where i is an induction variable, c1 and c2 are loop invariant,
1483// and a is a constant, we can solve it exactly using the
1484// Weak-Zero SIV test.
1485//
1486// Given
1487//
1488// c1 + a*i = c2
1489//
1490// we get
1491//
1492// i = (c2 - c1)/a
1493//
1494// If i is not an integer, there's no dependence.
1495// If i < 0 or > UB, there's no dependence.
1496// If i = 0, the direction is <=.
1497// If i = UB, the direction is >=.
1498// Otherwise, the direction is *.
1499//
1500// Can prove independence. Failing that, we can sometimes refine
1501// the directions. Can sometimes show that first or last
1502// iteration carries all the dependences (so worth peeling).
1503//
1504// (see also weakZeroSrcSIVtest)
1505//
1506// Return true if dependence disproved.
1507bool DependenceInfo::weakZeroDstSIVtest(const SCEVAddRecExpr *Src,
1508 const SCEV *DstConst, unsigned Level,
1509 FullDependence &Result) const {
1510 if (!isDependenceTestEnabled(Test: DependenceTestType::WeakZeroSIV))
1511 return false;
1512
1513 // For the WeakSIV test, it's possible the loop isn't common to the
1514 // Src and Dst loops. If it isn't, then there's no need to record a direction.
1515 [[maybe_unused]] const SCEV *SrcCoeff = Src->getStepRecurrence(SE&: *SE);
1516 [[maybe_unused]] const SCEV *SrcConst = Src->getStart();
1517 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1518 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
1519 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1520 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1521 ++WeakZeroSIVapplications;
1522 assert(0 < Level && Level <= SrcLevels && "Level out of range");
1523 Level--;
1524
1525 return weakZeroSIVtestImpl(AR: Src, Const: DstConst, Level, Result);
1526}
1527
1528// exactRDIVtest - Tests the RDIV subscript pair for dependence.
1529// Things of the form [c1 + a*i] and [c2 + b*j],
1530// where i and j are induction variable, c1 and c2 are loop invariant,
1531// and a and b are constants.
1532// Returns true if any possible dependence is disproved.
1533// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1534bool DependenceInfo::exactRDIVtest(const SCEVAddRecExpr *Src,
1535 const SCEVAddRecExpr *Dst,
1536 FullDependence &Result) const {
1537 if (!isDependenceTestEnabled(Test: DependenceTestType::ExactRDIV))
1538 return false;
1539
1540 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
1541 ++ExactRDIVapplications;
1542 bool Res = exactTestImpl(Src, Dst, Result, Level: std::nullopt);
1543 if (Res)
1544 ++ExactRDIVindependence;
1545 return Res;
1546}
1547
1548bool DependenceInfo::exactTestImpl(const SCEVAddRecExpr *Src,
1549 const SCEVAddRecExpr *Dst,
1550 FullDependence &Result,
1551 std::optional<unsigned> Level) const {
1552 const SCEV *SrcCoeff = Src->getStepRecurrence(SE&: *SE);
1553 const SCEV *SrcConst = Src->getStart();
1554 const SCEV *DstCoeff = Dst->getStepRecurrence(SE&: *SE);
1555 const SCEV *DstConst = Dst->getStart();
1556 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
1557 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1558 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1559 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1560
1561 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
1562 if (!Delta)
1563 return false;
1564 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1565 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Val: Delta);
1566 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(Val: SrcCoeff);
1567 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(Val: DstCoeff);
1568 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1569 return false;
1570
1571 // find gcd
1572 APInt G, X, Y;
1573 APInt AM = ConstSrcCoeff->getAPInt();
1574 APInt BM = ConstDstCoeff->getAPInt();
1575 APInt CM = ConstDelta->getAPInt();
1576 unsigned Bits = AM.getBitWidth();
1577 std::optional<bool> GCDRes = findGCD(Bits, AM, BM, Delta: CM, G, X, Y);
1578
1579 // GCD calculation failed so we cannot proceed with this test.
1580 if (!GCDRes)
1581 return false;
1582 if (*GCDRes) {
1583 // gcd doesn't divide Delta, no dependence
1584 return true;
1585 }
1586
1587 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1588
1589 // since SCEV construction seems to normalize, LM = 0
1590 std::optional<APInt> SrcUM =
1591 collectNonNegativeConstantUpperBound(L: Src->getLoop(), T: Delta->getType());
1592 if (SrcUM)
1593 LLVM_DEBUG(dbgs() << "\t SrcUM = " << *SrcUM << "\n");
1594
1595 std::optional<APInt> DstUM =
1596 collectNonNegativeConstantUpperBound(L: Dst->getLoop(), T: Delta->getType());
1597 if (DstUM)
1598 LLVM_DEBUG(dbgs() << "\t DstUM = " << *DstUM << "\n");
1599
1600 APInt TU(APInt::getSignedMaxValue(numBits: Bits));
1601 APInt TL(APInt::getSignedMinValue(numBits: Bits));
1602 OverflowSafeSignedAPInt TC = CM.sdiv(RHS: G);
1603 OverflowSafeSignedAPInt TX = OverflowSafeSignedAPInt(X) * TC;
1604 OverflowSafeSignedAPInt TY = OverflowSafeSignedAPInt(Y) * TC;
1605 if (!TC || !TX || !TY)
1606 return false;
1607 LLVM_DEBUG(dbgs() << "\t TC = " << *TC << "\n");
1608 LLVM_DEBUG(dbgs() << "\t TX = " << *TX << "\n");
1609 LLVM_DEBUG(dbgs() << "\t TY = " << *TY << "\n");
1610
1611 APInt TB = BM.sdiv(RHS: G);
1612 APInt TA = AM.sdiv(RHS: G);
1613
1614 // At this point, we have the following equations:
1615 //
1616 // TA*i - TB*j = TC
1617 //
1618 // Also, we know that the all pairs of (i, j) can be expressed as:
1619 //
1620 // (TX + k*TB, TY + k*TA)
1621 //
1622 // where k is an arbitrary integer.
1623 auto [TL0, TU0] = inferDomainOfAffine(A: TB, B: TX, UB: SrcUM);
1624 auto [TL1, TU1] = inferDomainOfAffine(A: TA, B: TY, UB: DstUM);
1625
1626 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
1627 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
1628
1629 auto GetMaxOrMin = [](const OverflowSafeSignedAPInt &V0,
1630 const OverflowSafeSignedAPInt &V1,
1631 bool IsMin) -> std::optional<APInt> {
1632 if (V0 && V1)
1633 return IsMin ? APIntOps::smin(A: *V0, B: *V1) : APIntOps::smax(A: *V0, B: *V1);
1634 if (V0)
1635 return *V0;
1636 if (V1)
1637 return *V1;
1638 return std::nullopt;
1639 };
1640
1641 std::optional<APInt> OptTL = GetMaxOrMin(TL0, TL1, false);
1642 std::optional<APInt> OptTU = GetMaxOrMin(TU0, TU1, true);
1643 if (!OptTL || !OptTU)
1644 return false;
1645
1646 TL = std::move(*OptTL);
1647 TU = std::move(*OptTU);
1648 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
1649 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
1650
1651 if (TL.sgt(RHS: TU))
1652 return true;
1653
1654 if (!Level)
1655 return false;
1656 assert(SrcUM == DstUM && "Expecting same upper bound for Src and Dst");
1657
1658 // explore directions
1659 unsigned NewDirection = Dependence::DVEntry::NONE;
1660 OverflowSafeSignedAPInt LowerDistance, UpperDistance;
1661 OverflowSafeSignedAPInt OTY(TY), OTX(TX), OTA(TA), OTB(TB), OTL(TL), OTU(TU);
1662 // NOTE: It's unclear whether these calculations can overflow. At the moment,
1663 // we conservatively assume they can.
1664 if (TA.sgt(RHS: TB)) {
1665 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1666 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1667 } else {
1668 LowerDistance = (OTY - OTX) + (OTA - OTB) * OTU;
1669 UpperDistance = (OTY - OTX) + (OTA - OTB) * OTL;
1670 }
1671
1672 if (!LowerDistance || !UpperDistance)
1673 return false;
1674
1675 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << *LowerDistance << "\n");
1676 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << *UpperDistance << "\n");
1677
1678 if (LowerDistance->sle(RHS: 0) && UpperDistance->sge(RHS: 0))
1679 NewDirection |= Dependence::DVEntry::EQ;
1680 if (LowerDistance->slt(RHS: 0))
1681 NewDirection |= Dependence::DVEntry::GT;
1682 if (UpperDistance->sgt(RHS: 0))
1683 NewDirection |= Dependence::DVEntry::LT;
1684
1685 // finished
1686 Result.DV[*Level].Direction &= NewDirection;
1687 LLVM_DEBUG(dbgs() << "\t Result = ");
1688 LLVM_DEBUG(Result.dump(dbgs()));
1689 return Result.DV[*Level].Direction == Dependence::DVEntry::NONE;
1690}
1691
1692// testSIV -
1693// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
1694// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
1695// a2 are constant, we attack it with an SIV test. While they can all be
1696// solved with the Exact SIV test, it's worthwhile to use simpler tests when
1697// they apply; they're cheaper and sometimes more precise.
1698//
1699// Return true if dependence disproved.
1700bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
1701 FullDependence &Result,
1702 bool UnderRuntimeAssumptions) {
1703 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1704 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1705 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
1706 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
1707 if (SrcAddRec && DstAddRec) {
1708 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(SE&: *SE);
1709 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(SE&: *SE);
1710 const Loop *CurSrcLoop = SrcAddRec->getLoop();
1711 [[maybe_unused]] const Loop *CurDstLoop = DstAddRec->getLoop();
1712 assert(haveSameSD(CurSrcLoop, CurDstLoop) &&
1713 "Loops in the SIV test should have the same iteration space and "
1714 "depth");
1715 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
1716 bool disproven = false;
1717 if (SrcCoeff == DstCoeff)
1718 disproven = strongSIVtest(Src: SrcAddRec, Dst: DstAddRec, Level, Result,
1719 UnderRuntimeAssumptions);
1720 else if (SrcCoeff == SE->getNegativeSCEV(V: DstCoeff))
1721 disproven = weakCrossingSIVtest(Src: SrcAddRec, Dst: DstAddRec, Level, Result);
1722 return disproven || exactSIVtest(Src: SrcAddRec, Dst: DstAddRec, Level, Result);
1723 }
1724 if (SrcAddRec) {
1725 const Loop *CurSrcLoop = SrcAddRec->getLoop();
1726 Level = mapSrcLoop(SrcLoop: CurSrcLoop);
1727 return weakZeroDstSIVtest(Src: SrcAddRec, DstConst: Dst, Level, Result);
1728 }
1729 if (DstAddRec) {
1730 const Loop *CurDstLoop = DstAddRec->getLoop();
1731 Level = mapDstLoop(DstLoop: CurDstLoop);
1732 return weakZeroSrcSIVtest(SrcConst: Src, Dst: DstAddRec, Level, Result);
1733 }
1734 llvm_unreachable("SIV test expected at least one AddRec");
1735 return false;
1736}
1737
1738// testRDIV -
1739// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
1740// where i and j are induction variables, c1 and c2 are loop invariant,
1741// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
1742// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
1743// It doesn't make sense to talk about distance or direction in this case,
1744// so there's no point in making special versions of the Strong SIV test or
1745// the Weak-crossing SIV test.
1746//
1747// Return true if dependence disproved.
1748bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
1749 FullDependence &Result) const {
1750 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1751 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1752 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Val: Src);
1753 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Val: Dst);
1754 assert(SrcAddRec && DstAddRec && "Unexpected non-addrec input");
1755 return exactRDIVtest(Src: SrcAddRec, Dst: DstAddRec, Result) ||
1756 gcdMIVtest(Src, Dst, Result);
1757}
1758
1759// Tests the single-subscript MIV pair (Src and Dst) for dependence.
1760// Return true if dependence disproved.
1761// Can sometimes refine direction vectors.
1762bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
1763 const SmallBitVector &Loops,
1764 FullDependence &Result) const {
1765 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1766 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1767 return gcdMIVtest(Src, Dst, Result) ||
1768 banerjeeMIVtest(Src, Dst, Loops, Result);
1769}
1770
1771/// Given a SCEVMulExpr, returns its first operand if its first operand is a
1772/// constant and the product doesn't overflow in a signed sense. Otherwise,
1773/// returns std::nullopt. For example, given (10 * X * Y)<nsw>, it returns 10.
1774/// Notably, if it doesn't have nsw, the multiplication may overflow, and if
1775/// so, it may not a multiple of 10.
1776static std::optional<APInt> getConstantCoefficient(const SCEV *Expr) {
1777 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Expr))
1778 return Constant->getAPInt();
1779 if (const auto *Product = dyn_cast<SCEVMulExpr>(Val: Expr))
1780 if (const auto *Constant = dyn_cast<SCEVConstant>(Val: Product->getOperand(i: 0)))
1781 if (Product->hasNoSignedWrap())
1782 return Constant->getAPInt();
1783 return std::nullopt;
1784}
1785
1786const SCEV *DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr,
1787 const Loop *CurLoop,
1788 const SCEV *&CurLoopCoeff,
1789 APInt &RunningGCD) const {
1790 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Expr);
1791 if (!AddRec) {
1792 assert(isLoopInvariant(Expr, CurLoop) &&
1793 "Expected loop invariant expression");
1794 return Expr;
1795 }
1796
1797 assert(AddRec->isAffine() && "Unexpected Expr");
1798 const SCEV *Start = AddRec->getStart();
1799 const SCEV *Step = AddRec->getStepRecurrence(SE&: *SE);
1800 if (AddRec->getLoop() == CurLoop) {
1801 CurLoopCoeff = Step;
1802 } else {
1803 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Step);
1804
1805 // If the coefficient is the product of a constant and other stuff, we can
1806 // use the constant in the GCD computation.
1807 if (!ConstCoeff)
1808 return nullptr;
1809
1810 // TODO: What happens if ConstCoeff is the "most negative" signed number
1811 // (e.g. -128 for 8 bit wide APInt)?
1812 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
1813 }
1814
1815 return accumulateCoefficientsGCD(Expr: Start, CurLoop, CurLoopCoeff, RunningGCD);
1816}
1817
1818//===----------------------------------------------------------------------===//
1819// gcdMIVtest -
1820// Tests an MIV subscript pair for dependence.
1821// Returns true if any possible dependence is disproved.
1822// Can sometimes disprove the equal direction for 1 or more loops,
1823// as discussed in Michael Wolfe's book,
1824// High Performance Compilers for Parallel Computing, page 235.
1825//
1826// We spend some effort (code!) to handle cases like
1827// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
1828// but M and N are just loop-invariant variables.
1829// This should help us handle linearized subscripts;
1830// also makes this test a useful backup to the various SIV tests.
1831//
1832// It occurs to me that the presence of loop-invariant variables
1833// changes the nature of the test from "greatest common divisor"
1834// to "a common divisor".
1835bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
1836 FullDependence &Result) const {
1837 if (!isDependenceTestEnabled(Test: DependenceTestType::GCDMIV))
1838 return false;
1839
1840 LLVM_DEBUG(dbgs() << "starting gcd\n");
1841 ++GCDapplications;
1842 unsigned BitWidth = SE->getTypeSizeInBits(Ty: Src->getType());
1843 APInt RunningGCD = APInt::getZero(numBits: BitWidth);
1844
1845 const SCEV *Dummy = nullptr;
1846 const SCEV *SrcConst =
1847 accumulateCoefficientsGCD(Expr: Src, CurLoop: nullptr, CurLoopCoeff&: Dummy, RunningGCD);
1848 if (!SrcConst)
1849 return false;
1850 const SCEV *DstConst =
1851 accumulateCoefficientsGCD(Expr: Dst, CurLoop: nullptr, CurLoopCoeff&: Dummy, RunningGCD);
1852 if (!DstConst)
1853 return false;
1854
1855 const SCEV *Delta = minusSCEVNoSignedOverflow(A: DstConst, B: SrcConst, SE&: *SE);
1856 if (!Delta)
1857 return false;
1858 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
1859 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Val: Delta);
1860 if (!Constant)
1861 return false;
1862 APInt ConstDelta = cast<SCEVConstant>(Val: Constant)->getAPInt();
1863 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
1864 if (ConstDelta == 0)
1865 return false;
1866 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
1867 APInt Remainder = ConstDelta.srem(RHS: RunningGCD);
1868 if (Remainder != 0) {
1869 ++GCDindependence;
1870 return true;
1871 }
1872
1873 // Try to disprove equal directions.
1874 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
1875 // the code above can't disprove the dependence because the GCD = 1.
1876 // So we consider what happen if i = i' and what happens if j = j'.
1877 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
1878 // which is infeasible, so we can disallow the = direction for the i level.
1879 // Setting j = j' doesn't help matters, so we end up with a direction vector
1880 // of [<>, *]
1881
1882 bool Improved = false;
1883 const SCEV *Coefficients = Src;
1884 while (const SCEVAddRecExpr *AddRec =
1885 dyn_cast<SCEVAddRecExpr>(Val: Coefficients)) {
1886 Coefficients = AddRec->getStart();
1887 const Loop *CurLoop = AddRec->getLoop();
1888 RunningGCD = 0;
1889 const SCEV *SrcCoeff = AddRec->getStepRecurrence(SE&: *SE);
1890 const SCEV *DstCoeff = SE->getMinusSCEV(LHS: SrcCoeff, RHS: SrcCoeff);
1891
1892 if (!accumulateCoefficientsGCD(Expr: Src, CurLoop, CurLoopCoeff&: SrcCoeff, RunningGCD) ||
1893 !accumulateCoefficientsGCD(Expr: Dst, CurLoop, CurLoopCoeff&: DstCoeff, RunningGCD))
1894 return false;
1895
1896 Delta = minusSCEVNoSignedOverflow(A: DstCoeff, B: SrcCoeff, SE&: *SE);
1897 if (!Delta)
1898 continue;
1899 // If the coefficient is the product of a constant and other stuff,
1900 // we can use the constant in the GCD computation.
1901 std::optional<APInt> ConstCoeff = getConstantCoefficient(Expr: Delta);
1902 if (!ConstCoeff)
1903 // The difference of the two coefficients might not be a product
1904 // or constant, in which case we give up on this direction.
1905 continue;
1906 RunningGCD = APIntOps::GreatestCommonDivisor(A: RunningGCD, B: ConstCoeff->abs());
1907 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
1908 if (RunningGCD != 0) {
1909 Remainder = ConstDelta.srem(RHS: RunningGCD);
1910 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
1911 if (Remainder != 0) {
1912 unsigned Level = mapSrcLoop(SrcLoop: CurLoop);
1913 Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ;
1914 Improved = true;
1915 }
1916 }
1917 }
1918 if (Improved)
1919 ++GCDsuccesses;
1920 LLVM_DEBUG(dbgs() << "all done\n");
1921 return false;
1922}
1923
1924//===----------------------------------------------------------------------===//
1925// banerjeeMIVtest -
1926// Use Banerjee's Inequalities to test an MIV subscript pair.
1927// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
1928// Generally follows the discussion in Section 2.5.2 of
1929//
1930// Optimizing Supercompilers for Supercomputers
1931// Michael Wolfe
1932//
1933// The inequalities given on page 25 are simplified in that loops are
1934// normalized so that the lower bound is always 0 and the stride is always 1.
1935// For example, Wolfe gives
1936//
1937// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
1938//
1939// where A_k is the coefficient of the kth index in the source subscript,
1940// B_k is the coefficient of the kth index in the destination subscript,
1941// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
1942// index, and N_k is the stride of the kth index. Since all loops are normalized
1943// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
1944// equation to
1945//
1946// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
1947// = (A^-_k - B_k)^- (U_k - 1) - B_k
1948//
1949// Similar simplifications are possible for the other equations.
1950//
1951// When we can't determine the number of iterations for a loop,
1952// we use NULL as an indicator for the worst case, infinity.
1953// When computing the upper bound, NULL denotes +inf;
1954// for the lower bound, NULL denotes -inf.
1955//
1956// Return true if dependence disproved.
1957bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
1958 const SmallBitVector &Loops,
1959 FullDependence &Result) const {
1960 if (!isDependenceTestEnabled(Test: DependenceTestType::BanerjeeMIV))
1961 return false;
1962
1963 LLVM_DEBUG(dbgs() << "starting Banerjee\n");
1964 ++BanerjeeApplications;
1965 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
1966 const SCEV *A0;
1967 SmallVector<CoefficientInfo, 4> A;
1968 collectCoeffInfo(Subscript: Src, SrcFlag: true, Constant&: A0, CI&: A);
1969 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');
1970 const SCEV *B0;
1971 SmallVector<CoefficientInfo, 4> B;
1972 collectCoeffInfo(Subscript: Dst, SrcFlag: false, Constant&: B0, CI&: B);
1973 SmallVector<BoundInfo, 4> Bound(MaxLevels + 1);
1974 const SCEV *Delta = minusSCEVNoSignedOverflow(A: B0, B: A0, SE&: *SE);
1975 if (!Delta)
1976 return false;
1977 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
1978
1979 // Compute bounds for all the * directions.
1980 LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
1981 for (unsigned K = 1; K <= MaxLevels; ++K) {
1982 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
1983 Bound[K].Direction = Dependence::DVEntry::ALL;
1984 Bound[K].DirSet = Dependence::DVEntry::NONE;
1985 findBoundsALL(A, B, Bound, K);
1986#ifndef NDEBUG
1987 LLVM_DEBUG(dbgs() << "\t " << K << '\t');
1988 if (Bound[K].Lower[Dependence::DVEntry::ALL])
1989 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
1990 else
1991 LLVM_DEBUG(dbgs() << "-inf\t");
1992 if (Bound[K].Upper[Dependence::DVEntry::ALL])
1993 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
1994 else
1995 LLVM_DEBUG(dbgs() << "+inf\n");
1996#endif
1997 }
1998
1999 // Test the *, *, *, ... case.
2000 bool Disproved = false;
2001 if (testBounds(DirKind: Dependence::DVEntry::ALL, Level: 0, Bound, Delta)) {
2002 // Explore the direction vector hierarchy.
2003 unsigned DepthExpanded = 0;
2004 unsigned NewDeps =
2005 exploreDirections(Level: 1, A, B, Bound, Loops, DepthExpanded, Delta);
2006 if (NewDeps > 0) {
2007 bool Improved = false;
2008 for (unsigned K = 1; K <= CommonLevels; ++K) {
2009 if (Loops[K]) {
2010 unsigned Old = Result.DV[K - 1].Direction;
2011 Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2012 Improved |= Old != Result.DV[K - 1].Direction;
2013 if (!Result.DV[K - 1].Direction) {
2014 Improved = false;
2015 Disproved = true;
2016 break;
2017 }
2018 }
2019 }
2020 if (Improved)
2021 ++BanerjeeSuccesses;
2022 } else {
2023 ++BanerjeeIndependence;
2024 Disproved = true;
2025 }
2026 } else {
2027 ++BanerjeeIndependence;
2028 Disproved = true;
2029 }
2030 return Disproved;
2031}
2032
2033// Hierarchically expands the direction vector
2034// search space, combining the directions of discovered dependences
2035// in the DirSet field of Bound. Returns the number of distinct
2036// dependences discovered. If the dependence is disproved,
2037// it will return 0.
2038unsigned DependenceInfo::exploreDirections(
2039 unsigned Level, ArrayRef<CoefficientInfo> A, ArrayRef<CoefficientInfo> B,
2040 MutableArrayRef<BoundInfo> Bound, const SmallBitVector &Loops,
2041 unsigned &DepthExpanded, const SCEV *Delta) const {
2042 // This algorithm has worst case complexity of O(3^n), where 'n' is the number
2043 // of common loop levels. To avoid excessive compile-time, pessimize all the
2044 // results and immediately return when the number of common levels is beyond
2045 // the given threshold.
2046 if (CommonLevels > MIVMaxLevelThreshold) {
2047 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2048 "direction exploration is terminated.\n");
2049 for (unsigned K = 1; K <= CommonLevels; ++K)
2050 if (Loops[K])
2051 Bound[K].DirSet = Dependence::DVEntry::ALL;
2052 return 1;
2053 }
2054
2055 if (Level > CommonLevels) {
2056 // record result
2057 LLVM_DEBUG(dbgs() << "\t[");
2058 for (unsigned K = 1; K <= CommonLevels; ++K) {
2059 if (Loops[K]) {
2060 Bound[K].DirSet |= Bound[K].Direction;
2061#ifndef NDEBUG
2062 switch (Bound[K].Direction) {
2063 case Dependence::DVEntry::LT:
2064 LLVM_DEBUG(dbgs() << " <");
2065 break;
2066 case Dependence::DVEntry::EQ:
2067 LLVM_DEBUG(dbgs() << " =");
2068 break;
2069 case Dependence::DVEntry::GT:
2070 LLVM_DEBUG(dbgs() << " >");
2071 break;
2072 case Dependence::DVEntry::ALL:
2073 LLVM_DEBUG(dbgs() << " *");
2074 break;
2075 default:
2076 llvm_unreachable("unexpected Bound[K].Direction");
2077 }
2078#endif
2079 }
2080 }
2081 LLVM_DEBUG(dbgs() << " ]\n");
2082 return 1;
2083 }
2084 if (Loops[Level]) {
2085 if (Level > DepthExpanded) {
2086 DepthExpanded = Level;
2087 // compute bounds for <, =, > at current level
2088 findBoundsLT(A, B, Bound, K: Level);
2089 findBoundsGT(A, B, Bound, K: Level);
2090 findBoundsEQ(A, B, Bound, K: Level);
2091#ifndef NDEBUG
2092 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2093 LLVM_DEBUG(dbgs() << "\t <\t");
2094 if (Bound[Level].Lower[Dependence::DVEntry::LT])
2095 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2096 << '\t');
2097 else
2098 LLVM_DEBUG(dbgs() << "-inf\t");
2099 if (Bound[Level].Upper[Dependence::DVEntry::LT])
2100 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2101 << '\n');
2102 else
2103 LLVM_DEBUG(dbgs() << "+inf\n");
2104 LLVM_DEBUG(dbgs() << "\t =\t");
2105 if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2106 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2107 << '\t');
2108 else
2109 LLVM_DEBUG(dbgs() << "-inf\t");
2110 if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2111 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2112 << '\n');
2113 else
2114 LLVM_DEBUG(dbgs() << "+inf\n");
2115 LLVM_DEBUG(dbgs() << "\t >\t");
2116 if (Bound[Level].Lower[Dependence::DVEntry::GT])
2117 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2118 << '\t');
2119 else
2120 LLVM_DEBUG(dbgs() << "-inf\t");
2121 if (Bound[Level].Upper[Dependence::DVEntry::GT])
2122 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2123 << '\n');
2124 else
2125 LLVM_DEBUG(dbgs() << "+inf\n");
2126#endif
2127 }
2128
2129 unsigned NewDeps = 0;
2130
2131 // test bounds for <, *, *, ...
2132 if (testBounds(DirKind: Dependence::DVEntry::LT, Level, Bound, Delta))
2133 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2134 Delta);
2135
2136 // Test bounds for =, *, *, ...
2137 if (testBounds(DirKind: Dependence::DVEntry::EQ, Level, Bound, Delta))
2138 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2139 Delta);
2140
2141 // test bounds for >, *, *, ...
2142 if (testBounds(DirKind: Dependence::DVEntry::GT, Level, Bound, Delta))
2143 NewDeps += exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2144 Delta);
2145
2146 Bound[Level].Direction = Dependence::DVEntry::ALL;
2147 return NewDeps;
2148 } else
2149 return exploreDirections(Level: Level + 1, A, B, Bound, Loops, DepthExpanded,
2150 Delta);
2151}
2152
2153// Returns true iff the current bounds are plausible.
2154bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2155 MutableArrayRef<BoundInfo> Bound,
2156 const SCEV *Delta) const {
2157 Bound[Level].Direction = DirKind;
2158 if (const SCEV *LowerBound = getLowerBound(Bound))
2159 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: LowerBound, RHS: Delta))
2160 return false;
2161 if (const SCEV *UpperBound = getUpperBound(Bound))
2162 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_SGT, LHS: Delta, RHS: UpperBound))
2163 return false;
2164 return true;
2165}
2166
2167// Computes the upper and lower bounds for level K
2168// using the * direction. Records them in Bound.
2169// Wolfe gives the equations
2170//
2171// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2172// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2173//
2174// Since we normalize loops, we can simplify these equations to
2175//
2176// LB^*_k = (A^-_k - B^+_k)U_k
2177// UB^*_k = (A^+_k - B^-_k)U_k
2178//
2179// We must be careful to handle the case where the upper bound is unknown.
2180// Note that the lower bound is always <= 0
2181// and the upper bound is always >= 0.
2182void DependenceInfo::findBoundsALL(ArrayRef<CoefficientInfo> A,
2183 ArrayRef<CoefficientInfo> B,
2184 MutableArrayRef<BoundInfo> Bound,
2185 unsigned K) const {
2186 Bound[K].Lower[Dependence::DVEntry::ALL] =
2187 nullptr; // Default value = -infinity.
2188 Bound[K].Upper[Dependence::DVEntry::ALL] =
2189 nullptr; // Default value = +infinity.
2190 if (Bound[K].Iterations) {
2191 Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getMulExpr(
2192 LHS: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].PosPart), RHS: Bound[K].Iterations);
2193 Bound[K].Upper[Dependence::DVEntry::ALL] = SE->getMulExpr(
2194 LHS: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].NegPart), RHS: Bound[K].Iterations);
2195 } else {
2196 // If the difference is 0, we won't need to know the number of iterations.
2197 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].NegPart, RHS: B[K].PosPart))
2198 Bound[K].Lower[Dependence::DVEntry::ALL] =
2199 SE->getZero(Ty: A[K].Coeff->getType());
2200 if (SE->isKnownPredicate(Pred: CmpInst::ICMP_EQ, LHS: A[K].PosPart, RHS: B[K].NegPart))
2201 Bound[K].Upper[Dependence::DVEntry::ALL] =
2202 SE->getZero(Ty: A[K].Coeff->getType());
2203 }
2204}
2205
2206// Computes the upper and lower bounds for level K
2207// using the = direction. Records them in Bound.
2208// Wolfe gives the equations
2209//
2210// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2211// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2212//
2213// Since we normalize loops, we can simplify these equations to
2214//
2215// LB^=_k = (A_k - B_k)^- U_k
2216// UB^=_k = (A_k - B_k)^+ U_k
2217//
2218// We must be careful to handle the case where the upper bound is unknown.
2219// Note that the lower bound is always <= 0
2220// and the upper bound is always >= 0.
2221void DependenceInfo::findBoundsEQ(ArrayRef<CoefficientInfo> A,
2222 ArrayRef<CoefficientInfo> B,
2223 MutableArrayRef<BoundInfo> Bound,
2224 unsigned K) const {
2225 Bound[K].Lower[Dependence::DVEntry::EQ] =
2226 nullptr; // Default value = -infinity.
2227 Bound[K].Upper[Dependence::DVEntry::EQ] =
2228 nullptr; // Default value = +infinity.
2229 if (Bound[K].Iterations) {
2230 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2231 const SCEV *NegativePart = getNegativePart(X: Delta);
2232 Bound[K].Lower[Dependence::DVEntry::EQ] =
2233 SE->getMulExpr(LHS: NegativePart, RHS: Bound[K].Iterations);
2234 const SCEV *PositivePart = getPositivePart(X: Delta);
2235 Bound[K].Upper[Dependence::DVEntry::EQ] =
2236 SE->getMulExpr(LHS: PositivePart, RHS: Bound[K].Iterations);
2237 } else {
2238 // If the positive/negative part of the difference is 0,
2239 // we won't need to know the number of iterations.
2240 const SCEV *Delta = SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].Coeff);
2241 const SCEV *NegativePart = getNegativePart(X: Delta);
2242 if (NegativePart->isZero())
2243 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2244 const SCEV *PositivePart = getPositivePart(X: Delta);
2245 if (PositivePart->isZero())
2246 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2247 }
2248}
2249
2250// Computes the upper and lower bounds for level K
2251// using the < direction. Records them in Bound.
2252// Wolfe gives the equations
2253//
2254// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2255// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2256//
2257// Since we normalize loops, we can simplify these equations to
2258//
2259// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2260// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2261//
2262// We must be careful to handle the case where the upper bound is unknown.
2263void DependenceInfo::findBoundsLT(ArrayRef<CoefficientInfo> A,
2264 ArrayRef<CoefficientInfo> B,
2265 MutableArrayRef<BoundInfo> Bound,
2266 unsigned K) const {
2267 Bound[K].Lower[Dependence::DVEntry::LT] =
2268 nullptr; // Default value = -infinity.
2269 Bound[K].Upper[Dependence::DVEntry::LT] =
2270 nullptr; // Default value = +infinity.
2271 if (Bound[K].Iterations) {
2272 const SCEV *Iter_1 = SE->getMinusSCEV(
2273 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2274 const SCEV *NegPart =
2275 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2276 Bound[K].Lower[Dependence::DVEntry::LT] =
2277 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: B[K].Coeff);
2278 const SCEV *PosPart =
2279 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2280 Bound[K].Upper[Dependence::DVEntry::LT] =
2281 SE->getMinusSCEV(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: B[K].Coeff);
2282 } else {
2283 // If the positive/negative part of the difference is 0,
2284 // we won't need to know the number of iterations.
2285 const SCEV *NegPart =
2286 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].NegPart, RHS: B[K].Coeff));
2287 if (NegPart->isZero())
2288 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2289 const SCEV *PosPart =
2290 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].PosPart, RHS: B[K].Coeff));
2291 if (PosPart->isZero())
2292 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(V: B[K].Coeff);
2293 }
2294}
2295
2296// Computes the upper and lower bounds for level K
2297// using the > direction. Records them in Bound.
2298// Wolfe gives the equations
2299//
2300// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2301// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2302//
2303// Since we normalize loops, we can simplify these equations to
2304//
2305// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2306// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2307//
2308// We must be careful to handle the case where the upper bound is unknown.
2309void DependenceInfo::findBoundsGT(ArrayRef<CoefficientInfo> A,
2310 ArrayRef<CoefficientInfo> B,
2311 MutableArrayRef<BoundInfo> Bound,
2312 unsigned K) const {
2313 Bound[K].Lower[Dependence::DVEntry::GT] =
2314 nullptr; // Default value = -infinity.
2315 Bound[K].Upper[Dependence::DVEntry::GT] =
2316 nullptr; // Default value = +infinity.
2317 if (Bound[K].Iterations) {
2318 const SCEV *Iter_1 = SE->getMinusSCEV(
2319 LHS: Bound[K].Iterations, RHS: SE->getOne(Ty: Bound[K].Iterations->getType()));
2320 const SCEV *NegPart =
2321 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2322 Bound[K].Lower[Dependence::DVEntry::GT] =
2323 SE->getAddExpr(LHS: SE->getMulExpr(LHS: NegPart, RHS: Iter_1), RHS: A[K].Coeff);
2324 const SCEV *PosPart =
2325 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
2326 Bound[K].Upper[Dependence::DVEntry::GT] =
2327 SE->getAddExpr(LHS: SE->getMulExpr(LHS: PosPart, RHS: Iter_1), RHS: A[K].Coeff);
2328 } else {
2329 // If the positive/negative part of the difference is 0,
2330 // we won't need to know the number of iterations.
2331 const SCEV *NegPart =
2332 getNegativePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].PosPart));
2333 if (NegPart->isZero())
2334 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2335 const SCEV *PosPart =
2336 getPositivePart(X: SE->getMinusSCEV(LHS: A[K].Coeff, RHS: B[K].NegPart));
2337 if (PosPart->isZero())
2338 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2339 }
2340}
2341
2342// X^+ = max(X, 0)
2343const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
2344 return SE->getSMaxExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
2345}
2346
2347// X^- = min(X, 0)
2348const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
2349 return SE->getSMinExpr(LHS: X, RHS: SE->getZero(Ty: X->getType()));
2350}
2351
2352// Walks through the subscript,
2353// collecting each coefficient, the associated loop bounds,
2354// and recording its positive and negative parts for later use.
2355void DependenceInfo::collectCoeffInfo(
2356 const SCEV *Subscript, bool SrcFlag, const SCEV *&Constant,
2357 SmallVectorImpl<CoefficientInfo> &CI) const {
2358 const SCEV *Zero = SE->getZero(Ty: Subscript->getType());
2359 CI.resize(N: MaxLevels + 1);
2360 for (unsigned K = 1; K <= MaxLevels; ++K) {
2361 CI[K].Coeff = Zero;
2362 CI[K].PosPart = Zero;
2363 CI[K].NegPart = Zero;
2364 CI[K].Iterations = nullptr;
2365 }
2366 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Val: Subscript)) {
2367 const Loop *L = AddRec->getLoop();
2368 unsigned K = SrcFlag ? mapSrcLoop(SrcLoop: L) : mapDstLoop(DstLoop: L);
2369 CI[K].Coeff = AddRec->getStepRecurrence(SE&: *SE);
2370 CI[K].PosPart = getPositivePart(X: CI[K].Coeff);
2371 CI[K].NegPart = getNegativePart(X: CI[K].Coeff);
2372 CI[K].Iterations = collectUpperBound(L, T: Subscript->getType());
2373 Subscript = AddRec->getStart();
2374 }
2375 Constant = Subscript;
2376#ifndef NDEBUG
2377 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
2378 for (unsigned K = 1; K <= MaxLevels; ++K) {
2379 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
2380 LLVM_DEBUG(dbgs() << "\tPos Part = ");
2381 LLVM_DEBUG(dbgs() << *CI[K].PosPart);
2382 LLVM_DEBUG(dbgs() << "\tNeg Part = ");
2383 LLVM_DEBUG(dbgs() << *CI[K].NegPart);
2384 LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
2385 if (CI[K].Iterations)
2386 LLVM_DEBUG(dbgs() << *CI[K].Iterations);
2387 else
2388 LLVM_DEBUG(dbgs() << "+inf");
2389 LLVM_DEBUG(dbgs() << '\n');
2390 }
2391 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
2392#endif
2393}
2394
2395// Looks through all the bounds info and
2396// computes the lower bound given the current direction settings
2397// at each level. If the lower bound for any level is -inf,
2398// the result is -inf.
2399const SCEV *DependenceInfo::getLowerBound(ArrayRef<BoundInfo> Bound) const {
2400 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
2401 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
2402 if (Bound[K].Lower[Bound[K].Direction])
2403 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Lower[Bound[K].Direction]);
2404 else
2405 Sum = nullptr;
2406 }
2407 return Sum;
2408}
2409
2410// Looks through all the bounds info and
2411// computes the upper bound given the current direction settings
2412// at each level. If the upper bound at any level is +inf,
2413// the result is +inf.
2414const SCEV *DependenceInfo::getUpperBound(ArrayRef<BoundInfo> Bound) const {
2415 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
2416 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
2417 if (Bound[K].Upper[Bound[K].Direction])
2418 Sum = SE->getAddExpr(LHS: Sum, RHS: Bound[K].Upper[Bound[K].Direction]);
2419 else
2420 Sum = nullptr;
2421 }
2422 return Sum;
2423}
2424
2425/// Check if we can delinearize the subscripts. If the SCEVs representing the
2426/// source and destination array references are recurrences on a nested loop,
2427/// this function flattens the nested recurrences into separate recurrences
2428/// for each loop level.
2429bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
2430 SmallVectorImpl<Subscript> &Pair) {
2431 assert(isLoadOrStore(Src) && "instruction is not load or store");
2432 assert(isLoadOrStore(Dst) && "instruction is not load or store");
2433 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
2434 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
2435 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
2436 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
2437 const SCEV *SrcAccessFn = SE->getSCEVAtScope(V: SrcPtr, L: SrcLoop);
2438 const SCEV *DstAccessFn = SE->getSCEVAtScope(V: DstPtr, L: DstLoop);
2439 const SCEVUnknown *SrcBase =
2440 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
2441 const SCEVUnknown *DstBase =
2442 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
2443
2444 if (!SrcBase || !DstBase || SrcBase != DstBase)
2445 return false;
2446
2447 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
2448
2449 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
2450 SrcSubscripts, DstSubscripts) &&
2451 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
2452 SrcSubscripts, DstSubscripts))
2453 return false;
2454
2455 assert(isLoopInvariant(SrcBase, SrcLoop) &&
2456 isLoopInvariant(DstBase, DstLoop) &&
2457 "Expected SrcBase and DstBase to be loop invariant");
2458
2459 int Size = SrcSubscripts.size();
2460 LLVM_DEBUG({
2461 dbgs() << "\nSrcSubscripts: ";
2462 for (int I = 0; I < Size; I++)
2463 dbgs() << *SrcSubscripts[I];
2464 dbgs() << "\nDstSubscripts: ";
2465 for (int I = 0; I < Size; I++)
2466 dbgs() << *DstSubscripts[I];
2467 dbgs() << "\n";
2468 });
2469
2470 // The delinearization transforms a single-subscript MIV dependence test into
2471 // a multi-subscript SIV dependence test that is easier to compute. So we
2472 // resize Pair to contain as many pairs of subscripts as the delinearization
2473 // has found, and then initialize the pairs following the delinearization.
2474 Pair.resize(N: Size);
2475 for (int I = 0; I < Size; ++I) {
2476 Pair[I].Src = SrcSubscripts[I];
2477 Pair[I].Dst = DstSubscripts[I];
2478
2479 assert(Pair[I].Src->getType() == Pair[I].Dst->getType() &&
2480 "Unexpected different types for the subscripts");
2481 }
2482
2483 return true;
2484}
2485
2486/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
2487/// arrays accessed are fixed-size arrays. Return true if delinearization was
2488/// successful.
2489bool DependenceInfo::tryDelinearizeFixedSize(
2490 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
2491 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
2492 SmallVectorImpl<const SCEV *> &DstSubscripts) {
2493 LLVM_DEBUG({
2494 const SCEVUnknown *SrcBase =
2495 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
2496 const SCEVUnknown *DstBase =
2497 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
2498 assert(SrcBase && DstBase && SrcBase == DstBase &&
2499 "expected src and dst scev unknowns to be equal");
2500 });
2501
2502 const SCEV *ElemSize = SE->getElementSize(Inst: Src);
2503 assert(ElemSize == SE->getElementSize(Dst) && "Different element sizes");
2504 SmallVector<const SCEV *, 4> SrcSizes, DstSizes;
2505 if (!delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: SrcAccessFn),
2506 Subscripts&: SrcSubscripts, Sizes&: SrcSizes, ElementSize: ElemSize) ||
2507 !delinearizeFixedSizeArray(SE&: *SE, Expr: SE->removePointerBase(S: DstAccessFn),
2508 Subscripts&: DstSubscripts, Sizes&: DstSizes, ElementSize: ElemSize))
2509 return false;
2510
2511 // Check that the two size arrays are non-empty and equal in length and
2512 // value. SCEV expressions are uniqued, so we can compare pointers.
2513 if (SrcSizes.size() != DstSizes.size() ||
2514 !std::equal(first1: SrcSizes.begin(), last1: SrcSizes.end(), first2: DstSizes.begin())) {
2515 SrcSubscripts.clear();
2516 DstSubscripts.clear();
2517 return false;
2518 }
2519
2520 assert(SrcSubscripts.size() == DstSubscripts.size() &&
2521 "Expected equal number of entries in the list of SrcSubscripts and "
2522 "DstSubscripts.");
2523
2524 // In general we cannot safely assume that the subscripts recovered from GEPs
2525 // are in the range of values defined for their corresponding array
2526 // dimensions. For example some C language usage/interpretation make it
2527 // impossible to verify this at compile-time. As such we can only delinearize
2528 // iff the subscripts are positive and are less than the range of the
2529 // dimension.
2530 if (!DisableDelinearizationChecks) {
2531 if (!validateDelinearizationResult(SE&: *SE, Sizes: SrcSizes, Subscripts: SrcSubscripts) ||
2532 !validateDelinearizationResult(SE&: *SE, Sizes: DstSizes, Subscripts: DstSubscripts)) {
2533 SrcSubscripts.clear();
2534 DstSubscripts.clear();
2535 return false;
2536 }
2537 }
2538 LLVM_DEBUG({
2539 dbgs() << "Delinearized subscripts of fixed-size array\n"
2540 << "SrcGEP:" << *getLoadStorePointerOperand(Src) << "\n"
2541 << "DstGEP:" << *getLoadStorePointerOperand(Dst) << "\n";
2542 });
2543 return true;
2544}
2545
2546bool DependenceInfo::tryDelinearizeParametricSize(
2547 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
2548 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
2549 SmallVectorImpl<const SCEV *> &DstSubscripts) {
2550
2551 const SCEVUnknown *SrcBase =
2552 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: SrcAccessFn));
2553 const SCEVUnknown *DstBase =
2554 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: DstAccessFn));
2555 assert(SrcBase && DstBase && SrcBase == DstBase &&
2556 "expected src and dst scev unknowns to be equal");
2557
2558 const SCEV *ElementSize = SE->getElementSize(Inst: Src);
2559 if (ElementSize != SE->getElementSize(Inst: Dst))
2560 return false;
2561
2562 const SCEV *SrcSCEV = SE->getMinusSCEV(LHS: SrcAccessFn, RHS: SrcBase);
2563 const SCEV *DstSCEV = SE->getMinusSCEV(LHS: DstAccessFn, RHS: DstBase);
2564
2565 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(Val: SrcSCEV);
2566 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(Val: DstSCEV);
2567 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
2568 return false;
2569
2570 // First step: collect parametric terms in both array references.
2571 SmallVector<const SCEV *, 4> Terms;
2572 collectParametricTerms(SE&: *SE, Expr: SrcAR, Terms);
2573 collectParametricTerms(SE&: *SE, Expr: DstAR, Terms);
2574
2575 // Second step: find subscript sizes.
2576 SmallVector<const SCEV *, 4> Sizes;
2577 findArrayDimensions(SE&: *SE, Terms, Sizes, ElementSize);
2578
2579 // Third step: compute the access functions for each subscript.
2580 computeAccessFunctions(SE&: *SE, Expr: SrcAR, Subscripts&: SrcSubscripts, Sizes);
2581 computeAccessFunctions(SE&: *SE, Expr: DstAR, Subscripts&: DstSubscripts, Sizes);
2582
2583 // Fail when there is only a subscript: that's a linearized access function.
2584 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
2585 SrcSubscripts.size() != DstSubscripts.size())
2586 return false;
2587
2588 // Statically check that the array bounds are in-range. The first subscript we
2589 // don't have a size for and it cannot overflow into another subscript, so is
2590 // always safe. The others need to be 0 <= subscript[i] < bound, for both src
2591 // and dst.
2592 // FIXME: It may be better to record these sizes and add them as constraints
2593 // to the dependency checks.
2594 if (!DisableDelinearizationChecks)
2595 if (!validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: SrcSubscripts) ||
2596 !validateDelinearizationResult(SE&: *SE, Sizes, Subscripts: DstSubscripts))
2597 return false;
2598
2599 return true;
2600}
2601
2602//===----------------------------------------------------------------------===//
2603
2604#ifndef NDEBUG
2605// For debugging purposes, dump a small bit vector to dbgs().
2606static void dumpSmallBitVector(SmallBitVector &BV) {
2607 dbgs() << "{";
2608 for (unsigned VI : BV.set_bits()) {
2609 dbgs() << VI;
2610 if (BV.find_next(VI) >= 0)
2611 dbgs() << ' ';
2612 }
2613 dbgs() << "}\n";
2614}
2615#endif
2616
2617bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
2618 FunctionAnalysisManager::Invalidator &Inv) {
2619 // Check if the analysis itself has been invalidated.
2620 auto PAC = PA.getChecker<DependenceAnalysis>();
2621 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
2622 return true;
2623
2624 // Check transitive dependencies.
2625 return Inv.invalidate<AAManager>(IR&: F, PA) ||
2626 Inv.invalidate<ScalarEvolutionAnalysis>(IR&: F, PA) ||
2627 Inv.invalidate<LoopAnalysis>(IR&: F, PA);
2628}
2629
2630// depends -
2631// Returns NULL if there is no dependence.
2632// Otherwise, return a Dependence with as many details as possible.
2633// Corresponds to Section 3.1 in the paper
2634//
2635// Practical Dependence Testing
2636// Goff, Kennedy, Tseng
2637// PLDI 1991
2638//
2639std::unique_ptr<Dependence>
2640DependenceInfo::depends(Instruction *Src, Instruction *Dst,
2641 bool UnderRuntimeAssumptions) {
2642 SmallVector<const SCEVPredicate *, 4> Assume;
2643 bool PossiblyLoopIndependent = true;
2644 if (Src == Dst)
2645 PossiblyLoopIndependent = false;
2646
2647 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
2648 // if both instructions don't reference memory, there's no dependence
2649 return nullptr;
2650
2651 if (!isLoadOrStore(I: Src) || !isLoadOrStore(I: Dst)) {
2652 // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
2653 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
2654 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2655 args: SCEVUnionPredicate(Assume, *SE));
2656 }
2657
2658 const MemoryLocation &DstLoc = MemoryLocation::get(Inst: Dst);
2659 const MemoryLocation &SrcLoc = MemoryLocation::get(Inst: Src);
2660
2661 switch (underlyingObjectsAlias(AA, DL: F->getDataLayout(), LocA: DstLoc, LocB: SrcLoc)) {
2662 case AliasResult::MayAlias:
2663 case AliasResult::PartialAlias:
2664 // cannot analyse objects if we don't understand their aliasing.
2665 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
2666 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2667 args: SCEVUnionPredicate(Assume, *SE));
2668 case AliasResult::NoAlias:
2669 // If the objects noalias, they are distinct, accesses are independent.
2670 LLVM_DEBUG(dbgs() << "no alias\n");
2671 return nullptr;
2672 case AliasResult::MustAlias:
2673 break; // The underlying objects alias; test accesses for dependence.
2674 }
2675
2676 if (DstLoc.Size != SrcLoc.Size || !DstLoc.Size.isPrecise() ||
2677 !SrcLoc.Size.isPrecise()) {
2678 // The dependence test gets confused if the size of the memory accesses
2679 // differ.
2680 LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
2681 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2682 args: SCEVUnionPredicate(Assume, *SE));
2683 }
2684
2685 Value *SrcPtr = getLoadStorePointerOperand(V: Src);
2686 Value *DstPtr = getLoadStorePointerOperand(V: Dst);
2687 const SCEV *SrcSCEV = SE->getSCEV(V: SrcPtr);
2688 const SCEV *DstSCEV = SE->getSCEV(V: DstPtr);
2689 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
2690 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
2691 const SCEV *SrcBase = SE->getPointerBase(V: SrcSCEV);
2692 const SCEV *DstBase = SE->getPointerBase(V: DstSCEV);
2693 if (SrcBase != DstBase) {
2694 // If two pointers have different bases, trying to analyze indexes won't
2695 // work; we can't compare them to each other. This can happen, for example,
2696 // if one is produced by an LCSSA PHI node.
2697 //
2698 // We check this upfront so we don't crash in cases where getMinusSCEV()
2699 // returns a SCEVCouldNotCompute.
2700 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
2701 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2702 args: SCEVUnionPredicate(Assume, *SE));
2703 }
2704
2705 // Even if the base pointers are the same, they may not be loop-invariant. It
2706 // could lead to incorrect results, as we're analyzing loop-carried
2707 // dependencies. Src and Dst can be in different loops, so we need to check
2708 // the base pointer is invariant in both loops.
2709 Loop *SrcLoop = LI->getLoopFor(BB: Src->getParent());
2710 Loop *DstLoop = LI->getLoopFor(BB: Dst->getParent());
2711 if (!isLoopInvariant(Expression: SrcBase, LoopNest: SrcLoop) ||
2712 !isLoopInvariant(Expression: DstBase, LoopNest: DstLoop)) {
2713 LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
2714 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2715 args: SCEVUnionPredicate(Assume, *SE));
2716 }
2717
2718 uint64_t EltSize = SrcLoc.Size.toRaw();
2719 const SCEV *SrcEv = SE->getMinusSCEV(LHS: SrcSCEV, RHS: SrcBase);
2720 const SCEV *DstEv = SE->getMinusSCEV(LHS: DstSCEV, RHS: DstBase);
2721
2722 // Check that memory access offsets are multiples of element sizes.
2723 if (!SE->isKnownMultipleOf(S: SrcEv, M: EltSize, Assumptions&: Assume) ||
2724 !SE->isKnownMultipleOf(S: DstEv, M: EltSize, Assumptions&: Assume)) {
2725 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
2726 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2727 args: SCEVUnionPredicate(Assume, *SE));
2728 }
2729
2730 // Runtime assumptions needed but not allowed.
2731 if (!Assume.empty() && !UnderRuntimeAssumptions)
2732 return std::make_unique<Dependence>(args&: Src, args&: Dst,
2733 args: SCEVUnionPredicate(Assume, *SE));
2734
2735 unsigned Pairs = 1;
2736 SmallVector<Subscript, 2> Pair(Pairs);
2737 Pair[0].Src = SrcEv;
2738 Pair[0].Dst = DstEv;
2739 if (Delinearize) {
2740 if (tryDelinearize(Src, Dst, Pair)) {
2741 LLVM_DEBUG(dbgs() << " delinearized\n");
2742 Pairs = Pair.size();
2743 }
2744 }
2745
2746 // Establish loop nesting levels considering SameSD loops as common
2747 establishNestingLevels(Src, Dst);
2748
2749 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
2750 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
2751 LLVM_DEBUG(dbgs() << " SameSD nesting levels = " << SameSDLevels << "\n");
2752
2753 // Modify common levels to consider the SameSD levels in the tests
2754 CommonLevels += SameSDLevels;
2755 MaxLevels -= SameSDLevels;
2756 if (SameSDLevels > 0) {
2757 // Not all tests are handled yet over SameSD loops
2758 // Revoke if there are any tests other than ZIV, SIV or RDIV
2759 for (unsigned P = 0; P < Pairs; ++P) {
2760 SmallBitVector Loops;
2761 Subscript::ClassificationKind TestClass =
2762 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()),
2763 Dst: Pair[P].Dst, DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops);
2764
2765 if (TestClass != Subscript::ZIV && TestClass != Subscript::SIV &&
2766 TestClass != Subscript::RDIV) {
2767 // Revert the levels to not consider the SameSD levels
2768 CommonLevels -= SameSDLevels;
2769 MaxLevels += SameSDLevels;
2770 SameSDLevels = 0;
2771 break;
2772 }
2773 }
2774 }
2775
2776 if (SameSDLevels > 0)
2777 SameSDLoopsCount++;
2778
2779 FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
2780 PossiblyLoopIndependent, CommonLevels);
2781 ++TotalArrayPairs;
2782
2783 for (unsigned P = 0; P < Pairs; ++P) {
2784 assert(Pair[P].Src->getType()->isIntegerTy() && "Src must be an integer");
2785 assert(Pair[P].Dst->getType()->isIntegerTy() && "Dst must be an integer");
2786 Pair[P].Loops.resize(N: MaxLevels + 1);
2787 Pair[P].GroupLoops.resize(N: MaxLevels + 1);
2788 Pair[P].Group.resize(N: Pairs);
2789 Pair[P].Classification =
2790 classifyPair(Src: Pair[P].Src, SrcLoopNest: LI->getLoopFor(BB: Src->getParent()), Dst: Pair[P].Dst,
2791 DstLoopNest: LI->getLoopFor(BB: Dst->getParent()), Loops&: Pair[P].Loops);
2792 Pair[P].GroupLoops = Pair[P].Loops;
2793 Pair[P].Group.set(P);
2794 LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
2795 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
2796 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
2797 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
2798 LLVM_DEBUG(dbgs() << "\tloops = ");
2799 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));
2800 }
2801
2802 // Test each subscript individually
2803 for (unsigned SI = 0; SI < Pairs; ++SI) {
2804 LLVM_DEBUG(dbgs() << "testing subscript " << SI);
2805
2806 // Attempt signed range test first.
2807 ConstantRange SrcRange = SE->getSignedRange(S: Pair[SI].Src);
2808 ConstantRange DstRange = SE->getSignedRange(S: Pair[SI].Dst);
2809 if (SrcRange.intersectWith(CR: DstRange).isEmptySet())
2810 return nullptr;
2811
2812 switch (Pair[SI].Classification) {
2813 case Subscript::NonLinear:
2814 // ignore these, but collect loops for later
2815 ++NonlinearSubscriptPairs;
2816 collectCommonLoops(Expression: Pair[SI].Src, LoopNest: LI->getLoopFor(BB: Src->getParent()),
2817 Loops&: Pair[SI].Loops);
2818 collectCommonLoops(Expression: Pair[SI].Dst, LoopNest: LI->getLoopFor(BB: Dst->getParent()),
2819 Loops&: Pair[SI].Loops);
2820 break;
2821 case Subscript::ZIV:
2822 LLVM_DEBUG(dbgs() << ", ZIV\n");
2823 if (testZIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
2824 return nullptr;
2825 break;
2826 case Subscript::SIV: {
2827 LLVM_DEBUG(dbgs() << ", SIV\n");
2828 unsigned Level;
2829 if (testSIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Level, Result,
2830 UnderRuntimeAssumptions))
2831 return nullptr;
2832 break;
2833 }
2834 case Subscript::RDIV:
2835 LLVM_DEBUG(dbgs() << ", RDIV\n");
2836 if (testRDIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Result))
2837 return nullptr;
2838 break;
2839 case Subscript::MIV:
2840 LLVM_DEBUG(dbgs() << ", MIV\n");
2841 if (testMIV(Src: Pair[SI].Src, Dst: Pair[SI].Dst, Loops: Pair[SI].Loops, Result))
2842 return nullptr;
2843 break;
2844 }
2845 }
2846
2847 // Make sure the Scalar flags are set correctly.
2848 SmallBitVector CompleteLoops(MaxLevels + 1);
2849 for (unsigned SI = 0; SI < Pairs; ++SI)
2850 CompleteLoops |= Pair[SI].Loops;
2851 for (unsigned II = 1; II <= CommonLevels; ++II)
2852 if (CompleteLoops[II])
2853 Result.DV[II - 1].Scalar = false;
2854
2855 // Set the distance to zero if the direction is EQ.
2856 // TODO: Ideally, the distance should be set to 0 immediately simultaneously
2857 // with the corresponding direction being set to EQ.
2858 for (unsigned II = 1; II <= Result.getLevels(); ++II) {
2859 if (Result.getDirection(Level: II) == Dependence::DVEntry::EQ) {
2860 if (Result.DV[II - 1].Distance == nullptr)
2861 Result.DV[II - 1].Distance = SE->getZero(Ty: SrcSCEV->getType());
2862 else
2863 assert(Result.DV[II - 1].Distance->isZero() &&
2864 "Inconsistency between distance and direction");
2865 }
2866
2867#ifndef NDEBUG
2868 // Check that the converse (i.e., if the distance is zero, then the
2869 // direction is EQ) holds.
2870 const SCEV *Distance = Result.getDistance(II);
2871 if (Distance && Distance->isZero())
2872 assert(Result.getDirection(II) == Dependence::DVEntry::EQ &&
2873 "Distance is zero, but direction is not EQ");
2874#endif
2875 }
2876
2877 if (SameSDLevels > 0) {
2878 // Extracting SameSD levels from the common levels
2879 // Reverting CommonLevels and MaxLevels to their original values
2880 assert(CommonLevels >= SameSDLevels);
2881 CommonLevels -= SameSDLevels;
2882 MaxLevels += SameSDLevels;
2883 std::unique_ptr<FullDependence::DVEntry[]> DV, DVSameSD;
2884 DV = std::make_unique<FullDependence::DVEntry[]>(num: CommonLevels);
2885 DVSameSD = std::make_unique<FullDependence::DVEntry[]>(num: SameSDLevels);
2886 for (unsigned Level = 0; Level < CommonLevels; ++Level)
2887 DV[Level] = Result.DV[Level];
2888 for (unsigned Level = 0; Level < SameSDLevels; ++Level)
2889 DVSameSD[Level] = Result.DV[CommonLevels + Level];
2890 Result.DV = std::move(DV);
2891 Result.DVSameSD = std::move(DVSameSD);
2892 Result.Levels = CommonLevels;
2893 Result.SameSDLevels = SameSDLevels;
2894 }
2895
2896 if (PossiblyLoopIndependent) {
2897 // Make sure the LoopIndependent flag is set correctly.
2898 // All directions must include equal, otherwise no
2899 // loop-independent dependence is possible.
2900 for (unsigned II = 1; II <= CommonLevels; ++II) {
2901 if (!(Result.getDirection(Level: II) & Dependence::DVEntry::EQ)) {
2902 Result.LoopIndependent = false;
2903 break;
2904 }
2905 }
2906 } else {
2907 // On the other hand, if all directions are equal and there's no
2908 // loop-independent dependence possible, then no dependence exists.
2909 // However, if there are runtime assumptions, we must return the result.
2910 bool AllEqual = true;
2911 for (unsigned II = 1; II <= CommonLevels; ++II) {
2912 if (Result.getDirection(Level: II) != Dependence::DVEntry::EQ) {
2913 AllEqual = false;
2914 break;
2915 }
2916 }
2917 if (AllEqual && Result.Assumptions.getPredicates().empty())
2918 return nullptr;
2919 }
2920
2921 return std::make_unique<FullDependence>(args: std::move(Result));
2922}
2923