1//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements an analysis pass that tries to delinearize all GEP
10// instructions in all loops using the SCEV analysis functionality. This pass is
11// only used for testing purposes: if your pass needs delinearization, please
12// use the on-demand SCEVAddRecExpr::delinearize() function.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/Analysis/Delinearization.h"
17#include "llvm/Analysis/LoopInfo.h"
18#include "llvm/Analysis/ScalarEvolution.h"
19#include "llvm/Analysis/ScalarEvolutionDivision.h"
20#include "llvm/Analysis/ScalarEvolutionExpressions.h"
21#include "llvm/IR/Constants.h"
22#include "llvm/IR/DerivedTypes.h"
23#include "llvm/IR/Function.h"
24#include "llvm/IR/InstIterator.h"
25#include "llvm/IR/Instructions.h"
26#include "llvm/IR/PassManager.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/raw_ostream.h"
30
31using namespace llvm;
32
33#define DL_NAME "delinearize"
34#define DEBUG_TYPE DL_NAME
35
36static cl::opt<bool> UseFixedSizeArrayHeuristic(
37 "delinearize-use-fixed-size-array-heuristic", cl::init(Val: true), cl::Hidden,
38 cl::desc("When printing analysis, use the heuristic for fixed-size arrays "
39 "if the default delinearizetion fails."));
40
41// Return true when S contains at least an undef value.
42static inline bool containsUndefs(const SCEV *S) {
43 return SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
44 if (const auto *SU = dyn_cast<SCEVUnknown>(Val: S))
45 return isa<UndefValue>(Val: SU->getValue());
46 return false;
47 });
48}
49
50namespace {
51
52// Collect all steps of SCEV expressions.
53struct SCEVCollectStrides {
54 ScalarEvolution &SE;
55 SmallVectorImpl<const SCEV *> &Strides;
56
57 SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
58 : SE(SE), Strides(S) {}
59
60 bool follow(const SCEV *S) {
61 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S))
62 Strides.push_back(Elt: AR->getStepRecurrence(SE));
63 return true;
64 }
65
66 bool isDone() const { return false; }
67};
68
69// Collect all SCEVUnknown and SCEVMulExpr expressions.
70struct SCEVCollectTerms {
71 SmallVectorImpl<const SCEV *> &Terms;
72
73 SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
74
75 bool follow(const SCEV *S) {
76 if (isa<SCEVUnknown>(Val: S) || isa<SCEVMulExpr>(Val: S) ||
77 isa<SCEVSignExtendExpr>(Val: S)) {
78 if (!containsUndefs(S))
79 Terms.push_back(Elt: S);
80
81 // Stop recursion: once we collected a term, do not walk its operands.
82 return false;
83 }
84
85 // Keep looking.
86 return true;
87 }
88
89 bool isDone() const { return false; }
90};
91
92// Check if a SCEV contains an AddRecExpr.
93struct SCEVHasAddRec {
94 bool &ContainsAddRec;
95
96 SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
97 ContainsAddRec = false;
98 }
99
100 bool follow(const SCEV *S) {
101 if (isa<SCEVAddRecExpr>(Val: S)) {
102 ContainsAddRec = true;
103
104 // Stop recursion: once we collected a term, do not walk its operands.
105 return false;
106 }
107
108 // Keep looking.
109 return true;
110 }
111
112 bool isDone() const { return false; }
113};
114
115// Find factors that are multiplied with an expression that (possibly as a
116// subexpression) contains an AddRecExpr. In the expression:
117//
118// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
119//
120// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
121// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
122// parameters as they form a product with an induction variable.
123//
124// This collector expects all array size parameters to be in the same MulExpr.
125// It might be necessary to later add support for collecting parameters that are
126// spread over different nested MulExpr.
127struct SCEVCollectAddRecMultiplies {
128 SmallVectorImpl<const SCEV *> &Terms;
129 ScalarEvolution &SE;
130
131 SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
132 ScalarEvolution &SE)
133 : Terms(T), SE(SE) {}
134
135 bool follow(const SCEV *S) {
136 if (auto *Mul = dyn_cast<SCEVMulExpr>(Val: S)) {
137 bool HasAddRec = false;
138 SmallVector<const SCEV *, 0> Operands;
139 for (const SCEV *Op : Mul->operands()) {
140 const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Val: Op);
141 if (Unknown && !isa<CallInst>(Val: Unknown->getValue())) {
142 Operands.push_back(Elt: Op);
143 } else if (Unknown) {
144 HasAddRec = true;
145 } else {
146 bool ContainsAddRec = false;
147 SCEVHasAddRec ContiansAddRec(ContainsAddRec);
148 visitAll(Root: Op, Visitor&: ContiansAddRec);
149 HasAddRec |= ContainsAddRec;
150 }
151 }
152 if (Operands.size() == 0)
153 return true;
154
155 if (!HasAddRec)
156 return false;
157
158 Terms.push_back(Elt: SE.getMulExpr(Ops&: Operands));
159 // Stop recursion: once we collected a term, do not walk its operands.
160 return false;
161 }
162
163 // Keep looking.
164 return true;
165 }
166
167 bool isDone() const { return false; }
168};
169
170} // end anonymous namespace
171
172/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in
173/// two places:
174/// 1) The strides of AddRec expressions.
175/// 2) Unknowns that are multiplied with AddRec expressions.
176void llvm::collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr,
177 SmallVectorImpl<const SCEV *> &Terms) {
178 SmallVector<const SCEV *, 4> Strides;
179 SCEVCollectStrides StrideCollector(SE, Strides);
180 visitAll(Root: Expr, Visitor&: StrideCollector);
181
182 LLVM_DEBUG({
183 dbgs() << "Strides:\n";
184 for (const SCEV *S : Strides)
185 dbgs().indent(2) << *S << "\n";
186 });
187
188 for (const SCEV *S : Strides) {
189 SCEVCollectTerms TermCollector(Terms);
190 visitAll(Root: S, Visitor&: TermCollector);
191 }
192
193 LLVM_DEBUG({
194 dbgs() << "Terms:\n";
195 for (const SCEV *T : Terms)
196 dbgs().indent(2) << *T << "\n";
197 });
198
199 SCEVCollectAddRecMultiplies MulCollector(Terms, SE);
200 visitAll(Root: Expr, Visitor&: MulCollector);
201}
202
203static bool findArrayDimensionsRec(ScalarEvolution &SE,
204 SmallVectorImpl<const SCEV *> &Terms,
205 SmallVectorImpl<const SCEV *> &Sizes) {
206 int Last = Terms.size() - 1;
207 const SCEV *Step = Terms[Last];
208
209 // End of recursion.
210 if (Last == 0) {
211 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Val: Step)) {
212 SmallVector<const SCEV *, 2> Qs;
213 for (const SCEV *Op : M->operands())
214 if (!isa<SCEVConstant>(Val: Op))
215 Qs.push_back(Elt: Op);
216
217 Step = SE.getMulExpr(Ops&: Qs);
218 }
219
220 Sizes.push_back(Elt: Step);
221 return true;
222 }
223
224 for (const SCEV *&Term : Terms) {
225 // Normalize the terms before the next call to findArrayDimensionsRec.
226 const SCEV *Q, *R;
227 SCEVDivision::divide(SE, Numerator: Term, Denominator: Step, Quotient: &Q, Remainder: &R);
228
229 // Bail out when GCD does not evenly divide one of the terms.
230 if (!R->isZero())
231 return false;
232
233 Term = Q;
234 }
235
236 // Remove all SCEVConstants.
237 erase_if(C&: Terms, P: [](const SCEV *E) { return isa<SCEVConstant>(Val: E); });
238
239 if (Terms.size() > 0)
240 if (!findArrayDimensionsRec(SE, Terms, Sizes))
241 return false;
242
243 Sizes.push_back(Elt: Step);
244 return true;
245}
246
247// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
248static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
249 for (const SCEV *T : Terms)
250 if (SCEVExprContains(Root: T, Pred: [](const SCEV *S) { return isa<SCEVUnknown>(Val: S); }))
251 return true;
252
253 return false;
254}
255
256// Return the number of product terms in S.
257static inline int numberOfTerms(const SCEV *S) {
258 if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(Val: S))
259 return Expr->getNumOperands();
260 return 1;
261}
262
263static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
264 if (isa<SCEVConstant>(Val: T))
265 return nullptr;
266
267 if (isa<SCEVUnknown>(Val: T))
268 return T;
269
270 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Val: T)) {
271 SmallVector<const SCEV *, 2> Factors;
272 for (const SCEV *Op : M->operands())
273 if (!isa<SCEVConstant>(Val: Op))
274 Factors.push_back(Elt: Op);
275
276 return SE.getMulExpr(Ops&: Factors);
277 }
278
279 return T;
280}
281
282void llvm::findArrayDimensions(ScalarEvolution &SE,
283 SmallVectorImpl<const SCEV *> &Terms,
284 SmallVectorImpl<const SCEV *> &Sizes,
285 const SCEV *ElementSize) {
286 if (Terms.size() < 1 || !ElementSize)
287 return;
288
289 // Early return when Terms do not contain parameters: we do not delinearize
290 // non parametric SCEVs.
291 if (!containsParameters(Terms))
292 return;
293
294 LLVM_DEBUG({
295 dbgs() << "Terms:\n";
296 for (const SCEV *T : Terms)
297 dbgs().indent(2) << *T << "\n";
298 });
299
300 // Remove duplicates.
301 array_pod_sort(Start: Terms.begin(), End: Terms.end());
302 Terms.erase(CS: llvm::unique(R&: Terms), CE: Terms.end());
303
304 // Put larger terms first.
305 llvm::sort(C&: Terms, Comp: [](const SCEV *LHS, const SCEV *RHS) {
306 return numberOfTerms(S: LHS) > numberOfTerms(S: RHS);
307 });
308
309 // Try to divide all terms by the element size. If term is not divisible by
310 // element size, proceed with the original term.
311 for (const SCEV *&Term : Terms) {
312 const SCEV *Q, *R;
313 SCEVDivision::divide(SE, Numerator: Term, Denominator: ElementSize, Quotient: &Q, Remainder: &R);
314 if (!Q->isZero())
315 Term = Q;
316 }
317
318 SmallVector<const SCEV *, 4> NewTerms;
319
320 // Remove constant factors.
321 for (const SCEV *T : Terms)
322 if (const SCEV *NewT = removeConstantFactors(SE, T))
323 NewTerms.push_back(Elt: NewT);
324
325 LLVM_DEBUG({
326 dbgs() << "Terms after sorting:\n";
327 for (const SCEV *T : NewTerms)
328 dbgs().indent(2) << *T << "\n";
329 });
330
331 if (NewTerms.empty() || !findArrayDimensionsRec(SE, Terms&: NewTerms, Sizes)) {
332 Sizes.clear();
333 return;
334 }
335
336 // The last element to be pushed into Sizes is the size of an element.
337 Sizes.push_back(Elt: ElementSize);
338
339 LLVM_DEBUG({
340 dbgs() << "Sizes:\n";
341 for (const SCEV *S : Sizes)
342 dbgs().indent(2) << *S << "\n";
343 });
344}
345
346void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr,
347 SmallVectorImpl<const SCEV *> &Subscripts,
348 SmallVectorImpl<const SCEV *> &Sizes) {
349 // Early exit in case this SCEV is not an affine multivariate function.
350 if (Sizes.empty())
351 return;
352
353 if (auto *AR = dyn_cast<SCEVAddRecExpr>(Val: Expr))
354 if (!AR->isAffine())
355 return;
356
357 // Clear output vector.
358 Subscripts.clear();
359
360 LLVM_DEBUG(dbgs() << "\ncomputeAccessFunctions\n"
361 << "Memory Access Function: " << *Expr << "\n");
362
363 const SCEV *Res = Expr;
364 int Last = Sizes.size() - 1;
365
366 for (int i = Last; i >= 0; i--) {
367 const SCEV *Size = Sizes[i];
368 const SCEV *Q, *R;
369
370 SCEVDivision::divide(SE, Numerator: Res, Denominator: Size, Quotient: &Q, Remainder: &R);
371
372 LLVM_DEBUG({
373 dbgs() << "Computing 'MemAccFn / Sizes[" << i << "]':\n";
374 dbgs() << " MemAccFn: " << *Res << "\n";
375 dbgs() << " Sizes[" << i << "]: " << *Size << "\n";
376 dbgs() << " Quotient (Leftover): " << *Q << "\n";
377 dbgs() << " Remainder (Subscript Access Function): " << *R << "\n";
378 });
379
380 Res = Q;
381
382 // Do not record the last subscript corresponding to the size of elements in
383 // the array.
384 if (i == Last) {
385
386 // Bail out if the byte offset is non-zero.
387 if (!R->isZero()) {
388 Subscripts.clear();
389 Sizes.clear();
390 return;
391 }
392
393 continue;
394 }
395
396 // Record the access function for the current subscript.
397 Subscripts.push_back(Elt: R);
398 }
399
400 // Also push in last position the remainder of the last division: it will be
401 // the access function of the innermost dimension.
402 Subscripts.push_back(Elt: Res);
403
404 std::reverse(first: Subscripts.begin(), last: Subscripts.end());
405
406 LLVM_DEBUG({
407 dbgs() << "Subscripts:\n";
408 for (const SCEV *S : Subscripts)
409 dbgs().indent(2) << *S << "\n";
410 dbgs() << "\n";
411 });
412}
413
414/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
415/// sizes of an array access. Returns the remainder of the delinearization that
416/// is the offset start of the array. The SCEV->delinearize algorithm computes
417/// the multiples of SCEV coefficients: that is a pattern matching of sub
418/// expressions in the stride and base of a SCEV corresponding to the
419/// computation of a GCD (greatest common divisor) of base and stride. When
420/// SCEV->delinearize fails, it returns the SCEV unchanged.
421///
422/// For example: when analyzing the memory access A[i][j][k] in this loop nest
423///
424/// void foo(long n, long m, long o, double A[n][m][o]) {
425///
426/// for (long i = 0; i < n; i++)
427/// for (long j = 0; j < m; j++)
428/// for (long k = 0; k < o; k++)
429/// A[i][j][k] = 1.0;
430/// }
431///
432/// the delinearization input is the following AddRec SCEV:
433///
434/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
435///
436/// From this SCEV, we are able to say that the base offset of the access is %A
437/// because it appears as an offset that does not divide any of the strides in
438/// the loops:
439///
440/// CHECK: Base offset: %A
441///
442/// and then SCEV->delinearize determines the size of some of the dimensions of
443/// the array as these are the multiples by which the strides are happening:
444///
445/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double)
446/// bytes.
447///
448/// Note that the outermost dimension remains of UnknownSize because there are
449/// no strides that would help identifying the size of the last dimension: when
450/// the array has been statically allocated, one could compute the size of that
451/// dimension by dividing the overall size of the array by the size of the known
452/// dimensions: %m * %o * 8.
453///
454/// Finally delinearize provides the access functions for the array reference
455/// that does correspond to A[i][j][k] of the above C testcase:
456///
457/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
458///
459/// The testcases are checking the output of a function pass:
460/// DelinearizationPass that walks through all loads and stores of a function
461/// asking for the SCEV of the memory access with respect to all enclosing
462/// loops, calling SCEV->delinearize on that and printing the results.
463void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
464 SmallVectorImpl<const SCEV *> &Subscripts,
465 SmallVectorImpl<const SCEV *> &Sizes,
466 const SCEV *ElementSize) {
467 // Clear output vectors.
468 Subscripts.clear();
469 Sizes.clear();
470
471 // First step: collect parametric terms.
472 SmallVector<const SCEV *, 4> Terms;
473 collectParametricTerms(SE, Expr, Terms);
474
475 if (Terms.empty())
476 return;
477
478 // Second step: find subscript sizes.
479 findArrayDimensions(SE, Terms, Sizes, ElementSize);
480
481 if (Sizes.empty())
482 return;
483
484 // Third step: compute the access functions for each subscript.
485 computeAccessFunctions(SE, Expr, Subscripts, Sizes);
486}
487
488static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
489 if (const auto *Const = dyn_cast<SCEVConstant>(Val: S))
490 return Const->getAPInt();
491 return std::nullopt;
492}
493
494/// Collects the absolute values of constant steps for all induction variables.
495/// Returns true if we can prove that all step recurrences are constants and \p
496/// Expr is divisible by \p ElementSize. Each step recurrence is stored in \p
497/// Steps after divided by \p ElementSize.
498static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
499 SmallVectorImpl<uint64_t> &Steps,
500 uint64_t ElementSize) {
501 // End of recursion. The constant value also must be a multiple of
502 // ElementSize.
503 if (const auto *Const = dyn_cast<SCEVConstant>(Val: Expr)) {
504 const uint64_t Mod = Const->getAPInt().urem(RHS: ElementSize);
505 return Mod == 0;
506 }
507
508 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Expr);
509 if (!AR || !AR->isAffine())
510 return false;
511
512 const SCEV *Step = AR->getStepRecurrence(SE);
513 std::optional<APInt> StepAPInt = tryIntoAPInt(S: Step);
514 if (!StepAPInt)
515 return false;
516
517 APInt Q;
518 uint64_t R;
519 APInt::udivrem(LHS: StepAPInt->abs(), RHS: ElementSize, Quotient&: Q, Remainder&: R);
520 if (R != 0)
521 return false;
522
523 // Bail out when the step is too large.
524 std::optional<uint64_t> StepVal = Q.tryZExtValue();
525 if (!StepVal)
526 return false;
527
528 Steps.push_back(Elt: *StepVal);
529 return collectConstantAbsSteps(SE, Expr: AR->getStart(), Steps, ElementSize);
530}
531
532bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
533 SmallVectorImpl<uint64_t> &Sizes,
534 const SCEV *ElementSize) {
535 if (!ElementSize)
536 return false;
537
538 std::optional<APInt> ElementSizeAPInt = tryIntoAPInt(S: ElementSize);
539 if (!ElementSizeAPInt || *ElementSizeAPInt == 0)
540 return false;
541
542 std::optional<uint64_t> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
543
544 // Early exit when ElementSize is not a positive constant.
545 if (!ElementSizeConst)
546 return false;
547
548 if (!collectConstantAbsSteps(SE, Expr, Steps&: Sizes, ElementSize: *ElementSizeConst) ||
549 Sizes.empty()) {
550 Sizes.clear();
551 return false;
552 }
553
554 // At this point, Sizes contains the absolute step recurrences for all
555 // induction variables. Each step recurrence must be a multiple of the size of
556 // the array element. Assuming that the each value represents the size of an
557 // array for each dimension, attempts to restore the length of each dimension
558 // by dividing the step recurrence by the next smaller value. For example, if
559 // we have the following AddRec SCEV:
560 //
561 // AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
562 //
563 // Then Sizes will become [256, 32, 1] after sorted. We don't know the size of
564 // the outermost dimension, the next dimension will be computed as 256 / 32 =
565 // 8, and the last dimension will be computed as 32 / 1 = 32. Thus it results
566 // in like Arr[UnknownSize][8][32] with elements of size 8 bytes, where Arr is
567 // a base pointer.
568 //
569 // TODO: Catch more cases, e.g., when a step recurrence is not divisible by
570 // the next smaller one, like A[i][3*j].
571 llvm::sort(Start: Sizes.rbegin(), End: Sizes.rend());
572 Sizes.erase(CS: llvm::unique(R&: Sizes), CE: Sizes.end());
573
574 // The last element in Sizes should be ElementSize. At this point, all values
575 // in Sizes are assumed to be divided by ElementSize, so replace it with 1.
576 assert(Sizes.back() != 0 && "Unexpected zero size in Sizes.");
577 Sizes.back() = 1;
578
579 for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
580 uint64_t PrevSize = Sizes[I + 1];
581 if (Sizes[I] % PrevSize) {
582 Sizes.clear();
583 return false;
584 }
585 Sizes[I] /= PrevSize;
586 }
587
588 // Finally, the last element in Sizes should be ElementSize.
589 Sizes.back() = *ElementSizeConst;
590 return true;
591}
592
593/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
594/// sizes of an array access, assuming that the array is a fixed size array.
595///
596/// E.g., if we have the code like as follows:
597///
598/// double A[42][8][32];
599/// for i
600/// for j
601/// for k
602/// use A[i][j][k]
603///
604/// The access function will be represented as an AddRec SCEV like:
605///
606/// AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
607///
608/// Then findFixedSizeArrayDimensions infers the size of each dimension of the
609/// array based on the fact that the value of the step recurrence is a multiple
610/// of the size of the corresponding array element. In the above example, it
611/// results in the following:
612///
613/// CHECK: ArrayDecl[UnknownSize][8][32] with elements of 8 bytes.
614///
615/// Finally each subscript will be computed as follows:
616///
617/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
618///
619/// Note that this function doesn't check the range of possible values for each
620/// subscript, so the caller should perform additional boundary checks if
621/// necessary.
622///
623/// Also note that this function doesn't guarantee that the original array size
624/// is restored "correctly". For example, in the following case:
625///
626/// double A[42][4][64];
627/// double B[42][8][32];
628/// for i
629/// for j
630/// for k
631/// use A[i][j][k]
632/// use B[i][2*j][k]
633///
634/// The access function for both accesses will be the same:
635///
636/// AddRec: {{{0,+,2048}<%for.i>,+,512}<%for.j>,+,8}<%for.k> (ElementSize=8)
637///
638/// The array sizes for both A and B will be computed as
639/// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B.
640///
641/// TODO: At the moment, this function can handle only simple cases. For
642/// example, we cannot handle a case where a step recurrence is not divisible
643/// by the next smaller step recurrence, e.g., A[i][3*j].
644bool llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
645 SmallVectorImpl<const SCEV *> &Subscripts,
646 SmallVectorImpl<const SCEV *> &Sizes,
647 const SCEV *ElementSize) {
648 // Clear output vectors.
649 Subscripts.clear();
650 Sizes.clear();
651
652 // First step: find the fixed array size.
653 SmallVector<uint64_t, 4> ConstSizes;
654 if (!findFixedSizeArrayDimensions(SE, Expr, Sizes&: ConstSizes, ElementSize)) {
655 Sizes.clear();
656 return false;
657 }
658
659 // Convert the constant size to SCEV.
660 for (uint64_t Size : ConstSizes)
661 Sizes.push_back(Elt: SE.getConstant(Ty: Expr->getType(), V: Size));
662
663 // Second step: compute the access functions for each subscript.
664 computeAccessFunctions(SE, Expr, Subscripts, Sizes);
665
666 return !Subscripts.empty();
667}
668
669bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
670 ArrayRef<const SCEV *> Sizes,
671 ArrayRef<const SCEV *> Subscripts) {
672 // Sizes and Subscripts are as follows:
673 //
674 // Sizes: [UNK][S_2]...[S_n]
675 // Subscripts: [I_1][I_2]...[I_n]
676 //
677 // where the size of the outermost dimension is unknown (UNK).
678
679 auto AddOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
680 if (!SE.willNotOverflow(BinOp: Instruction::Add, /*IsSigned=*/Signed: true, LHS: A, RHS: B))
681 return nullptr;
682 return SE.getAddExpr(LHS: A, RHS: B);
683 };
684
685 auto MulOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
686 if (!SE.willNotOverflow(BinOp: Instruction::Mul, /*IsSigned=*/Signed: true, LHS: A, RHS: B))
687 return nullptr;
688 return SE.getMulExpr(LHS: A, RHS: B);
689 };
690
691 // Range check: 0 <= I_k < S_k for k = 2..n.
692 for (size_t I = 1; I < Sizes.size(); ++I) {
693 const SCEV *Size = Sizes[I - 1];
694 const SCEV *Subscript = Subscripts[I];
695 if (!SE.isKnownNonNegative(S: Subscript))
696 return false;
697
698 // TODO: It may be better that delinearization itself unifies the types of
699 // all elements in Sizes and Subscripts.
700 Type *WiderTy = SE.getWiderType(Ty1: Subscript->getType(), Ty2: Size->getType());
701 Subscript = SE.getNoopOrSignExtend(V: Subscript, Ty: WiderTy);
702 Size = SE.getNoopOrSignExtend(V: Size, Ty: WiderTy);
703 if (!SE.isKnownPredicate(Pred: ICmpInst::ICMP_SLT, LHS: Subscript, RHS: Size)) {
704 LLVM_DEBUG(dbgs() << "Range check failed: " << *Subscript << " <s "
705 << *Size << "\n");
706 return false;
707 }
708 }
709
710 // The offset computation is as follows:
711 //
712 // Offset = I_n +
713 // S_n * I_{n-1} +
714 // ... +
715 // (S_2 * ... * S_n) * I_1
716 //
717 // Regarding this as a function from (I_1, I_2, ..., I_n) to integers, it
718 // must be injective. To guarantee it, the above calculation must not
719 // overflow. Since we have already checked that 0 <= I_k < S_k for k = 2..n,
720 // the minimum and maximum values occur in the following cases:
721 //
722 // Min = [I_1][0]...[0] = S_2 * ... * S_n * I_1
723 // Max = [I_1][S_2-1]...[S_n-1]
724 // = (S_2 * ... * S_n) * I_1 +
725 // (S_2 * ... * S_{n-1}) * (S_2 - 1) +
726 // ... +
727 // (S_n - 1)
728 // = (S_2 * ... * S_n) * I_1 +
729 // (S_2 * ... * S_n) - 1 (can be proven by induction)
730 // = Min + (S_2 * ... * S_n) - 1
731 //
732 // NOTE: I_1 can be negative, so Min is not just 0.
733 const SCEV *Prod = SE.getOne(Ty: Sizes[0]->getType());
734 for (const SCEV *Size : Sizes) {
735 Prod = MulOverflow(Prod, Size);
736 if (!Prod)
737 return false;
738 }
739 const SCEV *Min = MulOverflow(Prod, Subscripts[0]);
740 if (!Min)
741 return false;
742
743 // We have already checked that Min and Prod don't overflow, so it's enough
744 // to check whether Min + Prod - 1 doesn't overflow.
745 const SCEV *MaxPlusOne = AddOverflow(Min, Prod);
746 if (!MaxPlusOne)
747 return false;
748 if (!SE.willNotOverflow(BinOp: Instruction::Sub, /*IsSigned=*/Signed: true, LHS: MaxPlusOne,
749 RHS: SE.getOne(Ty: MaxPlusOne->getType())))
750 return false;
751
752 return true;
753}
754
755bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
756 const GetElementPtrInst *GEP,
757 SmallVectorImpl<const SCEV *> &Subscripts,
758 SmallVectorImpl<const SCEV *> &Sizes) {
759 assert(Subscripts.empty() && Sizes.empty() &&
760 "Expected output lists to be empty on entry to this function.");
761 assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
762 LLVM_DEBUG(dbgs() << "\nGEP to delinearize: " << *GEP << "\n");
763 Type *Ty = nullptr;
764 Type *IndexTy = SE.getEffectiveSCEVType(Ty: GEP->getPointerOperandType());
765 bool DroppedFirstDim = false;
766 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
767 const SCEV *Expr = SE.getSCEV(V: GEP->getOperand(i_nocapture: i));
768 if (i == 1) {
769 Ty = GEP->getSourceElementType();
770 if (auto *Const = dyn_cast<SCEVConstant>(Val: Expr))
771 if (Const->getValue()->isZero()) {
772 DroppedFirstDim = true;
773 continue;
774 }
775 Subscripts.push_back(Elt: Expr);
776 continue;
777 }
778
779 auto *ArrayTy = dyn_cast<ArrayType>(Val: Ty);
780 if (!ArrayTy) {
781 LLVM_DEBUG(dbgs() << "GEP delinearize failed: " << *Ty
782 << " is not an array type.\n");
783 Subscripts.clear();
784 Sizes.clear();
785 return false;
786 }
787
788 Subscripts.push_back(Elt: Expr);
789 if (!(DroppedFirstDim && i == 2))
790 Sizes.push_back(Elt: SE.getConstant(Ty: IndexTy, V: ArrayTy->getNumElements()));
791
792 Ty = ArrayTy->getElementType();
793 }
794 LLVM_DEBUG({
795 dbgs() << "Subscripts:\n";
796 for (const SCEV *S : Subscripts)
797 dbgs() << *S << "\n";
798 dbgs() << "\n";
799 });
800
801 return !Subscripts.empty();
802}
803
804namespace {
805
806void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
807 ScalarEvolution *SE) {
808 O << "Printing analysis 'Delinearization' for function '" << F->getName()
809 << "':";
810 for (Instruction &Inst : instructions(F)) {
811 // Only analyze loads and stores.
812 if (!isa<StoreInst>(Val: &Inst) && !isa<LoadInst>(Val: &Inst))
813 continue;
814
815 const BasicBlock *BB = Inst.getParent();
816 Loop *L = LI->getLoopFor(BB);
817 // Only delinearize the memory access in the innermost loop.
818 // Do not analyze memory accesses outside loops.
819 if (!L)
820 continue;
821
822 const SCEV *AccessFn = SE->getSCEVAtScope(V: getPointerOperand(V: &Inst), L);
823
824 const SCEVUnknown *BasePointer =
825 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: AccessFn));
826 // Do not delinearize if we cannot find the base pointer.
827 if (!BasePointer)
828 break;
829 AccessFn = SE->getMinusSCEV(LHS: AccessFn, RHS: BasePointer);
830
831 O << "\n";
832 O << "Inst:" << Inst << "\n";
833 O << "AccessFunction: " << *AccessFn << "\n";
834
835 SmallVector<const SCEV *, 3> Subscripts, Sizes;
836
837 auto IsDelinearizationFailed = [&]() {
838 return Subscripts.size() == 0 || Sizes.size() == 0 ||
839 Subscripts.size() != Sizes.size();
840 };
841
842 delinearize(SE&: *SE, Expr: AccessFn, Subscripts, Sizes, ElementSize: SE->getElementSize(Inst: &Inst));
843 if (UseFixedSizeArrayHeuristic && IsDelinearizationFailed()) {
844 Subscripts.clear();
845 Sizes.clear();
846 delinearizeFixedSizeArray(SE&: *SE, Expr: AccessFn, Subscripts, Sizes,
847 ElementSize: SE->getElementSize(Inst: &Inst));
848 }
849
850 if (IsDelinearizationFailed()) {
851 O << "failed to delinearize\n";
852 continue;
853 }
854
855 O << "Base offset: " << *BasePointer << "\n";
856 O << "ArrayDecl[UnknownSize]";
857 int Size = Subscripts.size();
858 for (int i = 0; i < Size - 1; i++)
859 O << "[" << *Sizes[i] << "]";
860 O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
861
862 O << "ArrayRef";
863 for (int i = 0; i < Size; i++)
864 O << "[" << *Subscripts[i] << "]";
865 O << "\n";
866
867 bool IsValid = validateDelinearizationResult(SE&: *SE, Sizes, Subscripts);
868 O << "Delinearization validation: " << (IsValid ? "Succeeded" : "Failed")
869 << "\n";
870 }
871}
872
873} // end anonymous namespace
874
875DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS)
876 : OS(OS) {}
877PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
878 FunctionAnalysisManager &AM) {
879 printDelinearization(O&: OS, F: &F, LI: &AM.getResult<LoopAnalysis>(IR&: F),
880 SE: &AM.getResult<ScalarEvolutionAnalysis>(IR&: F));
881 return PreservedAnalyses::all();
882}
883