1//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements an analysis pass that tries to delinearize all GEP
10// instructions in all loops using the SCEV analysis functionality.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/Delinearization.h"
15#include "llvm/Analysis/LoopInfo.h"
16#include "llvm/Analysis/ScalarEvolution.h"
17#include "llvm/Analysis/ScalarEvolutionDivision.h"
18#include "llvm/Analysis/ScalarEvolutionExpressions.h"
19#include "llvm/IR/Constants.h"
20#include "llvm/IR/DerivedTypes.h"
21#include "llvm/IR/Function.h"
22#include "llvm/IR/InstIterator.h"
23#include "llvm/IR/Instructions.h"
24#include "llvm/IR/PassManager.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DL_NAME "delinearize"
32#define DEBUG_TYPE DL_NAME
33
34static cl::opt<bool> UseFixedSizeArrayHeuristic(
35 "delinearize-use-fixed-size-array-heuristic", cl::init(Val: true), cl::Hidden,
36 cl::desc("When printing analysis, use the heuristic for fixed-size arrays "
37 "if the default delinearizetion fails."));
38
39// Return true when S contains at least an undef value.
40static inline bool containsUndefs(const SCEV *S) {
41 return SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
42 if (const auto *SU = dyn_cast<SCEVUnknown>(Val: S))
43 return isa<UndefValue>(Val: SU->getValue());
44 return false;
45 });
46}
47
48namespace {
49
50// Collect all steps of SCEV expressions.
51struct SCEVCollectStrides {
52 ScalarEvolution &SE;
53 SmallVectorImpl<const SCEV *> &Strides;
54
55 SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
56 : SE(SE), Strides(S) {}
57
58 bool follow(const SCEV *S) {
59 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S))
60 Strides.push_back(Elt: AR->getStepRecurrence(SE));
61 return true;
62 }
63
64 bool isDone() const { return false; }
65};
66
67// Collect all SCEVUnknown and SCEVMulExpr expressions.
68struct SCEVCollectTerms {
69 SmallVectorImpl<const SCEV *> &Terms;
70
71 SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
72
73 bool follow(const SCEV *S) {
74 if (isa<SCEVUnknown>(Val: S) || isa<SCEVMulExpr>(Val: S) ||
75 isa<SCEVSignExtendExpr>(Val: S))
76 if (!containsUndefs(S))
77 Terms.push_back(Elt: S);
78
79 // Keep looking when S is a specific type expression.
80 return isa<SCEVAddExpr, SCEVAddRecExpr>(Val: S);
81 }
82
83 bool isDone() const { return false; }
84};
85
86// Check if a SCEV contains an AddRecExpr.
87struct SCEVHasAddRec {
88 bool &ContainsAddRec;
89
90 SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
91 ContainsAddRec = false;
92 }
93
94 bool follow(const SCEV *S) {
95 if (isa<SCEVAddRecExpr>(Val: S)) {
96 ContainsAddRec = true;
97
98 // Stop recursion: once we collected a term, do not walk its operands.
99 return false;
100 }
101
102 // Keep looking.
103 return true;
104 }
105
106 bool isDone() const { return false; }
107};
108
109// Find factors that are multiplied with an expression that (possibly as a
110// subexpression) contains an AddRecExpr. In the expression:
111//
112// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
113//
114// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
115// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
116// parameters as they form a product with an induction variable.
117//
118// This collector expects all array size parameters to be in the same MulExpr.
119// It might be necessary to later add support for collecting parameters that are
120// spread over different nested MulExpr.
121struct SCEVCollectAddRecMultiplies {
122 SmallVectorImpl<const SCEV *> &Terms;
123 ScalarEvolution &SE;
124
125 SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
126 ScalarEvolution &SE)
127 : Terms(T), SE(SE) {}
128
129 bool follow(const SCEV *S) {
130 if (auto *Mul = dyn_cast<SCEVMulExpr>(Val: S)) {
131 bool HasAddRec = false;
132 SmallVector<SCEVUse, 0> Operands;
133 for (const SCEV *Op : Mul->operands()) {
134 const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Val: Op);
135 if (Unknown && !isa<CallInst>(Val: Unknown->getValue())) {
136 Operands.push_back(Elt: Op);
137 } else if (Unknown) {
138 HasAddRec = true;
139 } else {
140 bool ContainsAddRec = false;
141 SCEVHasAddRec ContiansAddRec(ContainsAddRec);
142 visitAll(Root: Op, Visitor&: ContiansAddRec);
143 HasAddRec |= ContainsAddRec;
144 }
145 }
146 if (Operands.size() == 0)
147 return true;
148
149 if (!HasAddRec)
150 return false;
151
152 Terms.push_back(Elt: SE.getMulExpr(Ops&: Operands));
153 }
154
155 // Keep looking when S is a specific type expression.
156 return isa<SCEVAddExpr, SCEVAddRecExpr>(Val: S);
157 }
158
159 bool isDone() const { return false; }
160};
161
162} // end anonymous namespace
163
164/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in
165/// two places:
166/// 1) The strides of AddRec expressions.
167/// 2) Unknowns that are multiplied with AddRec expressions.
168void llvm::collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr,
169 SmallVectorImpl<const SCEV *> &Terms) {
170 SmallVector<const SCEV *, 4> Strides;
171 SCEVCollectStrides StrideCollector(SE, Strides);
172 visitAll(Root: Expr, Visitor&: StrideCollector);
173
174 LLVM_DEBUG({
175 dbgs() << "Strides:\n";
176 for (const SCEV *S : Strides)
177 dbgs().indent(2) << *S << "\n";
178 });
179
180 for (const SCEV *S : Strides) {
181 SCEVCollectTerms TermCollector(Terms);
182 visitAll(Root: S, Visitor&: TermCollector);
183 }
184
185 LLVM_DEBUG({
186 dbgs() << "Terms:\n";
187 for (const SCEV *T : Terms)
188 dbgs().indent(2) << *T << "\n";
189 });
190
191 SCEVCollectAddRecMultiplies MulCollector(Terms, SE);
192 visitAll(Root: Expr, Visitor&: MulCollector);
193}
194
195static bool findArrayDimensionsRec(ScalarEvolution &SE,
196 SmallVectorImpl<const SCEV *> &Terms,
197 SmallVectorImpl<const SCEV *> &Sizes) {
198 int Last = Terms.size() - 1;
199 const SCEV *Step = Terms[Last];
200
201 // End of recursion.
202 if (Last == 0) {
203 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Val: Step)) {
204 SmallVector<SCEVUse, 2> Qs;
205 for (const SCEV *Op : M->operands())
206 if (!isa<SCEVConstant>(Val: Op))
207 Qs.push_back(Elt: Op);
208
209 Step = SE.getMulExpr(Ops&: Qs);
210 }
211
212 Sizes.push_back(Elt: Step);
213 return true;
214 }
215
216 for (const SCEV *&Term : Terms) {
217 // Normalize the terms before the next call to findArrayDimensionsRec.
218 const SCEV *Q, *R;
219 SCEVDivision::divide(SE, Numerator: Term, Denominator: Step, Quotient: &Q, Remainder: &R);
220
221 // Bail out when GCD does not evenly divide one of the terms.
222 if (!R->isZero())
223 return false;
224
225 Term = Q;
226 }
227
228 // Remove all SCEVConstants.
229 erase_if(C&: Terms, P: [](const SCEV *E) { return isa<SCEVConstant>(Val: E); });
230
231 if (Terms.size() > 0)
232 if (!findArrayDimensionsRec(SE, Terms, Sizes))
233 return false;
234
235 Sizes.push_back(Elt: Step);
236 return true;
237}
238
239// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
240static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
241 for (const SCEV *T : Terms)
242 if (SCEVExprContains(Root: T, Pred: [](const SCEV *S) { return isa<SCEVUnknown>(Val: S); }))
243 return true;
244
245 return false;
246}
247
248// Return the number of product terms in S.
249static inline int numberOfTerms(const SCEV *S) {
250 if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(Val: S))
251 return Expr->getNumOperands();
252 return 1;
253}
254
255static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
256 if (isa<SCEVConstant>(Val: T))
257 return nullptr;
258
259 if (isa<SCEVUnknown>(Val: T))
260 return T;
261
262 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Val: T)) {
263 SmallVector<SCEVUse, 2> Factors;
264 for (const SCEV *Op : M->operands())
265 if (!isa<SCEVConstant>(Val: Op))
266 Factors.push_back(Elt: Op);
267
268 return SE.getMulExpr(Ops&: Factors);
269 }
270
271 return T;
272}
273
274void llvm::findArrayDimensions(ScalarEvolution &SE,
275 SmallVectorImpl<const SCEV *> &Terms,
276 SmallVectorImpl<const SCEV *> &Sizes,
277 const SCEV *ElementSize) {
278 if (Terms.size() < 1 || !ElementSize)
279 return;
280
281 // Early return when Terms do not contain parameters: we do not delinearize
282 // non parametric SCEVs.
283 if (!containsParameters(Terms))
284 return;
285
286 LLVM_DEBUG({
287 dbgs() << "Terms:\n";
288 for (const SCEV *T : Terms)
289 dbgs().indent(2) << *T << "\n";
290 });
291
292 // Remove duplicates.
293 array_pod_sort(Start: Terms.begin(), End: Terms.end());
294 Terms.erase(CS: llvm::unique(R&: Terms), CE: Terms.end());
295
296 // Put larger terms first.
297 llvm::sort(C&: Terms, Comp: [](const SCEV *LHS, const SCEV *RHS) {
298 return numberOfTerms(S: LHS) > numberOfTerms(S: RHS);
299 });
300
301 // Try to divide all terms by the element size. If term is not divisible by
302 // element size, proceed with the original term.
303 for (const SCEV *&Term : Terms) {
304 const SCEV *Q, *R;
305 SCEVDivision::divide(SE, Numerator: Term, Denominator: ElementSize, Quotient: &Q, Remainder: &R);
306 if (!Q->isZero())
307 Term = Q;
308 }
309
310 SmallVector<const SCEV *, 4> NewTerms;
311
312 // Remove constant factors.
313 for (const SCEV *T : Terms)
314 if (const SCEV *NewT = removeConstantFactors(SE, T))
315 NewTerms.push_back(Elt: NewT);
316
317 LLVM_DEBUG({
318 dbgs() << "Terms after sorting:\n";
319 for (const SCEV *T : NewTerms)
320 dbgs().indent(2) << *T << "\n";
321 });
322
323 if (NewTerms.empty() || !findArrayDimensionsRec(SE, Terms&: NewTerms, Sizes)) {
324 Sizes.clear();
325 return;
326 }
327
328 // The last element to be pushed into Sizes is the size of an element.
329 Sizes.push_back(Elt: ElementSize);
330
331 LLVM_DEBUG({
332 dbgs() << "Sizes:\n";
333 for (const SCEV *S : Sizes)
334 dbgs().indent(2) << *S << "\n";
335 });
336}
337
338void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr,
339 SmallVectorImpl<const SCEV *> &Subscripts,
340 SmallVectorImpl<const SCEV *> &Sizes) {
341 // Early exit in case this SCEV is not an affine multivariate function.
342 if (Sizes.empty())
343 return;
344
345 if (auto *AR = dyn_cast<SCEVAddRecExpr>(Val: Expr))
346 if (!AR->isAffine())
347 return;
348
349 // Clear output vector.
350 Subscripts.clear();
351
352 LLVM_DEBUG(dbgs() << "\ncomputeAccessFunctions\n"
353 << "Memory Access Function: " << *Expr << "\n");
354
355 const SCEV *Res = Expr;
356 int Last = Sizes.size() - 1;
357
358 for (int i = Last; i >= 0; i--) {
359 const SCEV *Size = Sizes[i];
360 const SCEV *Q, *R;
361
362 SCEVDivision::divide(SE, Numerator: Res, Denominator: Size, Quotient: &Q, Remainder: &R);
363
364 LLVM_DEBUG({
365 dbgs() << "Computing 'MemAccFn / Sizes[" << i << "]':\n";
366 dbgs() << " MemAccFn: " << *Res << "\n";
367 dbgs() << " Sizes[" << i << "]: " << *Size << "\n";
368 dbgs() << " Quotient (Leftover): " << *Q << "\n";
369 dbgs() << " Remainder (Subscript Access Function): " << *R << "\n";
370 });
371
372 Res = Q;
373
374 // Do not record the last subscript corresponding to the size of elements in
375 // the array.
376 if (i == Last) {
377
378 // Bail out if the byte offset is non-zero.
379 if (!R->isZero()) {
380 Subscripts.clear();
381 Sizes.clear();
382 return;
383 }
384
385 continue;
386 }
387
388 // Record the access function for the current subscript.
389 Subscripts.push_back(Elt: R);
390 }
391
392 // Also push in last position the remainder of the last division: it will be
393 // the access function of the innermost dimension.
394 Subscripts.push_back(Elt: Res);
395
396 std::reverse(first: Subscripts.begin(), last: Subscripts.end());
397
398 LLVM_DEBUG({
399 dbgs() << "Subscripts:\n";
400 for (const SCEV *S : Subscripts)
401 dbgs().indent(2) << *S << "\n";
402 dbgs() << "\n";
403 });
404}
405
406/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
407/// sizes of an array access. Returns the remainder of the delinearization that
408/// is the offset start of the array. The SCEV->delinearize algorithm computes
409/// the multiples of SCEV coefficients: that is a pattern matching of sub
410/// expressions in the stride and base of a SCEV corresponding to the
411/// computation of a GCD (greatest common divisor) of base and stride. When
412/// SCEV->delinearize fails, it returns the SCEV unchanged.
413///
414/// For example: when analyzing the memory access A[i][j][k] in this loop nest
415///
416/// void foo(long n, long m, long o, double A[n][m][o]) {
417///
418/// for (long i = 0; i < n; i++)
419/// for (long j = 0; j < m; j++)
420/// for (long k = 0; k < o; k++)
421/// A[i][j][k] = 1.0;
422/// }
423///
424/// the delinearization input is the following AddRec SCEV:
425///
426/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
427///
428/// From this SCEV, we are able to say that the base offset of the access is %A
429/// because it appears as an offset that does not divide any of the strides in
430/// the loops:
431///
432/// CHECK: Base offset: %A
433///
434/// and then SCEV->delinearize determines the size of some of the dimensions of
435/// the array as these are the multiples by which the strides are happening:
436///
437/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double)
438/// bytes.
439///
440/// Note that the outermost dimension remains of UnknownSize because there are
441/// no strides that would help identifying the size of the last dimension: when
442/// the array has been statically allocated, one could compute the size of that
443/// dimension by dividing the overall size of the array by the size of the known
444/// dimensions: %m * %o * 8.
445///
446/// Finally delinearize provides the access functions for the array reference
447/// that does correspond to A[i][j][k] of the above C testcase:
448///
449/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
450///
451/// The testcases are checking the output of a function pass:
452/// DelinearizationPass that walks through all loads and stores of a function
453/// asking for the SCEV of the memory access with respect to all enclosing
454/// loops, calling SCEV->delinearize on that and printing the results.
455void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
456 SmallVectorImpl<const SCEV *> &Subscripts,
457 SmallVectorImpl<const SCEV *> &Sizes,
458 const SCEV *ElementSize) {
459 // Clear output vectors.
460 Subscripts.clear();
461 Sizes.clear();
462
463 // First step: collect parametric terms.
464 SmallVector<const SCEV *, 4> Terms;
465 collectParametricTerms(SE, Expr, Terms);
466
467 if (Terms.empty())
468 return;
469
470 // Second step: find subscript sizes.
471 findArrayDimensions(SE, Terms, Sizes, ElementSize);
472
473 if (Sizes.empty())
474 return;
475
476 // Third step: compute the access functions for each subscript.
477 computeAccessFunctions(SE, Expr, Subscripts, Sizes);
478}
479
480static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
481 if (const auto *Const = dyn_cast<SCEVConstant>(Val: S))
482 return Const->getAPInt();
483 return std::nullopt;
484}
485
486/// Collects the absolute values of constant steps for all induction variables.
487/// Returns true if we can prove that all step recurrences are constants and \p
488/// Expr is divisible by \p ElementSize. Each step recurrence is stored in \p
489/// Steps after divided by \p ElementSize.
490static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
491 SmallVectorImpl<uint64_t> &Steps,
492 uint64_t ElementSize) {
493 // End of recursion. The constant value also must be a multiple of
494 // ElementSize.
495 if (const auto *Const = dyn_cast<SCEVConstant>(Val: Expr)) {
496 const uint64_t Mod = Const->getAPInt().urem(RHS: ElementSize);
497 return Mod == 0;
498 }
499
500 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Expr);
501 if (!AR || !AR->isAffine())
502 return false;
503
504 const SCEV *Step = AR->getStepRecurrence(SE);
505 std::optional<APInt> StepAPInt = tryIntoAPInt(S: Step);
506 if (!StepAPInt)
507 return false;
508
509 APInt Q;
510 uint64_t R;
511 APInt::udivrem(LHS: StepAPInt->abs(), RHS: ElementSize, Quotient&: Q, Remainder&: R);
512 if (R != 0)
513 return false;
514
515 // Bail out when the step is too large.
516 std::optional<uint64_t> StepVal = Q.tryZExtValue();
517 if (!StepVal)
518 return false;
519
520 Steps.push_back(Elt: *StepVal);
521 return collectConstantAbsSteps(SE, Expr: AR->getStart(), Steps, ElementSize);
522}
523
524bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
525 SmallVectorImpl<uint64_t> &Sizes,
526 const SCEV *ElementSize) {
527 if (!ElementSize)
528 return false;
529
530 std::optional<APInt> ElementSizeAPInt = tryIntoAPInt(S: ElementSize);
531 if (!ElementSizeAPInt || *ElementSizeAPInt == 0)
532 return false;
533
534 std::optional<uint64_t> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
535
536 // Early exit when ElementSize is not a positive constant.
537 if (!ElementSizeConst)
538 return false;
539
540 if (!collectConstantAbsSteps(SE, Expr, Steps&: Sizes, ElementSize: *ElementSizeConst) ||
541 Sizes.empty()) {
542 Sizes.clear();
543 return false;
544 }
545
546 // At this point, Sizes contains the absolute step recurrences for all
547 // induction variables. Each step recurrence must be a multiple of the size of
548 // the array element. Assuming that the each value represents the size of an
549 // array for each dimension, attempts to restore the length of each dimension
550 // by dividing the step recurrence by the next smaller value. For example, if
551 // we have the following AddRec SCEV:
552 //
553 // AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
554 //
555 // Then Sizes will become [256, 32, 1] after sorted. We don't know the size of
556 // the outermost dimension, the next dimension will be computed as 256 / 32 =
557 // 8, and the last dimension will be computed as 32 / 1 = 32. Thus it results
558 // in like Arr[UnknownSize][8][32] with elements of size 8 bytes, where Arr is
559 // a base pointer.
560 //
561 // TODO: Catch more cases, e.g., when a step recurrence is not divisible by
562 // the next smaller one, like A[i][3*j].
563 llvm::sort(Start: Sizes.rbegin(), End: Sizes.rend());
564 Sizes.erase(CS: llvm::unique(R&: Sizes), CE: Sizes.end());
565
566 // The last element in Sizes should be ElementSize. At this point, all values
567 // in Sizes are assumed to be divided by ElementSize, so replace it with 1.
568 assert(Sizes.back() != 0 && "Unexpected zero size in Sizes.");
569 Sizes.back() = 1;
570
571 for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
572 uint64_t PrevSize = Sizes[I + 1];
573 if (Sizes[I] % PrevSize) {
574 Sizes.clear();
575 return false;
576 }
577 Sizes[I] /= PrevSize;
578 }
579
580 // Finally, the last element in Sizes should be ElementSize.
581 Sizes.back() = *ElementSizeConst;
582 return true;
583}
584
585/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
586/// sizes of an array access, assuming that the array is a fixed size array.
587///
588/// E.g., if we have the code like as follows:
589///
590/// double A[42][8][32];
591/// for i
592/// for j
593/// for k
594/// use A[i][j][k]
595///
596/// The access function will be represented as an AddRec SCEV like:
597///
598/// AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
599///
600/// Then findFixedSizeArrayDimensions infers the size of each dimension of the
601/// array based on the fact that the value of the step recurrence is a multiple
602/// of the size of the corresponding array element. In the above example, it
603/// results in the following:
604///
605/// CHECK: ArrayDecl[UnknownSize][8][32] with elements of 8 bytes.
606///
607/// Finally each subscript will be computed as follows:
608///
609/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
610///
611/// Note that this function doesn't check the range of possible values for each
612/// subscript, so the caller should perform additional boundary checks if
613/// necessary.
614///
615/// Also note that this function doesn't guarantee that the original array size
616/// is restored "correctly". For example, in the following case:
617///
618/// double A[42][4][64];
619/// double B[42][8][32];
620/// for i
621/// for j
622/// for k
623/// use A[i][j][k]
624/// use B[i][2*j][k]
625///
626/// The access function for both accesses will be the same:
627///
628/// AddRec: {{{0,+,2048}<%for.i>,+,512}<%for.j>,+,8}<%for.k> (ElementSize=8)
629///
630/// The array sizes for both A and B will be computed as
631/// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B.
632///
633/// TODO: At the moment, this function can handle only simple cases. For
634/// example, we cannot handle a case where a step recurrence is not divisible
635/// by the next smaller step recurrence, e.g., A[i][3*j].
636bool llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
637 SmallVectorImpl<const SCEV *> &Subscripts,
638 SmallVectorImpl<const SCEV *> &Sizes,
639 const SCEV *ElementSize) {
640 // Clear output vectors.
641 Subscripts.clear();
642 Sizes.clear();
643
644 // First step: find the fixed array size.
645 SmallVector<uint64_t, 4> ConstSizes;
646 if (!findFixedSizeArrayDimensions(SE, Expr, Sizes&: ConstSizes, ElementSize)) {
647 Sizes.clear();
648 return false;
649 }
650
651 // Convert the constant size to SCEV.
652 for (uint64_t Size : ConstSizes)
653 Sizes.push_back(Elt: SE.getConstant(Ty: Expr->getType(), V: Size));
654
655 // Second step: compute the access functions for each subscript.
656 computeAccessFunctions(SE, Expr, Subscripts, Sizes);
657
658 return !Subscripts.empty();
659}
660
661bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
662 ArrayRef<const SCEV *> Sizes,
663 ArrayRef<const SCEV *> Subscripts) {
664 // Sizes and Subscripts are as follows:
665 //
666 // Sizes: [UNK][S_2]...[S_n]
667 // Subscripts: [I_1][I_2]...[I_n]
668 //
669 // where the size of the outermost dimension is unknown (UNK).
670
671 auto AddOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
672 if (!SE.willNotOverflow(BinOp: Instruction::Add, /*IsSigned=*/Signed: true, LHS: A, RHS: B))
673 return nullptr;
674 return SE.getAddExpr(LHS: A, RHS: B);
675 };
676
677 auto MulOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
678 if (!SE.willNotOverflow(BinOp: Instruction::Mul, /*IsSigned=*/Signed: true, LHS: A, RHS: B))
679 return nullptr;
680 return SE.getMulExpr(LHS: A, RHS: B);
681 };
682
683 // Range check: 0 <= I_k < S_k for k = 2..n.
684 for (size_t I = 1; I < Sizes.size(); ++I) {
685 const SCEV *Size = Sizes[I - 1];
686 const SCEV *Subscript = Subscripts[I];
687 if (!SE.isKnownNonNegative(S: Subscript))
688 return false;
689
690 // TODO: It may be better that delinearization itself unifies the types of
691 // all elements in Sizes and Subscripts.
692 Type *WiderTy = SE.getWiderType(Ty1: Subscript->getType(), Ty2: Size->getType());
693 Subscript = SE.getNoopOrSignExtend(V: Subscript, Ty: WiderTy);
694 Size = SE.getNoopOrSignExtend(V: Size, Ty: WiderTy);
695 if (!SE.isKnownPredicate(Pred: ICmpInst::ICMP_SLT, LHS: Subscript, RHS: Size)) {
696 LLVM_DEBUG(dbgs() << "Range check failed: " << *Subscript << " <s "
697 << *Size << "\n");
698 return false;
699 }
700 }
701
702 // The offset computation is as follows:
703 //
704 // Offset = I_n +
705 // S_n * I_{n-1} +
706 // ... +
707 // (S_2 * ... * S_n) * I_1
708 //
709 // Regarding this as a function from (I_1, I_2, ..., I_n) to integers, it
710 // must be injective. To guarantee it, the above calculation must not
711 // overflow. Since we have already checked that 0 <= I_k < S_k for k = 2..n,
712 // the minimum and maximum values occur in the following cases:
713 //
714 // Min = [I_1][0]...[0] = S_2 * ... * S_n * I_1
715 // Max = [I_1][S_2-1]...[S_n-1]
716 // = (S_2 * ... * S_n) * I_1 +
717 // (S_2 * ... * S_{n-1}) * (S_2 - 1) +
718 // ... +
719 // (S_n - 1)
720 // = (S_2 * ... * S_n) * I_1 +
721 // (S_2 * ... * S_n) - 1 (can be proven by induction)
722 // = Min + (S_2 * ... * S_n) - 1
723 //
724 // NOTE: I_1 can be negative, so Min is not just 0.
725 const SCEV *Prod = SE.getOne(Ty: Sizes[0]->getType());
726 for (const SCEV *Size : Sizes) {
727 Prod = MulOverflow(Prod, Size);
728 if (!Prod)
729 return false;
730 }
731 const SCEV *Min = MulOverflow(Prod, Subscripts[0]);
732 if (!Min)
733 return false;
734
735 // We have already checked that Min and Prod don't overflow, so it's enough
736 // to check whether Min + Prod - 1 doesn't overflow.
737 const SCEV *MaxPlusOne = AddOverflow(Min, Prod);
738 if (!MaxPlusOne)
739 return false;
740 if (!SE.willNotOverflow(BinOp: Instruction::Sub, /*IsSigned=*/Signed: true, LHS: MaxPlusOne,
741 RHS: SE.getOne(Ty: MaxPlusOne->getType())))
742 return false;
743
744 return true;
745}
746
747bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
748 const GetElementPtrInst *GEP,
749 SmallVectorImpl<const SCEV *> &Subscripts,
750 SmallVectorImpl<const SCEV *> &Sizes) {
751 assert(Subscripts.empty() && Sizes.empty() &&
752 "Expected output lists to be empty on entry to this function.");
753 assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
754 LLVM_DEBUG(dbgs() << "\nGEP to delinearize: " << *GEP << "\n");
755 Type *Ty = nullptr;
756 Type *IndexTy = SE.getEffectiveSCEVType(Ty: GEP->getPointerOperandType());
757 bool DroppedFirstDim = false;
758 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
759 const SCEV *Expr = SE.getSCEV(V: GEP->getOperand(i_nocapture: i));
760 if (i == 1) {
761 Ty = GEP->getSourceElementType();
762 if (auto *Const = dyn_cast<SCEVConstant>(Val: Expr))
763 if (Const->getValue()->isZero()) {
764 DroppedFirstDim = true;
765 continue;
766 }
767 Subscripts.push_back(Elt: Expr);
768 continue;
769 }
770
771 auto *ArrayTy = dyn_cast<ArrayType>(Val: Ty);
772 if (!ArrayTy) {
773 LLVM_DEBUG(dbgs() << "GEP delinearize failed: " << *Ty
774 << " is not an array type.\n");
775 Subscripts.clear();
776 Sizes.clear();
777 return false;
778 }
779
780 Subscripts.push_back(Elt: Expr);
781 if (!(DroppedFirstDim && i == 2))
782 Sizes.push_back(Elt: SE.getConstant(Ty: IndexTy, V: ArrayTy->getNumElements()));
783
784 Ty = ArrayTy->getElementType();
785 }
786 LLVM_DEBUG({
787 dbgs() << "Subscripts:\n";
788 for (const SCEV *S : Subscripts)
789 dbgs() << *S << "\n";
790 dbgs() << "\n";
791 });
792
793 return !Subscripts.empty();
794}
795
796namespace {
797
798void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
799 ScalarEvolution *SE) {
800 O << "Printing analysis 'Delinearization' for function '" << F->getName()
801 << "':";
802 for (Instruction &Inst : instructions(F)) {
803 // Only analyze loads and stores.
804 if (!isa<StoreInst>(Val: &Inst) && !isa<LoadInst>(Val: &Inst))
805 continue;
806
807 const BasicBlock *BB = Inst.getParent();
808 Loop *L = LI->getLoopFor(BB);
809 // Only delinearize the memory access in the innermost loop.
810 // Do not analyze memory accesses outside loops.
811 if (!L)
812 continue;
813
814 const SCEV *AccessFn = SE->getSCEVAtScope(V: getPointerOperand(V: &Inst), L);
815
816 const SCEVUnknown *BasePointer =
817 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: AccessFn));
818 // Do not delinearize if we cannot find the base pointer.
819 if (!BasePointer)
820 break;
821 AccessFn = SE->getMinusSCEV(LHS: AccessFn, RHS: BasePointer);
822
823 O << "\n";
824 O << "Inst:" << Inst << "\n";
825 O << "AccessFunction: " << *AccessFn << "\n";
826
827 SmallVector<const SCEV *, 3> Subscripts, Sizes;
828
829 auto IsDelinearizationFailed = [&]() {
830 return Subscripts.size() == 0 || Sizes.size() == 0 ||
831 Subscripts.size() != Sizes.size();
832 };
833
834 delinearize(SE&: *SE, Expr: AccessFn, Subscripts, Sizes, ElementSize: SE->getElementSize(Inst: &Inst));
835 if (UseFixedSizeArrayHeuristic && IsDelinearizationFailed()) {
836 Subscripts.clear();
837 Sizes.clear();
838 delinearizeFixedSizeArray(SE&: *SE, Expr: AccessFn, Subscripts, Sizes,
839 ElementSize: SE->getElementSize(Inst: &Inst));
840 }
841
842 if (IsDelinearizationFailed()) {
843 O << "failed to delinearize\n";
844 continue;
845 }
846
847 O << "Base offset: " << *BasePointer << "\n";
848 O << "ArrayDecl[UnknownSize]";
849 int Size = Subscripts.size();
850 for (int i = 0; i < Size - 1; i++)
851 O << "[" << *Sizes[i] << "]";
852 O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
853
854 O << "ArrayRef";
855 for (int i = 0; i < Size; i++)
856 O << "[" << *Subscripts[i] << "]";
857 O << "\n";
858
859 bool IsValid = validateDelinearizationResult(SE&: *SE, Sizes, Subscripts);
860 O << "Delinearization validation: " << (IsValid ? "Succeeded" : "Failed")
861 << "\n";
862 }
863}
864
865} // end anonymous namespace
866
867DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS)
868 : OS(OS) {}
869PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
870 FunctionAnalysisManager &AM) {
871 printDelinearization(O&: OS, F: &F, LI: &AM.getResult<LoopAnalysis>(IR&: F),
872 SE: &AM.getResult<ScalarEvolutionAnalysis>(IR&: F));
873 return PreservedAnalyses::all();
874}
875