1//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements an analysis pass that tries to delinearize all GEP
10// instructions in all loops using the SCEV analysis functionality.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/Delinearization.h"
15#include "llvm/Analysis/LoopInfo.h"
16#include "llvm/Analysis/ScalarEvolution.h"
17#include "llvm/Analysis/ScalarEvolutionDivision.h"
18#include "llvm/Analysis/ScalarEvolutionExpressions.h"
19#include "llvm/IR/Constants.h"
20#include "llvm/IR/DerivedTypes.h"
21#include "llvm/IR/Function.h"
22#include "llvm/IR/InstIterator.h"
23#include "llvm/IR/Instructions.h"
24#include "llvm/IR/PassManager.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DL_NAME "delinearize"
32#define DEBUG_TYPE DL_NAME
33
34static cl::opt<bool> UseFixedSizeArrayHeuristic(
35 "delinearize-use-fixed-size-array-heuristic", cl::init(Val: true), cl::Hidden,
36 cl::desc("When printing analysis, use the heuristic for fixed-size arrays "
37 "if the default delinearizetion fails."));
38
39// Return true when S contains at least an undef value.
40static inline bool containsUndefs(const SCEV *S) {
41 return SCEVExprContains(Root: S, Pred: [](const SCEV *S) {
42 if (const auto *SU = dyn_cast<SCEVUnknown>(Val: S))
43 return isa<UndefValue>(Val: SU->getValue());
44 return false;
45 });
46}
47
48namespace {
49
50// Collect all steps of SCEV expressions.
51struct SCEVCollectStrides {
52 ScalarEvolution &SE;
53 SmallVectorImpl<const SCEV *> &Strides;
54
55 SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
56 : SE(SE), Strides(S) {}
57
58 bool follow(const SCEV *S) {
59 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S))
60 Strides.push_back(Elt: AR->getStepRecurrence(SE));
61 return true;
62 }
63
64 bool isDone() const { return false; }
65};
66
67// Collect all SCEVUnknown and SCEVMulExpr expressions.
68struct SCEVCollectTerms {
69 SmallVectorImpl<const SCEV *> &Terms;
70
71 SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
72
73 bool follow(const SCEV *S) {
74 if (isa<SCEVUnknown>(Val: S) || isa<SCEVMulExpr>(Val: S) ||
75 isa<SCEVSignExtendExpr>(Val: S)) {
76 if (!containsUndefs(S))
77 Terms.push_back(Elt: S);
78
79 // Stop recursion: once we collected a term, do not walk its operands.
80 return false;
81 }
82
83 // Keep looking.
84 return true;
85 }
86
87 bool isDone() const { return false; }
88};
89
90// Check if a SCEV contains an AddRecExpr.
91struct SCEVHasAddRec {
92 bool &ContainsAddRec;
93
94 SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
95 ContainsAddRec = false;
96 }
97
98 bool follow(const SCEV *S) {
99 if (isa<SCEVAddRecExpr>(Val: S)) {
100 ContainsAddRec = true;
101
102 // Stop recursion: once we collected a term, do not walk its operands.
103 return false;
104 }
105
106 // Keep looking.
107 return true;
108 }
109
110 bool isDone() const { return false; }
111};
112
113// Find factors that are multiplied with an expression that (possibly as a
114// subexpression) contains an AddRecExpr. In the expression:
115//
116// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
117//
118// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
119// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
120// parameters as they form a product with an induction variable.
121//
122// This collector expects all array size parameters to be in the same MulExpr.
123// It might be necessary to later add support for collecting parameters that are
124// spread over different nested MulExpr.
125struct SCEVCollectAddRecMultiplies {
126 SmallVectorImpl<const SCEV *> &Terms;
127 ScalarEvolution &SE;
128
129 SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
130 ScalarEvolution &SE)
131 : Terms(T), SE(SE) {}
132
133 bool follow(const SCEV *S) {
134 if (auto *Mul = dyn_cast<SCEVMulExpr>(Val: S)) {
135 bool HasAddRec = false;
136 SmallVector<SCEVUse, 0> Operands;
137 for (const SCEV *Op : Mul->operands()) {
138 const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Val: Op);
139 if (Unknown && !isa<CallInst>(Val: Unknown->getValue())) {
140 Operands.push_back(Elt: Op);
141 } else if (Unknown) {
142 HasAddRec = true;
143 } else {
144 bool ContainsAddRec = false;
145 SCEVHasAddRec ContiansAddRec(ContainsAddRec);
146 visitAll(Root: Op, Visitor&: ContiansAddRec);
147 HasAddRec |= ContainsAddRec;
148 }
149 }
150 if (Operands.size() == 0)
151 return true;
152
153 if (!HasAddRec)
154 return false;
155
156 Terms.push_back(Elt: SE.getMulExpr(Ops&: Operands));
157 // Stop recursion: once we collected a term, do not walk its operands.
158 return false;
159 }
160
161 // Keep looking.
162 return true;
163 }
164
165 bool isDone() const { return false; }
166};
167
168} // end anonymous namespace
169
170/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in
171/// two places:
172/// 1) The strides of AddRec expressions.
173/// 2) Unknowns that are multiplied with AddRec expressions.
174void llvm::collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr,
175 SmallVectorImpl<const SCEV *> &Terms) {
176 SmallVector<const SCEV *, 4> Strides;
177 SCEVCollectStrides StrideCollector(SE, Strides);
178 visitAll(Root: Expr, Visitor&: StrideCollector);
179
180 LLVM_DEBUG({
181 dbgs() << "Strides:\n";
182 for (const SCEV *S : Strides)
183 dbgs().indent(2) << *S << "\n";
184 });
185
186 for (const SCEV *S : Strides) {
187 SCEVCollectTerms TermCollector(Terms);
188 visitAll(Root: S, Visitor&: TermCollector);
189 }
190
191 LLVM_DEBUG({
192 dbgs() << "Terms:\n";
193 for (const SCEV *T : Terms)
194 dbgs().indent(2) << *T << "\n";
195 });
196
197 SCEVCollectAddRecMultiplies MulCollector(Terms, SE);
198 visitAll(Root: Expr, Visitor&: MulCollector);
199}
200
201static bool findArrayDimensionsRec(ScalarEvolution &SE,
202 SmallVectorImpl<const SCEV *> &Terms,
203 SmallVectorImpl<const SCEV *> &Sizes) {
204 int Last = Terms.size() - 1;
205 const SCEV *Step = Terms[Last];
206
207 // End of recursion.
208 if (Last == 0) {
209 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Val: Step)) {
210 SmallVector<SCEVUse, 2> Qs;
211 for (const SCEV *Op : M->operands())
212 if (!isa<SCEVConstant>(Val: Op))
213 Qs.push_back(Elt: Op);
214
215 Step = SE.getMulExpr(Ops&: Qs);
216 }
217
218 Sizes.push_back(Elt: Step);
219 return true;
220 }
221
222 for (const SCEV *&Term : Terms) {
223 // Normalize the terms before the next call to findArrayDimensionsRec.
224 const SCEV *Q, *R;
225 SCEVDivision::divide(SE, Numerator: Term, Denominator: Step, Quotient: &Q, Remainder: &R);
226
227 // Bail out when GCD does not evenly divide one of the terms.
228 if (!R->isZero())
229 return false;
230
231 Term = Q;
232 }
233
234 // Remove all SCEVConstants.
235 erase_if(C&: Terms, P: [](const SCEV *E) { return isa<SCEVConstant>(Val: E); });
236
237 if (Terms.size() > 0)
238 if (!findArrayDimensionsRec(SE, Terms, Sizes))
239 return false;
240
241 Sizes.push_back(Elt: Step);
242 return true;
243}
244
245// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
246static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
247 for (const SCEV *T : Terms)
248 if (SCEVExprContains(Root: T, Pred: [](const SCEV *S) { return isa<SCEVUnknown>(Val: S); }))
249 return true;
250
251 return false;
252}
253
254// Return the number of product terms in S.
255static inline int numberOfTerms(const SCEV *S) {
256 if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(Val: S))
257 return Expr->getNumOperands();
258 return 1;
259}
260
261static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
262 if (isa<SCEVConstant>(Val: T))
263 return nullptr;
264
265 if (isa<SCEVUnknown>(Val: T))
266 return T;
267
268 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Val: T)) {
269 SmallVector<SCEVUse, 2> Factors;
270 for (const SCEV *Op : M->operands())
271 if (!isa<SCEVConstant>(Val: Op))
272 Factors.push_back(Elt: Op);
273
274 return SE.getMulExpr(Ops&: Factors);
275 }
276
277 return T;
278}
279
280void llvm::findArrayDimensions(ScalarEvolution &SE,
281 SmallVectorImpl<const SCEV *> &Terms,
282 SmallVectorImpl<const SCEV *> &Sizes,
283 const SCEV *ElementSize) {
284 if (Terms.size() < 1 || !ElementSize)
285 return;
286
287 // Early return when Terms do not contain parameters: we do not delinearize
288 // non parametric SCEVs.
289 if (!containsParameters(Terms))
290 return;
291
292 LLVM_DEBUG({
293 dbgs() << "Terms:\n";
294 for (const SCEV *T : Terms)
295 dbgs().indent(2) << *T << "\n";
296 });
297
298 // Remove duplicates.
299 array_pod_sort(Start: Terms.begin(), End: Terms.end());
300 Terms.erase(CS: llvm::unique(R&: Terms), CE: Terms.end());
301
302 // Put larger terms first.
303 llvm::sort(C&: Terms, Comp: [](const SCEV *LHS, const SCEV *RHS) {
304 return numberOfTerms(S: LHS) > numberOfTerms(S: RHS);
305 });
306
307 // Try to divide all terms by the element size. If term is not divisible by
308 // element size, proceed with the original term.
309 for (const SCEV *&Term : Terms) {
310 const SCEV *Q, *R;
311 SCEVDivision::divide(SE, Numerator: Term, Denominator: ElementSize, Quotient: &Q, Remainder: &R);
312 if (!Q->isZero())
313 Term = Q;
314 }
315
316 SmallVector<const SCEV *, 4> NewTerms;
317
318 // Remove constant factors.
319 for (const SCEV *T : Terms)
320 if (const SCEV *NewT = removeConstantFactors(SE, T))
321 NewTerms.push_back(Elt: NewT);
322
323 LLVM_DEBUG({
324 dbgs() << "Terms after sorting:\n";
325 for (const SCEV *T : NewTerms)
326 dbgs().indent(2) << *T << "\n";
327 });
328
329 if (NewTerms.empty() || !findArrayDimensionsRec(SE, Terms&: NewTerms, Sizes)) {
330 Sizes.clear();
331 return;
332 }
333
334 // The last element to be pushed into Sizes is the size of an element.
335 Sizes.push_back(Elt: ElementSize);
336
337 LLVM_DEBUG({
338 dbgs() << "Sizes:\n";
339 for (const SCEV *S : Sizes)
340 dbgs().indent(2) << *S << "\n";
341 });
342}
343
344void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr,
345 SmallVectorImpl<const SCEV *> &Subscripts,
346 SmallVectorImpl<const SCEV *> &Sizes) {
347 // Early exit in case this SCEV is not an affine multivariate function.
348 if (Sizes.empty())
349 return;
350
351 if (auto *AR = dyn_cast<SCEVAddRecExpr>(Val: Expr))
352 if (!AR->isAffine())
353 return;
354
355 // Clear output vector.
356 Subscripts.clear();
357
358 LLVM_DEBUG(dbgs() << "\ncomputeAccessFunctions\n"
359 << "Memory Access Function: " << *Expr << "\n");
360
361 const SCEV *Res = Expr;
362 int Last = Sizes.size() - 1;
363
364 for (int i = Last; i >= 0; i--) {
365 const SCEV *Size = Sizes[i];
366 const SCEV *Q, *R;
367
368 SCEVDivision::divide(SE, Numerator: Res, Denominator: Size, Quotient: &Q, Remainder: &R);
369
370 LLVM_DEBUG({
371 dbgs() << "Computing 'MemAccFn / Sizes[" << i << "]':\n";
372 dbgs() << " MemAccFn: " << *Res << "\n";
373 dbgs() << " Sizes[" << i << "]: " << *Size << "\n";
374 dbgs() << " Quotient (Leftover): " << *Q << "\n";
375 dbgs() << " Remainder (Subscript Access Function): " << *R << "\n";
376 });
377
378 Res = Q;
379
380 // Do not record the last subscript corresponding to the size of elements in
381 // the array.
382 if (i == Last) {
383
384 // Bail out if the byte offset is non-zero.
385 if (!R->isZero()) {
386 Subscripts.clear();
387 Sizes.clear();
388 return;
389 }
390
391 continue;
392 }
393
394 // Record the access function for the current subscript.
395 Subscripts.push_back(Elt: R);
396 }
397
398 // Also push in last position the remainder of the last division: it will be
399 // the access function of the innermost dimension.
400 Subscripts.push_back(Elt: Res);
401
402 std::reverse(first: Subscripts.begin(), last: Subscripts.end());
403
404 LLVM_DEBUG({
405 dbgs() << "Subscripts:\n";
406 for (const SCEV *S : Subscripts)
407 dbgs().indent(2) << *S << "\n";
408 dbgs() << "\n";
409 });
410}
411
412/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
413/// sizes of an array access. Returns the remainder of the delinearization that
414/// is the offset start of the array. The SCEV->delinearize algorithm computes
415/// the multiples of SCEV coefficients: that is a pattern matching of sub
416/// expressions in the stride and base of a SCEV corresponding to the
417/// computation of a GCD (greatest common divisor) of base and stride. When
418/// SCEV->delinearize fails, it returns the SCEV unchanged.
419///
420/// For example: when analyzing the memory access A[i][j][k] in this loop nest
421///
422/// void foo(long n, long m, long o, double A[n][m][o]) {
423///
424/// for (long i = 0; i < n; i++)
425/// for (long j = 0; j < m; j++)
426/// for (long k = 0; k < o; k++)
427/// A[i][j][k] = 1.0;
428/// }
429///
430/// the delinearization input is the following AddRec SCEV:
431///
432/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
433///
434/// From this SCEV, we are able to say that the base offset of the access is %A
435/// because it appears as an offset that does not divide any of the strides in
436/// the loops:
437///
438/// CHECK: Base offset: %A
439///
440/// and then SCEV->delinearize determines the size of some of the dimensions of
441/// the array as these are the multiples by which the strides are happening:
442///
443/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double)
444/// bytes.
445///
446/// Note that the outermost dimension remains of UnknownSize because there are
447/// no strides that would help identifying the size of the last dimension: when
448/// the array has been statically allocated, one could compute the size of that
449/// dimension by dividing the overall size of the array by the size of the known
450/// dimensions: %m * %o * 8.
451///
452/// Finally delinearize provides the access functions for the array reference
453/// that does correspond to A[i][j][k] of the above C testcase:
454///
455/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
456///
457/// The testcases are checking the output of a function pass:
458/// DelinearizationPass that walks through all loads and stores of a function
459/// asking for the SCEV of the memory access with respect to all enclosing
460/// loops, calling SCEV->delinearize on that and printing the results.
461void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
462 SmallVectorImpl<const SCEV *> &Subscripts,
463 SmallVectorImpl<const SCEV *> &Sizes,
464 const SCEV *ElementSize) {
465 // Clear output vectors.
466 Subscripts.clear();
467 Sizes.clear();
468
469 // First step: collect parametric terms.
470 SmallVector<const SCEV *, 4> Terms;
471 collectParametricTerms(SE, Expr, Terms);
472
473 if (Terms.empty())
474 return;
475
476 // Second step: find subscript sizes.
477 findArrayDimensions(SE, Terms, Sizes, ElementSize);
478
479 if (Sizes.empty())
480 return;
481
482 // Third step: compute the access functions for each subscript.
483 computeAccessFunctions(SE, Expr, Subscripts, Sizes);
484}
485
486static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
487 if (const auto *Const = dyn_cast<SCEVConstant>(Val: S))
488 return Const->getAPInt();
489 return std::nullopt;
490}
491
492/// Collects the absolute values of constant steps for all induction variables.
493/// Returns true if we can prove that all step recurrences are constants and \p
494/// Expr is divisible by \p ElementSize. Each step recurrence is stored in \p
495/// Steps after divided by \p ElementSize.
496static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
497 SmallVectorImpl<uint64_t> &Steps,
498 uint64_t ElementSize) {
499 // End of recursion. The constant value also must be a multiple of
500 // ElementSize.
501 if (const auto *Const = dyn_cast<SCEVConstant>(Val: Expr)) {
502 const uint64_t Mod = Const->getAPInt().urem(RHS: ElementSize);
503 return Mod == 0;
504 }
505
506 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Expr);
507 if (!AR || !AR->isAffine())
508 return false;
509
510 const SCEV *Step = AR->getStepRecurrence(SE);
511 std::optional<APInt> StepAPInt = tryIntoAPInt(S: Step);
512 if (!StepAPInt)
513 return false;
514
515 APInt Q;
516 uint64_t R;
517 APInt::udivrem(LHS: StepAPInt->abs(), RHS: ElementSize, Quotient&: Q, Remainder&: R);
518 if (R != 0)
519 return false;
520
521 // Bail out when the step is too large.
522 std::optional<uint64_t> StepVal = Q.tryZExtValue();
523 if (!StepVal)
524 return false;
525
526 Steps.push_back(Elt: *StepVal);
527 return collectConstantAbsSteps(SE, Expr: AR->getStart(), Steps, ElementSize);
528}
529
530bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
531 SmallVectorImpl<uint64_t> &Sizes,
532 const SCEV *ElementSize) {
533 if (!ElementSize)
534 return false;
535
536 std::optional<APInt> ElementSizeAPInt = tryIntoAPInt(S: ElementSize);
537 if (!ElementSizeAPInt || *ElementSizeAPInt == 0)
538 return false;
539
540 std::optional<uint64_t> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
541
542 // Early exit when ElementSize is not a positive constant.
543 if (!ElementSizeConst)
544 return false;
545
546 if (!collectConstantAbsSteps(SE, Expr, Steps&: Sizes, ElementSize: *ElementSizeConst) ||
547 Sizes.empty()) {
548 Sizes.clear();
549 return false;
550 }
551
552 // At this point, Sizes contains the absolute step recurrences for all
553 // induction variables. Each step recurrence must be a multiple of the size of
554 // the array element. Assuming that the each value represents the size of an
555 // array for each dimension, attempts to restore the length of each dimension
556 // by dividing the step recurrence by the next smaller value. For example, if
557 // we have the following AddRec SCEV:
558 //
559 // AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
560 //
561 // Then Sizes will become [256, 32, 1] after sorted. We don't know the size of
562 // the outermost dimension, the next dimension will be computed as 256 / 32 =
563 // 8, and the last dimension will be computed as 32 / 1 = 32. Thus it results
564 // in like Arr[UnknownSize][8][32] with elements of size 8 bytes, where Arr is
565 // a base pointer.
566 //
567 // TODO: Catch more cases, e.g., when a step recurrence is not divisible by
568 // the next smaller one, like A[i][3*j].
569 llvm::sort(Start: Sizes.rbegin(), End: Sizes.rend());
570 Sizes.erase(CS: llvm::unique(R&: Sizes), CE: Sizes.end());
571
572 // The last element in Sizes should be ElementSize. At this point, all values
573 // in Sizes are assumed to be divided by ElementSize, so replace it with 1.
574 assert(Sizes.back() != 0 && "Unexpected zero size in Sizes.");
575 Sizes.back() = 1;
576
577 for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
578 uint64_t PrevSize = Sizes[I + 1];
579 if (Sizes[I] % PrevSize) {
580 Sizes.clear();
581 return false;
582 }
583 Sizes[I] /= PrevSize;
584 }
585
586 // Finally, the last element in Sizes should be ElementSize.
587 Sizes.back() = *ElementSizeConst;
588 return true;
589}
590
591/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
592/// sizes of an array access, assuming that the array is a fixed size array.
593///
594/// E.g., if we have the code like as follows:
595///
596/// double A[42][8][32];
597/// for i
598/// for j
599/// for k
600/// use A[i][j][k]
601///
602/// The access function will be represented as an AddRec SCEV like:
603///
604/// AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
605///
606/// Then findFixedSizeArrayDimensions infers the size of each dimension of the
607/// array based on the fact that the value of the step recurrence is a multiple
608/// of the size of the corresponding array element. In the above example, it
609/// results in the following:
610///
611/// CHECK: ArrayDecl[UnknownSize][8][32] with elements of 8 bytes.
612///
613/// Finally each subscript will be computed as follows:
614///
615/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
616///
617/// Note that this function doesn't check the range of possible values for each
618/// subscript, so the caller should perform additional boundary checks if
619/// necessary.
620///
621/// Also note that this function doesn't guarantee that the original array size
622/// is restored "correctly". For example, in the following case:
623///
624/// double A[42][4][64];
625/// double B[42][8][32];
626/// for i
627/// for j
628/// for k
629/// use A[i][j][k]
630/// use B[i][2*j][k]
631///
632/// The access function for both accesses will be the same:
633///
634/// AddRec: {{{0,+,2048}<%for.i>,+,512}<%for.j>,+,8}<%for.k> (ElementSize=8)
635///
636/// The array sizes for both A and B will be computed as
637/// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B.
638///
639/// TODO: At the moment, this function can handle only simple cases. For
640/// example, we cannot handle a case where a step recurrence is not divisible
641/// by the next smaller step recurrence, e.g., A[i][3*j].
642bool llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
643 SmallVectorImpl<const SCEV *> &Subscripts,
644 SmallVectorImpl<const SCEV *> &Sizes,
645 const SCEV *ElementSize) {
646 // Clear output vectors.
647 Subscripts.clear();
648 Sizes.clear();
649
650 // First step: find the fixed array size.
651 SmallVector<uint64_t, 4> ConstSizes;
652 if (!findFixedSizeArrayDimensions(SE, Expr, Sizes&: ConstSizes, ElementSize)) {
653 Sizes.clear();
654 return false;
655 }
656
657 // Convert the constant size to SCEV.
658 for (uint64_t Size : ConstSizes)
659 Sizes.push_back(Elt: SE.getConstant(Ty: Expr->getType(), V: Size));
660
661 // Second step: compute the access functions for each subscript.
662 computeAccessFunctions(SE, Expr, Subscripts, Sizes);
663
664 return !Subscripts.empty();
665}
666
667bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
668 ArrayRef<const SCEV *> Sizes,
669 ArrayRef<const SCEV *> Subscripts) {
670 // Sizes and Subscripts are as follows:
671 //
672 // Sizes: [UNK][S_2]...[S_n]
673 // Subscripts: [I_1][I_2]...[I_n]
674 //
675 // where the size of the outermost dimension is unknown (UNK).
676
677 auto AddOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
678 if (!SE.willNotOverflow(BinOp: Instruction::Add, /*IsSigned=*/Signed: true, LHS: A, RHS: B))
679 return nullptr;
680 return SE.getAddExpr(LHS: A, RHS: B);
681 };
682
683 auto MulOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * {
684 if (!SE.willNotOverflow(BinOp: Instruction::Mul, /*IsSigned=*/Signed: true, LHS: A, RHS: B))
685 return nullptr;
686 return SE.getMulExpr(LHS: A, RHS: B);
687 };
688
689 // Range check: 0 <= I_k < S_k for k = 2..n.
690 for (size_t I = 1; I < Sizes.size(); ++I) {
691 const SCEV *Size = Sizes[I - 1];
692 const SCEV *Subscript = Subscripts[I];
693 if (!SE.isKnownNonNegative(S: Subscript))
694 return false;
695
696 // TODO: It may be better that delinearization itself unifies the types of
697 // all elements in Sizes and Subscripts.
698 Type *WiderTy = SE.getWiderType(Ty1: Subscript->getType(), Ty2: Size->getType());
699 Subscript = SE.getNoopOrSignExtend(V: Subscript, Ty: WiderTy);
700 Size = SE.getNoopOrSignExtend(V: Size, Ty: WiderTy);
701 if (!SE.isKnownPredicate(Pred: ICmpInst::ICMP_SLT, LHS: Subscript, RHS: Size)) {
702 LLVM_DEBUG(dbgs() << "Range check failed: " << *Subscript << " <s "
703 << *Size << "\n");
704 return false;
705 }
706 }
707
708 // The offset computation is as follows:
709 //
710 // Offset = I_n +
711 // S_n * I_{n-1} +
712 // ... +
713 // (S_2 * ... * S_n) * I_1
714 //
715 // Regarding this as a function from (I_1, I_2, ..., I_n) to integers, it
716 // must be injective. To guarantee it, the above calculation must not
717 // overflow. Since we have already checked that 0 <= I_k < S_k for k = 2..n,
718 // the minimum and maximum values occur in the following cases:
719 //
720 // Min = [I_1][0]...[0] = S_2 * ... * S_n * I_1
721 // Max = [I_1][S_2-1]...[S_n-1]
722 // = (S_2 * ... * S_n) * I_1 +
723 // (S_2 * ... * S_{n-1}) * (S_2 - 1) +
724 // ... +
725 // (S_n - 1)
726 // = (S_2 * ... * S_n) * I_1 +
727 // (S_2 * ... * S_n) - 1 (can be proven by induction)
728 // = Min + (S_2 * ... * S_n) - 1
729 //
730 // NOTE: I_1 can be negative, so Min is not just 0.
731 const SCEV *Prod = SE.getOne(Ty: Sizes[0]->getType());
732 for (const SCEV *Size : Sizes) {
733 Prod = MulOverflow(Prod, Size);
734 if (!Prod)
735 return false;
736 }
737 const SCEV *Min = MulOverflow(Prod, Subscripts[0]);
738 if (!Min)
739 return false;
740
741 // We have already checked that Min and Prod don't overflow, so it's enough
742 // to check whether Min + Prod - 1 doesn't overflow.
743 const SCEV *MaxPlusOne = AddOverflow(Min, Prod);
744 if (!MaxPlusOne)
745 return false;
746 if (!SE.willNotOverflow(BinOp: Instruction::Sub, /*IsSigned=*/Signed: true, LHS: MaxPlusOne,
747 RHS: SE.getOne(Ty: MaxPlusOne->getType())))
748 return false;
749
750 return true;
751}
752
753bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
754 const GetElementPtrInst *GEP,
755 SmallVectorImpl<const SCEV *> &Subscripts,
756 SmallVectorImpl<const SCEV *> &Sizes) {
757 assert(Subscripts.empty() && Sizes.empty() &&
758 "Expected output lists to be empty on entry to this function.");
759 assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
760 LLVM_DEBUG(dbgs() << "\nGEP to delinearize: " << *GEP << "\n");
761 Type *Ty = nullptr;
762 Type *IndexTy = SE.getEffectiveSCEVType(Ty: GEP->getPointerOperandType());
763 bool DroppedFirstDim = false;
764 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
765 const SCEV *Expr = SE.getSCEV(V: GEP->getOperand(i_nocapture: i));
766 if (i == 1) {
767 Ty = GEP->getSourceElementType();
768 if (auto *Const = dyn_cast<SCEVConstant>(Val: Expr))
769 if (Const->getValue()->isZero()) {
770 DroppedFirstDim = true;
771 continue;
772 }
773 Subscripts.push_back(Elt: Expr);
774 continue;
775 }
776
777 auto *ArrayTy = dyn_cast<ArrayType>(Val: Ty);
778 if (!ArrayTy) {
779 LLVM_DEBUG(dbgs() << "GEP delinearize failed: " << *Ty
780 << " is not an array type.\n");
781 Subscripts.clear();
782 Sizes.clear();
783 return false;
784 }
785
786 Subscripts.push_back(Elt: Expr);
787 if (!(DroppedFirstDim && i == 2))
788 Sizes.push_back(Elt: SE.getConstant(Ty: IndexTy, V: ArrayTy->getNumElements()));
789
790 Ty = ArrayTy->getElementType();
791 }
792 LLVM_DEBUG({
793 dbgs() << "Subscripts:\n";
794 for (const SCEV *S : Subscripts)
795 dbgs() << *S << "\n";
796 dbgs() << "\n";
797 });
798
799 return !Subscripts.empty();
800}
801
802namespace {
803
804void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
805 ScalarEvolution *SE) {
806 O << "Printing analysis 'Delinearization' for function '" << F->getName()
807 << "':";
808 for (Instruction &Inst : instructions(F)) {
809 // Only analyze loads and stores.
810 if (!isa<StoreInst>(Val: &Inst) && !isa<LoadInst>(Val: &Inst))
811 continue;
812
813 const BasicBlock *BB = Inst.getParent();
814 Loop *L = LI->getLoopFor(BB);
815 // Only delinearize the memory access in the innermost loop.
816 // Do not analyze memory accesses outside loops.
817 if (!L)
818 continue;
819
820 const SCEV *AccessFn = SE->getSCEVAtScope(V: getPointerOperand(V: &Inst), L);
821
822 const SCEVUnknown *BasePointer =
823 dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: AccessFn));
824 // Do not delinearize if we cannot find the base pointer.
825 if (!BasePointer)
826 break;
827 AccessFn = SE->getMinusSCEV(LHS: AccessFn, RHS: BasePointer);
828
829 O << "\n";
830 O << "Inst:" << Inst << "\n";
831 O << "AccessFunction: " << *AccessFn << "\n";
832
833 SmallVector<const SCEV *, 3> Subscripts, Sizes;
834
835 auto IsDelinearizationFailed = [&]() {
836 return Subscripts.size() == 0 || Sizes.size() == 0 ||
837 Subscripts.size() != Sizes.size();
838 };
839
840 delinearize(SE&: *SE, Expr: AccessFn, Subscripts, Sizes, ElementSize: SE->getElementSize(Inst: &Inst));
841 if (UseFixedSizeArrayHeuristic && IsDelinearizationFailed()) {
842 Subscripts.clear();
843 Sizes.clear();
844 delinearizeFixedSizeArray(SE&: *SE, Expr: AccessFn, Subscripts, Sizes,
845 ElementSize: SE->getElementSize(Inst: &Inst));
846 }
847
848 if (IsDelinearizationFailed()) {
849 O << "failed to delinearize\n";
850 continue;
851 }
852
853 O << "Base offset: " << *BasePointer << "\n";
854 O << "ArrayDecl[UnknownSize]";
855 int Size = Subscripts.size();
856 for (int i = 0; i < Size - 1; i++)
857 O << "[" << *Sizes[i] << "]";
858 O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
859
860 O << "ArrayRef";
861 for (int i = 0; i < Size; i++)
862 O << "[" << *Subscripts[i] << "]";
863 O << "\n";
864
865 bool IsValid = validateDelinearizationResult(SE&: *SE, Sizes, Subscripts);
866 O << "Delinearization validation: " << (IsValid ? "Succeeded" : "Failed")
867 << "\n";
868 }
869}
870
871} // end anonymous namespace
872
873DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS)
874 : OS(OS) {}
875PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
876 FunctionAnalysisManager &AM) {
877 printDelinearization(O&: OS, F: &F, LI: &AM.getResult<LoopAnalysis>(IR&: F),
878 SE: &AM.getResult<ScalarEvolutionAnalysis>(IR&: F));
879 return PreservedAnalyses::all();
880}
881