| 1 | //===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This implements an analysis pass that tries to delinearize all GEP |
| 10 | // instructions in all loops using the SCEV analysis functionality. This pass is |
| 11 | // only used for testing purposes: if your pass needs delinearization, please |
| 12 | // use the on-demand SCEVAddRecExpr::delinearize() function. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #include "llvm/Analysis/Delinearization.h" |
| 17 | #include "llvm/Analysis/LoopInfo.h" |
| 18 | #include "llvm/Analysis/ScalarEvolution.h" |
| 19 | #include "llvm/Analysis/ScalarEvolutionDivision.h" |
| 20 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
| 21 | #include "llvm/IR/Constants.h" |
| 22 | #include "llvm/IR/DerivedTypes.h" |
| 23 | #include "llvm/IR/Function.h" |
| 24 | #include "llvm/IR/InstIterator.h" |
| 25 | #include "llvm/IR/Instructions.h" |
| 26 | #include "llvm/IR/PassManager.h" |
| 27 | #include "llvm/Support/CommandLine.h" |
| 28 | #include "llvm/Support/Debug.h" |
| 29 | #include "llvm/Support/raw_ostream.h" |
| 30 | |
| 31 | using namespace llvm; |
| 32 | |
| 33 | #define DL_NAME "delinearize" |
| 34 | #define DEBUG_TYPE DL_NAME |
| 35 | |
| 36 | static cl::opt<bool> UseFixedSizeArrayHeuristic( |
| 37 | "delinearize-use-fixed-size-array-heuristic" , cl::init(Val: true), cl::Hidden, |
| 38 | cl::desc("When printing analysis, use the heuristic for fixed-size arrays " |
| 39 | "if the default delinearizetion fails." )); |
| 40 | |
| 41 | // Return true when S contains at least an undef value. |
| 42 | static inline bool containsUndefs(const SCEV *S) { |
| 43 | return SCEVExprContains(Root: S, Pred: [](const SCEV *S) { |
| 44 | if (const auto *SU = dyn_cast<SCEVUnknown>(Val: S)) |
| 45 | return isa<UndefValue>(Val: SU->getValue()); |
| 46 | return false; |
| 47 | }); |
| 48 | } |
| 49 | |
| 50 | namespace { |
| 51 | |
| 52 | // Collect all steps of SCEV expressions. |
| 53 | struct SCEVCollectStrides { |
| 54 | ScalarEvolution &SE; |
| 55 | SmallVectorImpl<const SCEV *> &Strides; |
| 56 | |
| 57 | SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S) |
| 58 | : SE(SE), Strides(S) {} |
| 59 | |
| 60 | bool follow(const SCEV *S) { |
| 61 | if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: S)) |
| 62 | Strides.push_back(Elt: AR->getStepRecurrence(SE)); |
| 63 | return true; |
| 64 | } |
| 65 | |
| 66 | bool isDone() const { return false; } |
| 67 | }; |
| 68 | |
| 69 | // Collect all SCEVUnknown and SCEVMulExpr expressions. |
| 70 | struct SCEVCollectTerms { |
| 71 | SmallVectorImpl<const SCEV *> &Terms; |
| 72 | |
| 73 | SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {} |
| 74 | |
| 75 | bool follow(const SCEV *S) { |
| 76 | if (isa<SCEVUnknown>(Val: S) || isa<SCEVMulExpr>(Val: S) || |
| 77 | isa<SCEVSignExtendExpr>(Val: S)) { |
| 78 | if (!containsUndefs(S)) |
| 79 | Terms.push_back(Elt: S); |
| 80 | |
| 81 | // Stop recursion: once we collected a term, do not walk its operands. |
| 82 | return false; |
| 83 | } |
| 84 | |
| 85 | // Keep looking. |
| 86 | return true; |
| 87 | } |
| 88 | |
| 89 | bool isDone() const { return false; } |
| 90 | }; |
| 91 | |
| 92 | // Check if a SCEV contains an AddRecExpr. |
| 93 | struct SCEVHasAddRec { |
| 94 | bool &ContainsAddRec; |
| 95 | |
| 96 | SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { |
| 97 | ContainsAddRec = false; |
| 98 | } |
| 99 | |
| 100 | bool follow(const SCEV *S) { |
| 101 | if (isa<SCEVAddRecExpr>(Val: S)) { |
| 102 | ContainsAddRec = true; |
| 103 | |
| 104 | // Stop recursion: once we collected a term, do not walk its operands. |
| 105 | return false; |
| 106 | } |
| 107 | |
| 108 | // Keep looking. |
| 109 | return true; |
| 110 | } |
| 111 | |
| 112 | bool isDone() const { return false; } |
| 113 | }; |
| 114 | |
| 115 | // Find factors that are multiplied with an expression that (possibly as a |
| 116 | // subexpression) contains an AddRecExpr. In the expression: |
| 117 | // |
| 118 | // 8 * (100 + %p * %q * (%a + {0, +, 1}_loop)) |
| 119 | // |
| 120 | // "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" |
| 121 | // that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size |
| 122 | // parameters as they form a product with an induction variable. |
| 123 | // |
| 124 | // This collector expects all array size parameters to be in the same MulExpr. |
| 125 | // It might be necessary to later add support for collecting parameters that are |
| 126 | // spread over different nested MulExpr. |
| 127 | struct SCEVCollectAddRecMultiplies { |
| 128 | SmallVectorImpl<const SCEV *> &Terms; |
| 129 | ScalarEvolution &SE; |
| 130 | |
| 131 | SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, |
| 132 | ScalarEvolution &SE) |
| 133 | : Terms(T), SE(SE) {} |
| 134 | |
| 135 | bool follow(const SCEV *S) { |
| 136 | if (auto *Mul = dyn_cast<SCEVMulExpr>(Val: S)) { |
| 137 | bool HasAddRec = false; |
| 138 | SmallVector<const SCEV *, 0> Operands; |
| 139 | for (const SCEV *Op : Mul->operands()) { |
| 140 | const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Val: Op); |
| 141 | if (Unknown && !isa<CallInst>(Val: Unknown->getValue())) { |
| 142 | Operands.push_back(Elt: Op); |
| 143 | } else if (Unknown) { |
| 144 | HasAddRec = true; |
| 145 | } else { |
| 146 | bool ContainsAddRec = false; |
| 147 | SCEVHasAddRec ContiansAddRec(ContainsAddRec); |
| 148 | visitAll(Root: Op, Visitor&: ContiansAddRec); |
| 149 | HasAddRec |= ContainsAddRec; |
| 150 | } |
| 151 | } |
| 152 | if (Operands.size() == 0) |
| 153 | return true; |
| 154 | |
| 155 | if (!HasAddRec) |
| 156 | return false; |
| 157 | |
| 158 | Terms.push_back(Elt: SE.getMulExpr(Ops&: Operands)); |
| 159 | // Stop recursion: once we collected a term, do not walk its operands. |
| 160 | return false; |
| 161 | } |
| 162 | |
| 163 | // Keep looking. |
| 164 | return true; |
| 165 | } |
| 166 | |
| 167 | bool isDone() const { return false; } |
| 168 | }; |
| 169 | |
| 170 | } // end anonymous namespace |
| 171 | |
| 172 | /// Find parametric terms in this SCEVAddRecExpr. We first for parameters in |
| 173 | /// two places: |
| 174 | /// 1) The strides of AddRec expressions. |
| 175 | /// 2) Unknowns that are multiplied with AddRec expressions. |
| 176 | void llvm::collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr, |
| 177 | SmallVectorImpl<const SCEV *> &Terms) { |
| 178 | SmallVector<const SCEV *, 4> Strides; |
| 179 | SCEVCollectStrides StrideCollector(SE, Strides); |
| 180 | visitAll(Root: Expr, Visitor&: StrideCollector); |
| 181 | |
| 182 | LLVM_DEBUG({ |
| 183 | dbgs() << "Strides:\n" ; |
| 184 | for (const SCEV *S : Strides) |
| 185 | dbgs().indent(2) << *S << "\n" ; |
| 186 | }); |
| 187 | |
| 188 | for (const SCEV *S : Strides) { |
| 189 | SCEVCollectTerms TermCollector(Terms); |
| 190 | visitAll(Root: S, Visitor&: TermCollector); |
| 191 | } |
| 192 | |
| 193 | LLVM_DEBUG({ |
| 194 | dbgs() << "Terms:\n" ; |
| 195 | for (const SCEV *T : Terms) |
| 196 | dbgs().indent(2) << *T << "\n" ; |
| 197 | }); |
| 198 | |
| 199 | SCEVCollectAddRecMultiplies MulCollector(Terms, SE); |
| 200 | visitAll(Root: Expr, Visitor&: MulCollector); |
| 201 | } |
| 202 | |
| 203 | static bool findArrayDimensionsRec(ScalarEvolution &SE, |
| 204 | SmallVectorImpl<const SCEV *> &Terms, |
| 205 | SmallVectorImpl<const SCEV *> &Sizes) { |
| 206 | int Last = Terms.size() - 1; |
| 207 | const SCEV *Step = Terms[Last]; |
| 208 | |
| 209 | // End of recursion. |
| 210 | if (Last == 0) { |
| 211 | if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Val: Step)) { |
| 212 | SmallVector<const SCEV *, 2> Qs; |
| 213 | for (const SCEV *Op : M->operands()) |
| 214 | if (!isa<SCEVConstant>(Val: Op)) |
| 215 | Qs.push_back(Elt: Op); |
| 216 | |
| 217 | Step = SE.getMulExpr(Ops&: Qs); |
| 218 | } |
| 219 | |
| 220 | Sizes.push_back(Elt: Step); |
| 221 | return true; |
| 222 | } |
| 223 | |
| 224 | for (const SCEV *&Term : Terms) { |
| 225 | // Normalize the terms before the next call to findArrayDimensionsRec. |
| 226 | const SCEV *Q, *R; |
| 227 | SCEVDivision::divide(SE, Numerator: Term, Denominator: Step, Quotient: &Q, Remainder: &R); |
| 228 | |
| 229 | // Bail out when GCD does not evenly divide one of the terms. |
| 230 | if (!R->isZero()) |
| 231 | return false; |
| 232 | |
| 233 | Term = Q; |
| 234 | } |
| 235 | |
| 236 | // Remove all SCEVConstants. |
| 237 | erase_if(C&: Terms, P: [](const SCEV *E) { return isa<SCEVConstant>(Val: E); }); |
| 238 | |
| 239 | if (Terms.size() > 0) |
| 240 | if (!findArrayDimensionsRec(SE, Terms, Sizes)) |
| 241 | return false; |
| 242 | |
| 243 | Sizes.push_back(Elt: Step); |
| 244 | return true; |
| 245 | } |
| 246 | |
| 247 | // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. |
| 248 | static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) { |
| 249 | for (const SCEV *T : Terms) |
| 250 | if (SCEVExprContains(Root: T, Pred: [](const SCEV *S) { return isa<SCEVUnknown>(Val: S); })) |
| 251 | return true; |
| 252 | |
| 253 | return false; |
| 254 | } |
| 255 | |
| 256 | // Return the number of product terms in S. |
| 257 | static inline int numberOfTerms(const SCEV *S) { |
| 258 | if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(Val: S)) |
| 259 | return Expr->getNumOperands(); |
| 260 | return 1; |
| 261 | } |
| 262 | |
| 263 | static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { |
| 264 | if (isa<SCEVConstant>(Val: T)) |
| 265 | return nullptr; |
| 266 | |
| 267 | if (isa<SCEVUnknown>(Val: T)) |
| 268 | return T; |
| 269 | |
| 270 | if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Val: T)) { |
| 271 | SmallVector<const SCEV *, 2> Factors; |
| 272 | for (const SCEV *Op : M->operands()) |
| 273 | if (!isa<SCEVConstant>(Val: Op)) |
| 274 | Factors.push_back(Elt: Op); |
| 275 | |
| 276 | return SE.getMulExpr(Ops&: Factors); |
| 277 | } |
| 278 | |
| 279 | return T; |
| 280 | } |
| 281 | |
| 282 | void llvm::findArrayDimensions(ScalarEvolution &SE, |
| 283 | SmallVectorImpl<const SCEV *> &Terms, |
| 284 | SmallVectorImpl<const SCEV *> &Sizes, |
| 285 | const SCEV *ElementSize) { |
| 286 | if (Terms.size() < 1 || !ElementSize) |
| 287 | return; |
| 288 | |
| 289 | // Early return when Terms do not contain parameters: we do not delinearize |
| 290 | // non parametric SCEVs. |
| 291 | if (!containsParameters(Terms)) |
| 292 | return; |
| 293 | |
| 294 | LLVM_DEBUG({ |
| 295 | dbgs() << "Terms:\n" ; |
| 296 | for (const SCEV *T : Terms) |
| 297 | dbgs().indent(2) << *T << "\n" ; |
| 298 | }); |
| 299 | |
| 300 | // Remove duplicates. |
| 301 | array_pod_sort(Start: Terms.begin(), End: Terms.end()); |
| 302 | Terms.erase(CS: llvm::unique(R&: Terms), CE: Terms.end()); |
| 303 | |
| 304 | // Put larger terms first. |
| 305 | llvm::sort(C&: Terms, Comp: [](const SCEV *LHS, const SCEV *RHS) { |
| 306 | return numberOfTerms(S: LHS) > numberOfTerms(S: RHS); |
| 307 | }); |
| 308 | |
| 309 | // Try to divide all terms by the element size. If term is not divisible by |
| 310 | // element size, proceed with the original term. |
| 311 | for (const SCEV *&Term : Terms) { |
| 312 | const SCEV *Q, *R; |
| 313 | SCEVDivision::divide(SE, Numerator: Term, Denominator: ElementSize, Quotient: &Q, Remainder: &R); |
| 314 | if (!Q->isZero()) |
| 315 | Term = Q; |
| 316 | } |
| 317 | |
| 318 | SmallVector<const SCEV *, 4> NewTerms; |
| 319 | |
| 320 | // Remove constant factors. |
| 321 | for (const SCEV *T : Terms) |
| 322 | if (const SCEV *NewT = removeConstantFactors(SE, T)) |
| 323 | NewTerms.push_back(Elt: NewT); |
| 324 | |
| 325 | LLVM_DEBUG({ |
| 326 | dbgs() << "Terms after sorting:\n" ; |
| 327 | for (const SCEV *T : NewTerms) |
| 328 | dbgs().indent(2) << *T << "\n" ; |
| 329 | }); |
| 330 | |
| 331 | if (NewTerms.empty() || !findArrayDimensionsRec(SE, Terms&: NewTerms, Sizes)) { |
| 332 | Sizes.clear(); |
| 333 | return; |
| 334 | } |
| 335 | |
| 336 | // The last element to be pushed into Sizes is the size of an element. |
| 337 | Sizes.push_back(Elt: ElementSize); |
| 338 | |
| 339 | LLVM_DEBUG({ |
| 340 | dbgs() << "Sizes:\n" ; |
| 341 | for (const SCEV *S : Sizes) |
| 342 | dbgs().indent(2) << *S << "\n" ; |
| 343 | }); |
| 344 | } |
| 345 | |
| 346 | void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr, |
| 347 | SmallVectorImpl<const SCEV *> &Subscripts, |
| 348 | SmallVectorImpl<const SCEV *> &Sizes) { |
| 349 | // Early exit in case this SCEV is not an affine multivariate function. |
| 350 | if (Sizes.empty()) |
| 351 | return; |
| 352 | |
| 353 | if (auto *AR = dyn_cast<SCEVAddRecExpr>(Val: Expr)) |
| 354 | if (!AR->isAffine()) |
| 355 | return; |
| 356 | |
| 357 | // Clear output vector. |
| 358 | Subscripts.clear(); |
| 359 | |
| 360 | LLVM_DEBUG(dbgs() << "\ncomputeAccessFunctions\n" |
| 361 | << "Memory Access Function: " << *Expr << "\n" ); |
| 362 | |
| 363 | const SCEV *Res = Expr; |
| 364 | int Last = Sizes.size() - 1; |
| 365 | |
| 366 | for (int i = Last; i >= 0; i--) { |
| 367 | const SCEV *Size = Sizes[i]; |
| 368 | const SCEV *Q, *R; |
| 369 | |
| 370 | SCEVDivision::divide(SE, Numerator: Res, Denominator: Size, Quotient: &Q, Remainder: &R); |
| 371 | |
| 372 | LLVM_DEBUG({ |
| 373 | dbgs() << "Computing 'MemAccFn / Sizes[" << i << "]':\n" ; |
| 374 | dbgs() << " MemAccFn: " << *Res << "\n" ; |
| 375 | dbgs() << " Sizes[" << i << "]: " << *Size << "\n" ; |
| 376 | dbgs() << " Quotient (Leftover): " << *Q << "\n" ; |
| 377 | dbgs() << " Remainder (Subscript Access Function): " << *R << "\n" ; |
| 378 | }); |
| 379 | |
| 380 | Res = Q; |
| 381 | |
| 382 | // Do not record the last subscript corresponding to the size of elements in |
| 383 | // the array. |
| 384 | if (i == Last) { |
| 385 | |
| 386 | // Bail out if the byte offset is non-zero. |
| 387 | if (!R->isZero()) { |
| 388 | Subscripts.clear(); |
| 389 | Sizes.clear(); |
| 390 | return; |
| 391 | } |
| 392 | |
| 393 | continue; |
| 394 | } |
| 395 | |
| 396 | // Record the access function for the current subscript. |
| 397 | Subscripts.push_back(Elt: R); |
| 398 | } |
| 399 | |
| 400 | // Also push in last position the remainder of the last division: it will be |
| 401 | // the access function of the innermost dimension. |
| 402 | Subscripts.push_back(Elt: Res); |
| 403 | |
| 404 | std::reverse(first: Subscripts.begin(), last: Subscripts.end()); |
| 405 | |
| 406 | LLVM_DEBUG({ |
| 407 | dbgs() << "Subscripts:\n" ; |
| 408 | for (const SCEV *S : Subscripts) |
| 409 | dbgs().indent(2) << *S << "\n" ; |
| 410 | dbgs() << "\n" ; |
| 411 | }); |
| 412 | } |
| 413 | |
| 414 | /// Splits the SCEV into two vectors of SCEVs representing the subscripts and |
| 415 | /// sizes of an array access. Returns the remainder of the delinearization that |
| 416 | /// is the offset start of the array. The SCEV->delinearize algorithm computes |
| 417 | /// the multiples of SCEV coefficients: that is a pattern matching of sub |
| 418 | /// expressions in the stride and base of a SCEV corresponding to the |
| 419 | /// computation of a GCD (greatest common divisor) of base and stride. When |
| 420 | /// SCEV->delinearize fails, it returns the SCEV unchanged. |
| 421 | /// |
| 422 | /// For example: when analyzing the memory access A[i][j][k] in this loop nest |
| 423 | /// |
| 424 | /// void foo(long n, long m, long o, double A[n][m][o]) { |
| 425 | /// |
| 426 | /// for (long i = 0; i < n; i++) |
| 427 | /// for (long j = 0; j < m; j++) |
| 428 | /// for (long k = 0; k < o; k++) |
| 429 | /// A[i][j][k] = 1.0; |
| 430 | /// } |
| 431 | /// |
| 432 | /// the delinearization input is the following AddRec SCEV: |
| 433 | /// |
| 434 | /// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> |
| 435 | /// |
| 436 | /// From this SCEV, we are able to say that the base offset of the access is %A |
| 437 | /// because it appears as an offset that does not divide any of the strides in |
| 438 | /// the loops: |
| 439 | /// |
| 440 | /// CHECK: Base offset: %A |
| 441 | /// |
| 442 | /// and then SCEV->delinearize determines the size of some of the dimensions of |
| 443 | /// the array as these are the multiples by which the strides are happening: |
| 444 | /// |
| 445 | /// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) |
| 446 | /// bytes. |
| 447 | /// |
| 448 | /// Note that the outermost dimension remains of UnknownSize because there are |
| 449 | /// no strides that would help identifying the size of the last dimension: when |
| 450 | /// the array has been statically allocated, one could compute the size of that |
| 451 | /// dimension by dividing the overall size of the array by the size of the known |
| 452 | /// dimensions: %m * %o * 8. |
| 453 | /// |
| 454 | /// Finally delinearize provides the access functions for the array reference |
| 455 | /// that does correspond to A[i][j][k] of the above C testcase: |
| 456 | /// |
| 457 | /// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] |
| 458 | /// |
| 459 | /// The testcases are checking the output of a function pass: |
| 460 | /// DelinearizationPass that walks through all loads and stores of a function |
| 461 | /// asking for the SCEV of the memory access with respect to all enclosing |
| 462 | /// loops, calling SCEV->delinearize on that and printing the results. |
| 463 | void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr, |
| 464 | SmallVectorImpl<const SCEV *> &Subscripts, |
| 465 | SmallVectorImpl<const SCEV *> &Sizes, |
| 466 | const SCEV *ElementSize) { |
| 467 | // Clear output vectors. |
| 468 | Subscripts.clear(); |
| 469 | Sizes.clear(); |
| 470 | |
| 471 | // First step: collect parametric terms. |
| 472 | SmallVector<const SCEV *, 4> Terms; |
| 473 | collectParametricTerms(SE, Expr, Terms); |
| 474 | |
| 475 | if (Terms.empty()) |
| 476 | return; |
| 477 | |
| 478 | // Second step: find subscript sizes. |
| 479 | findArrayDimensions(SE, Terms, Sizes, ElementSize); |
| 480 | |
| 481 | if (Sizes.empty()) |
| 482 | return; |
| 483 | |
| 484 | // Third step: compute the access functions for each subscript. |
| 485 | computeAccessFunctions(SE, Expr, Subscripts, Sizes); |
| 486 | } |
| 487 | |
| 488 | static std::optional<APInt> tryIntoAPInt(const SCEV *S) { |
| 489 | if (const auto *Const = dyn_cast<SCEVConstant>(Val: S)) |
| 490 | return Const->getAPInt(); |
| 491 | return std::nullopt; |
| 492 | } |
| 493 | |
| 494 | /// Collects the absolute values of constant steps for all induction variables. |
| 495 | /// Returns true if we can prove that all step recurrences are constants and \p |
| 496 | /// Expr is divisible by \p ElementSize. Each step recurrence is stored in \p |
| 497 | /// Steps after divided by \p ElementSize. |
| 498 | static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr, |
| 499 | SmallVectorImpl<uint64_t> &Steps, |
| 500 | uint64_t ElementSize) { |
| 501 | // End of recursion. The constant value also must be a multiple of |
| 502 | // ElementSize. |
| 503 | if (const auto *Const = dyn_cast<SCEVConstant>(Val: Expr)) { |
| 504 | const uint64_t Mod = Const->getAPInt().urem(RHS: ElementSize); |
| 505 | return Mod == 0; |
| 506 | } |
| 507 | |
| 508 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: Expr); |
| 509 | if (!AR || !AR->isAffine()) |
| 510 | return false; |
| 511 | |
| 512 | const SCEV *Step = AR->getStepRecurrence(SE); |
| 513 | std::optional<APInt> StepAPInt = tryIntoAPInt(S: Step); |
| 514 | if (!StepAPInt) |
| 515 | return false; |
| 516 | |
| 517 | APInt Q; |
| 518 | uint64_t R; |
| 519 | APInt::udivrem(LHS: StepAPInt->abs(), RHS: ElementSize, Quotient&: Q, Remainder&: R); |
| 520 | if (R != 0) |
| 521 | return false; |
| 522 | |
| 523 | // Bail out when the step is too large. |
| 524 | std::optional<uint64_t> StepVal = Q.tryZExtValue(); |
| 525 | if (!StepVal) |
| 526 | return false; |
| 527 | |
| 528 | Steps.push_back(Elt: *StepVal); |
| 529 | return collectConstantAbsSteps(SE, Expr: AR->getStart(), Steps, ElementSize); |
| 530 | } |
| 531 | |
| 532 | bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr, |
| 533 | SmallVectorImpl<uint64_t> &Sizes, |
| 534 | const SCEV *ElementSize) { |
| 535 | if (!ElementSize) |
| 536 | return false; |
| 537 | |
| 538 | std::optional<APInt> ElementSizeAPInt = tryIntoAPInt(S: ElementSize); |
| 539 | if (!ElementSizeAPInt || *ElementSizeAPInt == 0) |
| 540 | return false; |
| 541 | |
| 542 | std::optional<uint64_t> ElementSizeConst = ElementSizeAPInt->tryZExtValue(); |
| 543 | |
| 544 | // Early exit when ElementSize is not a positive constant. |
| 545 | if (!ElementSizeConst) |
| 546 | return false; |
| 547 | |
| 548 | if (!collectConstantAbsSteps(SE, Expr, Steps&: Sizes, ElementSize: *ElementSizeConst) || |
| 549 | Sizes.empty()) { |
| 550 | Sizes.clear(); |
| 551 | return false; |
| 552 | } |
| 553 | |
| 554 | // At this point, Sizes contains the absolute step recurrences for all |
| 555 | // induction variables. Each step recurrence must be a multiple of the size of |
| 556 | // the array element. Assuming that the each value represents the size of an |
| 557 | // array for each dimension, attempts to restore the length of each dimension |
| 558 | // by dividing the step recurrence by the next smaller value. For example, if |
| 559 | // we have the following AddRec SCEV: |
| 560 | // |
| 561 | // AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8) |
| 562 | // |
| 563 | // Then Sizes will become [256, 32, 1] after sorted. We don't know the size of |
| 564 | // the outermost dimension, the next dimension will be computed as 256 / 32 = |
| 565 | // 8, and the last dimension will be computed as 32 / 1 = 32. Thus it results |
| 566 | // in like Arr[UnknownSize][8][32] with elements of size 8 bytes, where Arr is |
| 567 | // a base pointer. |
| 568 | // |
| 569 | // TODO: Catch more cases, e.g., when a step recurrence is not divisible by |
| 570 | // the next smaller one, like A[i][3*j]. |
| 571 | llvm::sort(Start: Sizes.rbegin(), End: Sizes.rend()); |
| 572 | Sizes.erase(CS: llvm::unique(R&: Sizes), CE: Sizes.end()); |
| 573 | |
| 574 | // The last element in Sizes should be ElementSize. At this point, all values |
| 575 | // in Sizes are assumed to be divided by ElementSize, so replace it with 1. |
| 576 | assert(Sizes.back() != 0 && "Unexpected zero size in Sizes." ); |
| 577 | Sizes.back() = 1; |
| 578 | |
| 579 | for (unsigned I = 0; I + 1 < Sizes.size(); I++) { |
| 580 | uint64_t PrevSize = Sizes[I + 1]; |
| 581 | if (Sizes[I] % PrevSize) { |
| 582 | Sizes.clear(); |
| 583 | return false; |
| 584 | } |
| 585 | Sizes[I] /= PrevSize; |
| 586 | } |
| 587 | |
| 588 | // Finally, the last element in Sizes should be ElementSize. |
| 589 | Sizes.back() = *ElementSizeConst; |
| 590 | return true; |
| 591 | } |
| 592 | |
| 593 | /// Splits the SCEV into two vectors of SCEVs representing the subscripts and |
| 594 | /// sizes of an array access, assuming that the array is a fixed size array. |
| 595 | /// |
| 596 | /// E.g., if we have the code like as follows: |
| 597 | /// |
| 598 | /// double A[42][8][32]; |
| 599 | /// for i |
| 600 | /// for j |
| 601 | /// for k |
| 602 | /// use A[i][j][k] |
| 603 | /// |
| 604 | /// The access function will be represented as an AddRec SCEV like: |
| 605 | /// |
| 606 | /// AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8) |
| 607 | /// |
| 608 | /// Then findFixedSizeArrayDimensions infers the size of each dimension of the |
| 609 | /// array based on the fact that the value of the step recurrence is a multiple |
| 610 | /// of the size of the corresponding array element. In the above example, it |
| 611 | /// results in the following: |
| 612 | /// |
| 613 | /// CHECK: ArrayDecl[UnknownSize][8][32] with elements of 8 bytes. |
| 614 | /// |
| 615 | /// Finally each subscript will be computed as follows: |
| 616 | /// |
| 617 | /// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] |
| 618 | /// |
| 619 | /// Note that this function doesn't check the range of possible values for each |
| 620 | /// subscript, so the caller should perform additional boundary checks if |
| 621 | /// necessary. |
| 622 | /// |
| 623 | /// Also note that this function doesn't guarantee that the original array size |
| 624 | /// is restored "correctly". For example, in the following case: |
| 625 | /// |
| 626 | /// double A[42][4][64]; |
| 627 | /// double B[42][8][32]; |
| 628 | /// for i |
| 629 | /// for j |
| 630 | /// for k |
| 631 | /// use A[i][j][k] |
| 632 | /// use B[i][2*j][k] |
| 633 | /// |
| 634 | /// The access function for both accesses will be the same: |
| 635 | /// |
| 636 | /// AddRec: {{{0,+,2048}<%for.i>,+,512}<%for.j>,+,8}<%for.k> (ElementSize=8) |
| 637 | /// |
| 638 | /// The array sizes for both A and B will be computed as |
| 639 | /// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B. |
| 640 | /// |
| 641 | /// TODO: At the moment, this function can handle only simple cases. For |
| 642 | /// example, we cannot handle a case where a step recurrence is not divisible |
| 643 | /// by the next smaller step recurrence, e.g., A[i][3*j]. |
| 644 | bool llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr, |
| 645 | SmallVectorImpl<const SCEV *> &Subscripts, |
| 646 | SmallVectorImpl<const SCEV *> &Sizes, |
| 647 | const SCEV *ElementSize) { |
| 648 | // Clear output vectors. |
| 649 | Subscripts.clear(); |
| 650 | Sizes.clear(); |
| 651 | |
| 652 | // First step: find the fixed array size. |
| 653 | SmallVector<uint64_t, 4> ConstSizes; |
| 654 | if (!findFixedSizeArrayDimensions(SE, Expr, Sizes&: ConstSizes, ElementSize)) { |
| 655 | Sizes.clear(); |
| 656 | return false; |
| 657 | } |
| 658 | |
| 659 | // Convert the constant size to SCEV. |
| 660 | for (uint64_t Size : ConstSizes) |
| 661 | Sizes.push_back(Elt: SE.getConstant(Ty: Expr->getType(), V: Size)); |
| 662 | |
| 663 | // Second step: compute the access functions for each subscript. |
| 664 | computeAccessFunctions(SE, Expr, Subscripts, Sizes); |
| 665 | |
| 666 | return !Subscripts.empty(); |
| 667 | } |
| 668 | |
| 669 | bool llvm::validateDelinearizationResult(ScalarEvolution &SE, |
| 670 | ArrayRef<const SCEV *> Sizes, |
| 671 | ArrayRef<const SCEV *> Subscripts) { |
| 672 | // Sizes and Subscripts are as follows: |
| 673 | // |
| 674 | // Sizes: [UNK][S_2]...[S_n] |
| 675 | // Subscripts: [I_1][I_2]...[I_n] |
| 676 | // |
| 677 | // where the size of the outermost dimension is unknown (UNK). |
| 678 | |
| 679 | auto AddOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * { |
| 680 | if (!SE.willNotOverflow(BinOp: Instruction::Add, /*IsSigned=*/Signed: true, LHS: A, RHS: B)) |
| 681 | return nullptr; |
| 682 | return SE.getAddExpr(LHS: A, RHS: B); |
| 683 | }; |
| 684 | |
| 685 | auto MulOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * { |
| 686 | if (!SE.willNotOverflow(BinOp: Instruction::Mul, /*IsSigned=*/Signed: true, LHS: A, RHS: B)) |
| 687 | return nullptr; |
| 688 | return SE.getMulExpr(LHS: A, RHS: B); |
| 689 | }; |
| 690 | |
| 691 | // Range check: 0 <= I_k < S_k for k = 2..n. |
| 692 | for (size_t I = 1; I < Sizes.size(); ++I) { |
| 693 | const SCEV *Size = Sizes[I - 1]; |
| 694 | const SCEV *Subscript = Subscripts[I]; |
| 695 | if (!SE.isKnownNonNegative(S: Subscript)) |
| 696 | return false; |
| 697 | |
| 698 | // TODO: It may be better that delinearization itself unifies the types of |
| 699 | // all elements in Sizes and Subscripts. |
| 700 | Type *WiderTy = SE.getWiderType(Ty1: Subscript->getType(), Ty2: Size->getType()); |
| 701 | Subscript = SE.getNoopOrSignExtend(V: Subscript, Ty: WiderTy); |
| 702 | Size = SE.getNoopOrSignExtend(V: Size, Ty: WiderTy); |
| 703 | if (!SE.isKnownPredicate(Pred: ICmpInst::ICMP_SLT, LHS: Subscript, RHS: Size)) { |
| 704 | LLVM_DEBUG(dbgs() << "Range check failed: " << *Subscript << " <s " |
| 705 | << *Size << "\n" ); |
| 706 | return false; |
| 707 | } |
| 708 | } |
| 709 | |
| 710 | // The offset computation is as follows: |
| 711 | // |
| 712 | // Offset = I_n + |
| 713 | // S_n * I_{n-1} + |
| 714 | // ... + |
| 715 | // (S_2 * ... * S_n) * I_1 |
| 716 | // |
| 717 | // Regarding this as a function from (I_1, I_2, ..., I_n) to integers, it |
| 718 | // must be injective. To guarantee it, the above calculation must not |
| 719 | // overflow. Since we have already checked that 0 <= I_k < S_k for k = 2..n, |
| 720 | // the minimum and maximum values occur in the following cases: |
| 721 | // |
| 722 | // Min = [I_1][0]...[0] = S_2 * ... * S_n * I_1 |
| 723 | // Max = [I_1][S_2-1]...[S_n-1] |
| 724 | // = (S_2 * ... * S_n) * I_1 + |
| 725 | // (S_2 * ... * S_{n-1}) * (S_2 - 1) + |
| 726 | // ... + |
| 727 | // (S_n - 1) |
| 728 | // = (S_2 * ... * S_n) * I_1 + |
| 729 | // (S_2 * ... * S_n) - 1 (can be proven by induction) |
| 730 | // = Min + (S_2 * ... * S_n) - 1 |
| 731 | // |
| 732 | // NOTE: I_1 can be negative, so Min is not just 0. |
| 733 | const SCEV *Prod = SE.getOne(Ty: Sizes[0]->getType()); |
| 734 | for (const SCEV *Size : Sizes) { |
| 735 | Prod = MulOverflow(Prod, Size); |
| 736 | if (!Prod) |
| 737 | return false; |
| 738 | } |
| 739 | const SCEV *Min = MulOverflow(Prod, Subscripts[0]); |
| 740 | if (!Min) |
| 741 | return false; |
| 742 | |
| 743 | // We have already checked that Min and Prod don't overflow, so it's enough |
| 744 | // to check whether Min + Prod - 1 doesn't overflow. |
| 745 | const SCEV *MaxPlusOne = AddOverflow(Min, Prod); |
| 746 | if (!MaxPlusOne) |
| 747 | return false; |
| 748 | if (!SE.willNotOverflow(BinOp: Instruction::Sub, /*IsSigned=*/Signed: true, LHS: MaxPlusOne, |
| 749 | RHS: SE.getOne(Ty: MaxPlusOne->getType()))) |
| 750 | return false; |
| 751 | |
| 752 | return true; |
| 753 | } |
| 754 | |
| 755 | bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE, |
| 756 | const GetElementPtrInst *GEP, |
| 757 | SmallVectorImpl<const SCEV *> &Subscripts, |
| 758 | SmallVectorImpl<const SCEV *> &Sizes) { |
| 759 | assert(Subscripts.empty() && Sizes.empty() && |
| 760 | "Expected output lists to be empty on entry to this function." ); |
| 761 | assert(GEP && "getIndexExpressionsFromGEP called with a null GEP" ); |
| 762 | LLVM_DEBUG(dbgs() << "\nGEP to delinearize: " << *GEP << "\n" ); |
| 763 | Type *Ty = nullptr; |
| 764 | Type *IndexTy = SE.getEffectiveSCEVType(Ty: GEP->getPointerOperandType()); |
| 765 | bool DroppedFirstDim = false; |
| 766 | for (unsigned i = 1; i < GEP->getNumOperands(); i++) { |
| 767 | const SCEV *Expr = SE.getSCEV(V: GEP->getOperand(i_nocapture: i)); |
| 768 | if (i == 1) { |
| 769 | Ty = GEP->getSourceElementType(); |
| 770 | if (auto *Const = dyn_cast<SCEVConstant>(Val: Expr)) |
| 771 | if (Const->getValue()->isZero()) { |
| 772 | DroppedFirstDim = true; |
| 773 | continue; |
| 774 | } |
| 775 | Subscripts.push_back(Elt: Expr); |
| 776 | continue; |
| 777 | } |
| 778 | |
| 779 | auto *ArrayTy = dyn_cast<ArrayType>(Val: Ty); |
| 780 | if (!ArrayTy) { |
| 781 | LLVM_DEBUG(dbgs() << "GEP delinearize failed: " << *Ty |
| 782 | << " is not an array type.\n" ); |
| 783 | Subscripts.clear(); |
| 784 | Sizes.clear(); |
| 785 | return false; |
| 786 | } |
| 787 | |
| 788 | Subscripts.push_back(Elt: Expr); |
| 789 | if (!(DroppedFirstDim && i == 2)) |
| 790 | Sizes.push_back(Elt: SE.getConstant(Ty: IndexTy, V: ArrayTy->getNumElements())); |
| 791 | |
| 792 | Ty = ArrayTy->getElementType(); |
| 793 | } |
| 794 | LLVM_DEBUG({ |
| 795 | dbgs() << "Subscripts:\n" ; |
| 796 | for (const SCEV *S : Subscripts) |
| 797 | dbgs() << *S << "\n" ; |
| 798 | dbgs() << "\n" ; |
| 799 | }); |
| 800 | |
| 801 | return !Subscripts.empty(); |
| 802 | } |
| 803 | |
| 804 | namespace { |
| 805 | |
| 806 | void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI, |
| 807 | ScalarEvolution *SE) { |
| 808 | O << "Printing analysis 'Delinearization' for function '" << F->getName() |
| 809 | << "':" ; |
| 810 | for (Instruction &Inst : instructions(F)) { |
| 811 | // Only analyze loads and stores. |
| 812 | if (!isa<StoreInst>(Val: &Inst) && !isa<LoadInst>(Val: &Inst)) |
| 813 | continue; |
| 814 | |
| 815 | const BasicBlock *BB = Inst.getParent(); |
| 816 | Loop *L = LI->getLoopFor(BB); |
| 817 | // Only delinearize the memory access in the innermost loop. |
| 818 | // Do not analyze memory accesses outside loops. |
| 819 | if (!L) |
| 820 | continue; |
| 821 | |
| 822 | const SCEV *AccessFn = SE->getSCEVAtScope(V: getPointerOperand(V: &Inst), L); |
| 823 | |
| 824 | const SCEVUnknown *BasePointer = |
| 825 | dyn_cast<SCEVUnknown>(Val: SE->getPointerBase(V: AccessFn)); |
| 826 | // Do not delinearize if we cannot find the base pointer. |
| 827 | if (!BasePointer) |
| 828 | break; |
| 829 | AccessFn = SE->getMinusSCEV(LHS: AccessFn, RHS: BasePointer); |
| 830 | |
| 831 | O << "\n" ; |
| 832 | O << "Inst:" << Inst << "\n" ; |
| 833 | O << "AccessFunction: " << *AccessFn << "\n" ; |
| 834 | |
| 835 | SmallVector<const SCEV *, 3> Subscripts, Sizes; |
| 836 | |
| 837 | auto IsDelinearizationFailed = [&]() { |
| 838 | return Subscripts.size() == 0 || Sizes.size() == 0 || |
| 839 | Subscripts.size() != Sizes.size(); |
| 840 | }; |
| 841 | |
| 842 | delinearize(SE&: *SE, Expr: AccessFn, Subscripts, Sizes, ElementSize: SE->getElementSize(Inst: &Inst)); |
| 843 | if (UseFixedSizeArrayHeuristic && IsDelinearizationFailed()) { |
| 844 | Subscripts.clear(); |
| 845 | Sizes.clear(); |
| 846 | delinearizeFixedSizeArray(SE&: *SE, Expr: AccessFn, Subscripts, Sizes, |
| 847 | ElementSize: SE->getElementSize(Inst: &Inst)); |
| 848 | } |
| 849 | |
| 850 | if (IsDelinearizationFailed()) { |
| 851 | O << "failed to delinearize\n" ; |
| 852 | continue; |
| 853 | } |
| 854 | |
| 855 | O << "Base offset: " << *BasePointer << "\n" ; |
| 856 | O << "ArrayDecl[UnknownSize]" ; |
| 857 | int Size = Subscripts.size(); |
| 858 | for (int i = 0; i < Size - 1; i++) |
| 859 | O << "[" << *Sizes[i] << "]" ; |
| 860 | O << " with elements of " << *Sizes[Size - 1] << " bytes.\n" ; |
| 861 | |
| 862 | O << "ArrayRef" ; |
| 863 | for (int i = 0; i < Size; i++) |
| 864 | O << "[" << *Subscripts[i] << "]" ; |
| 865 | O << "\n" ; |
| 866 | |
| 867 | bool IsValid = validateDelinearizationResult(SE&: *SE, Sizes, Subscripts); |
| 868 | O << "Delinearization validation: " << (IsValid ? "Succeeded" : "Failed" ) |
| 869 | << "\n" ; |
| 870 | } |
| 871 | } |
| 872 | |
| 873 | } // end anonymous namespace |
| 874 | |
| 875 | DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS) |
| 876 | : OS(OS) {} |
| 877 | PreservedAnalyses DelinearizationPrinterPass::run(Function &F, |
| 878 | FunctionAnalysisManager &AM) { |
| 879 | printDelinearization(O&: OS, F: &F, LI: &AM.getResult<LoopAnalysis>(IR&: F), |
| 880 | SE: &AM.getResult<ScalarEvolutionAnalysis>(IR&: F)); |
| 881 | return PreservedAnalyses::all(); |
| 882 | } |
| 883 | |