| 1 | //===- HexagonVectorLoopCarriedReuse.h ------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This pass removes the computation of provably redundant expressions that have |
| 10 | // been computed earlier in a previous iteration. It relies on the use of PHIs |
| 11 | // to identify loop carried dependences. This is scalar replacement for vector |
| 12 | // types. |
| 13 | // |
| 14 | //----------------------------------------------------------------------------- |
| 15 | // Motivation: Consider the case where we have the following loop structure. |
| 16 | // |
| 17 | // Loop: |
| 18 | // t0 = a[i]; |
| 19 | // t1 = f(t0); |
| 20 | // t2 = g(t1); |
| 21 | // ... |
| 22 | // t3 = a[i+1]; |
| 23 | // t4 = f(t3); |
| 24 | // t5 = g(t4); |
| 25 | // t6 = op(t2, t5) |
| 26 | // cond_branch <Loop> |
| 27 | // |
| 28 | // This can be converted to |
| 29 | // t00 = a[0]; |
| 30 | // t10 = f(t00); |
| 31 | // t20 = g(t10); |
| 32 | // Loop: |
| 33 | // t2 = t20; |
| 34 | // t3 = a[i+1]; |
| 35 | // t4 = f(t3); |
| 36 | // t5 = g(t4); |
| 37 | // t6 = op(t2, t5) |
| 38 | // t20 = t5 |
| 39 | // cond_branch <Loop> |
| 40 | // |
| 41 | // SROA does a good job of reusing a[i+1] as a[i] in the next iteration. |
| 42 | // Such a loop comes to this pass in the following form. |
| 43 | // |
| 44 | // LoopPreheader: |
| 45 | // X0 = a[0]; |
| 46 | // Loop: |
| 47 | // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> |
| 48 | // t1 = f(X2) <-- I1 |
| 49 | // t2 = g(t1) |
| 50 | // ... |
| 51 | // X1 = a[i+1] |
| 52 | // t4 = f(X1) <-- I2 |
| 53 | // t5 = g(t4) |
| 54 | // t6 = op(t2, t5) |
| 55 | // cond_branch <Loop> |
| 56 | // |
| 57 | // In this pass, we look for PHIs such as X2 whose incoming values come only |
| 58 | // from the Loop Preheader and over the backedge and additionally, both these |
| 59 | // values are the results of the same operation in terms of opcode. We call such |
| 60 | // a PHI node a dependence chain or DepChain. In this case, the dependence of X2 |
| 61 | // over X1 is carried over only one iteration and so the DepChain is only one |
| 62 | // PHI node long. |
| 63 | // |
| 64 | // Then, we traverse the uses of the PHI (X2) and the uses of the value of the |
| 65 | // PHI coming over the backedge (X1). We stop at the first pair of such users |
| 66 | // I1 (of X2) and I2 (of X1) that meet the following conditions. |
| 67 | // 1. I1 and I2 are the same operation, but with different operands. |
| 68 | // 2. X2 and X1 are used at the same operand number in the two instructions. |
| 69 | // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a |
| 70 | // a DepChain from Op1 to Op2 of the same length as that between X2 and X1. |
| 71 | // |
| 72 | // We then make the following transformation |
| 73 | // LoopPreheader: |
| 74 | // X0 = a[0]; |
| 75 | // Y0 = f(X0); |
| 76 | // Loop: |
| 77 | // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> |
| 78 | // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)> |
| 79 | // t1 = f(X2) <-- Will be removed by DCE. |
| 80 | // t2 = g(Y2) |
| 81 | // ... |
| 82 | // X1 = a[i+1] |
| 83 | // t4 = f(X1) |
| 84 | // t5 = g(t4) |
| 85 | // t6 = op(t2, t5) |
| 86 | // cond_branch <Loop> |
| 87 | // |
| 88 | // We proceed until we cannot find any more such instructions I1 and I2. |
| 89 | // |
| 90 | // --- DepChains & Loop carried dependences --- |
| 91 | // Consider a single basic block loop such as |
| 92 | // |
| 93 | // LoopPreheader: |
| 94 | // X0 = ... |
| 95 | // Y0 = ... |
| 96 | // Loop: |
| 97 | // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> |
| 98 | // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)> |
| 99 | // ... |
| 100 | // X1 = ... |
| 101 | // ... |
| 102 | // cond_branch <Loop> |
| 103 | // |
| 104 | // Then there is a dependence between X2 and X1 that goes back one iteration, |
| 105 | // i.e. X1 is used as X2 in the very next iteration. We represent this as a |
| 106 | // DepChain from X2 to X1 (X2->X1). |
| 107 | // Similarly, there is a dependence between Y2 and X1 that goes back two |
| 108 | // iterations. X1 is used as Y2 two iterations after it is computed. This is |
| 109 | // represented by a DepChain as (Y2->X2->X1). |
| 110 | // |
| 111 | // A DepChain has the following properties. |
| 112 | // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of |
| 113 | // iterations of carried dependence + 1. |
| 114 | // 2. All instructions in the DepChain except the last are PHIs. |
| 115 | // |
| 116 | //===----------------------------------------------------------------------===// |
| 117 | |
| 118 | #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H |
| 119 | #define LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H |
| 120 | |
| 121 | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
| 122 | |
| 123 | namespace llvm { |
| 124 | |
| 125 | class Loop; |
| 126 | |
| 127 | /// Hexagon Vector Loop Carried Reuse Pass |
| 128 | struct HexagonVectorLoopCarriedReusePass |
| 129 | : public PassInfoMixin<HexagonVectorLoopCarriedReusePass> { |
| 130 | HexagonVectorLoopCarriedReusePass() = default; |
| 131 | |
| 132 | /// Run pass over the Loop. |
| 133 | PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM, |
| 134 | LoopStandardAnalysisResults &AR, LPMUpdater &U); |
| 135 | }; |
| 136 | |
| 137 | } // end namespace llvm |
| 138 | |
| 139 | #endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H |
| 140 | |