| 1 | //===- HexagonVectorLoopCarriedReuse.h ------------------------------------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // This pass removes the computation of provably redundant expressions that have | 
|---|
| 10 | // been computed earlier in a previous iteration. It relies on the use of PHIs | 
|---|
| 11 | // to identify loop carried dependences. This is scalar replacement for vector | 
|---|
| 12 | // types. | 
|---|
| 13 | // | 
|---|
| 14 | //----------------------------------------------------------------------------- | 
|---|
| 15 | // Motivation: Consider the case where we have the following loop structure. | 
|---|
| 16 | // | 
|---|
| 17 | // Loop: | 
|---|
| 18 | //  t0 = a[i]; | 
|---|
| 19 | //  t1 = f(t0); | 
|---|
| 20 | //  t2 = g(t1); | 
|---|
| 21 | //  ... | 
|---|
| 22 | //  t3 = a[i+1]; | 
|---|
| 23 | //  t4 = f(t3); | 
|---|
| 24 | //  t5 = g(t4); | 
|---|
| 25 | //  t6 = op(t2, t5) | 
|---|
| 26 | //  cond_branch <Loop> | 
|---|
| 27 | // | 
|---|
| 28 | // This can be converted to | 
|---|
| 29 | //  t00 = a[0]; | 
|---|
| 30 | //  t10 = f(t00); | 
|---|
| 31 | //  t20 = g(t10); | 
|---|
| 32 | // Loop: | 
|---|
| 33 | //  t2 = t20; | 
|---|
| 34 | //  t3 = a[i+1]; | 
|---|
| 35 | //  t4 = f(t3); | 
|---|
| 36 | //  t5 = g(t4); | 
|---|
| 37 | //  t6 = op(t2, t5) | 
|---|
| 38 | //  t20 = t5 | 
|---|
| 39 | //  cond_branch <Loop> | 
|---|
| 40 | // | 
|---|
| 41 | // SROA does a good job of reusing a[i+1] as a[i] in the next iteration. | 
|---|
| 42 | // Such a loop comes to this pass in the following form. | 
|---|
| 43 | // | 
|---|
| 44 | // LoopPreheader: | 
|---|
| 45 | //  X0 = a[0]; | 
|---|
| 46 | // Loop: | 
|---|
| 47 | //  X2 = PHI<(X0, LoopPreheader), (X1, Loop)> | 
|---|
| 48 | //  t1 = f(X2)   <-- I1 | 
|---|
| 49 | //  t2 = g(t1) | 
|---|
| 50 | //  ... | 
|---|
| 51 | //  X1 = a[i+1] | 
|---|
| 52 | //  t4 = f(X1)   <-- I2 | 
|---|
| 53 | //  t5 = g(t4) | 
|---|
| 54 | //  t6 = op(t2, t5) | 
|---|
| 55 | //  cond_branch <Loop> | 
|---|
| 56 | // | 
|---|
| 57 | // In this pass, we look for PHIs such as X2 whose incoming values come only | 
|---|
| 58 | // from the Loop Preheader and over the backedge and additionally, both these | 
|---|
| 59 | // values are the results of the same operation in terms of opcode. We call such | 
|---|
| 60 | // a PHI node a dependence chain or DepChain. In this case, the dependence of X2 | 
|---|
| 61 | // over X1 is carried over only one iteration and so the DepChain is only one | 
|---|
| 62 | // PHI node long. | 
|---|
| 63 | // | 
|---|
| 64 | // Then, we traverse the uses of the PHI (X2) and the uses of the value of the | 
|---|
| 65 | // PHI coming  over the backedge (X1). We stop at the first pair of such users | 
|---|
| 66 | // I1 (of X2) and I2 (of X1) that meet the following conditions. | 
|---|
| 67 | // 1. I1 and I2 are the same operation, but with different operands. | 
|---|
| 68 | // 2. X2 and X1 are used at the same operand number in the two instructions. | 
|---|
| 69 | // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a | 
|---|
| 70 | //    a DepChain from Op1 to Op2 of the same length as that between X2 and X1. | 
|---|
| 71 | // | 
|---|
| 72 | // We then make the following transformation | 
|---|
| 73 | // LoopPreheader: | 
|---|
| 74 | //  X0 = a[0]; | 
|---|
| 75 | //  Y0 = f(X0); | 
|---|
| 76 | // Loop: | 
|---|
| 77 | //  X2 = PHI<(X0, LoopPreheader), (X1, Loop)> | 
|---|
| 78 | //  Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)> | 
|---|
| 79 | //  t1 = f(X2)   <-- Will be removed by DCE. | 
|---|
| 80 | //  t2 = g(Y2) | 
|---|
| 81 | //  ... | 
|---|
| 82 | //  X1 = a[i+1] | 
|---|
| 83 | //  t4 = f(X1) | 
|---|
| 84 | //  t5 = g(t4) | 
|---|
| 85 | //  t6 = op(t2, t5) | 
|---|
| 86 | //  cond_branch <Loop> | 
|---|
| 87 | // | 
|---|
| 88 | // We proceed until we cannot find any more such instructions I1 and I2. | 
|---|
| 89 | // | 
|---|
| 90 | // --- DepChains & Loop carried dependences --- | 
|---|
| 91 | // Consider a single basic block loop such as | 
|---|
| 92 | // | 
|---|
| 93 | // LoopPreheader: | 
|---|
| 94 | //  X0 = ... | 
|---|
| 95 | //  Y0 = ... | 
|---|
| 96 | // Loop: | 
|---|
| 97 | //  X2 = PHI<(X0, LoopPreheader), (X1, Loop)> | 
|---|
| 98 | //  Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)> | 
|---|
| 99 | //  ... | 
|---|
| 100 | //  X1 = ... | 
|---|
| 101 | //  ... | 
|---|
| 102 | //  cond_branch <Loop> | 
|---|
| 103 | // | 
|---|
| 104 | // Then there is a dependence between X2 and X1 that goes back one iteration, | 
|---|
| 105 | // i.e. X1 is used as X2 in the very next iteration. We represent this as a | 
|---|
| 106 | // DepChain from X2 to X1 (X2->X1). | 
|---|
| 107 | // Similarly, there is a dependence between Y2 and X1 that goes back two | 
|---|
| 108 | // iterations. X1 is used as Y2 two iterations after it is computed. This is | 
|---|
| 109 | // represented by a DepChain as (Y2->X2->X1). | 
|---|
| 110 | // | 
|---|
| 111 | // A DepChain has the following properties. | 
|---|
| 112 | // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of | 
|---|
| 113 | //    iterations of carried dependence + 1. | 
|---|
| 114 | // 2. All instructions in the DepChain except the last are PHIs. | 
|---|
| 115 | // | 
|---|
| 116 | //===----------------------------------------------------------------------===// | 
|---|
| 117 |  | 
|---|
| 118 | #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H | 
|---|
| 119 | #define LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H | 
|---|
| 120 |  | 
|---|
| 121 | #include "llvm/Transforms/Scalar/LoopPassManager.h" | 
|---|
| 122 |  | 
|---|
| 123 | namespace llvm { | 
|---|
| 124 |  | 
|---|
| 125 | class Loop; | 
|---|
| 126 |  | 
|---|
| 127 | /// Hexagon Vector Loop Carried Reuse Pass | 
|---|
| 128 | struct HexagonVectorLoopCarriedReusePass | 
|---|
| 129 | : public PassInfoMixin<HexagonVectorLoopCarriedReusePass> { | 
|---|
| 130 | HexagonVectorLoopCarriedReusePass() = default; | 
|---|
| 131 |  | 
|---|
| 132 | /// Run pass over the Loop. | 
|---|
| 133 | PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM, | 
|---|
| 134 | LoopStandardAnalysisResults &AR, LPMUpdater &U); | 
|---|
| 135 | }; | 
|---|
| 136 |  | 
|---|
| 137 | } // end namespace llvm | 
|---|
| 138 |  | 
|---|
| 139 | #endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONVLCR_H | 
|---|
| 140 |  | 
|---|