| 1 | //===-- AArch64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file declares data for the optimal way to build a perfect shuffle using |
| 10 | // AdvSIMD instructions. The data is generated by llvm-PerfectShuffle. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H |
| 15 | #define LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H |
| 16 | |
| 17 | #include "llvm/ADT/ArrayRef.h" |
| 18 | #include "llvm/ADT/STLExtras.h" |
| 19 | |
| 20 | namespace llvm { |
| 21 | |
| 22 | extern const unsigned PerfectShuffleTable[6561 + 1]; |
| 23 | |
| 24 | inline unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) { |
| 25 | assert(M.size() == 4 && "Expected a 4 entry perfect shuffle" ); |
| 26 | |
| 27 | // Special case zero-cost nop copies, from either LHS or RHS. |
| 28 | if (llvm::all_of(Range: llvm::enumerate(First&: M), P: [](const auto &E) { |
| 29 | return E.value() < 0 || E.value() == (int)E.index(); |
| 30 | })) |
| 31 | return 0; |
| 32 | if (llvm::all_of(Range: llvm::enumerate(First&: M), P: [](const auto &E) { |
| 33 | return E.value() < 0 || E.value() == (int)E.index() + 4; |
| 34 | })) |
| 35 | return 0; |
| 36 | |
| 37 | // Get the four mask elementd from the 2 inputs. Perfect shuffles encode undef |
| 38 | // elements with value 8. |
| 39 | unsigned PFIndexes[4]; |
| 40 | for (unsigned i = 0; i != 4; ++i) { |
| 41 | assert(M[i] < 8 && "Expected a maximum entry of 8 for shuffle mask" ); |
| 42 | if (M[i] < 0) |
| 43 | PFIndexes[i] = 8; |
| 44 | else |
| 45 | PFIndexes[i] = M[i]; |
| 46 | } |
| 47 | |
| 48 | // Compute the index in the perfect shuffle table. |
| 49 | unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + |
| 50 | PFIndexes[2] * 9 + PFIndexes[3]; |
| 51 | unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; |
| 52 | // And extract the cost from the upper bits. The cost is encoded as Cost-1. |
| 53 | return (PFEntry >> 30) + 1; |
| 54 | } |
| 55 | |
| 56 | /// Return true for zip1 or zip2 masks of the form: |
| 57 | /// <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0, OperandOrderOut = 0) or |
| 58 | /// <4, 12, 5, 13, 6, 14, 7, 15> (WhichResultOut = 1, OperandOrderOut = 0) or |
| 59 | /// <8, 0, 9, 1, 10, 2, 11, 3> (WhichResultOut = 0, OperandOrderOut = 1) or |
| 60 | /// <12, 4, 13, 5, 14, 6, 15, 7> (WhichResultOut = 1, OperandOrderOut = 1) |
| 61 | inline bool isZIPMask(ArrayRef<int> M, unsigned NumElts, |
| 62 | unsigned &WhichResultOut, unsigned &OperandOrderOut) { |
| 63 | if (NumElts % 2 != 0) |
| 64 | return false; |
| 65 | |
| 66 | // "Result" corresponds to "WhichResultOut", selecting between zip1 and zip2. |
| 67 | // "Order" corresponds to "OperandOrderOut", selecting the order of operands |
| 68 | // for the instruction (flipped or not). |
| 69 | bool Result0Order0 = true; // WhichResultOut = 0, OperandOrderOut = 0 |
| 70 | bool Result1Order0 = true; // WhichResultOut = 1, OperandOrderOut = 0 |
| 71 | bool Result0Order1 = true; // WhichResultOut = 0, OperandOrderOut = 1 |
| 72 | bool Result1Order1 = true; // WhichResultOut = 1, OperandOrderOut = 1 |
| 73 | // Check all elements match. |
| 74 | for (unsigned i = 0; i != NumElts; i += 2) { |
| 75 | if (M[i] >= 0) { |
| 76 | unsigned EvenElt = (unsigned)M[i]; |
| 77 | if (EvenElt != i / 2) |
| 78 | Result0Order0 = false; |
| 79 | if (EvenElt != NumElts / 2 + i / 2) |
| 80 | Result1Order0 = false; |
| 81 | if (EvenElt != NumElts + i / 2) |
| 82 | Result0Order1 = false; |
| 83 | if (EvenElt != NumElts + NumElts / 2 + i / 2) |
| 84 | Result1Order1 = false; |
| 85 | } |
| 86 | if (M[i + 1] >= 0) { |
| 87 | unsigned OddElt = (unsigned)M[i + 1]; |
| 88 | if (OddElt != NumElts + i / 2) |
| 89 | Result0Order0 = false; |
| 90 | if (OddElt != NumElts + NumElts / 2 + i / 2) |
| 91 | Result1Order0 = false; |
| 92 | if (OddElt != i / 2) |
| 93 | Result0Order1 = false; |
| 94 | if (OddElt != NumElts / 2 + i / 2) |
| 95 | Result1Order1 = false; |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | if (Result0Order0 + Result1Order0 + Result0Order1 + Result1Order1 != 1) |
| 100 | return false; |
| 101 | |
| 102 | WhichResultOut = (Result0Order0 || Result0Order1) ? 0 : 1; |
| 103 | OperandOrderOut = (Result0Order0 || Result1Order0) ? 0 : 1; |
| 104 | return true; |
| 105 | } |
| 106 | |
| 107 | /// Return true for uzp1 or uzp2 masks of the form: |
| 108 | /// <0, 2, 4, 6, 8, 10, 12, 14> or |
| 109 | /// <1, 3, 5, 7, 9, 11, 13, 15> |
| 110 | inline bool isUZPMask(ArrayRef<int> M, unsigned NumElts, |
| 111 | unsigned &WhichResultOut) { |
| 112 | // Check the first non-undef element for which half to use. |
| 113 | unsigned WhichResult = 2; |
| 114 | for (unsigned i = 0; i != NumElts; i++) { |
| 115 | if (M[i] >= 0) { |
| 116 | WhichResult = ((unsigned)M[i] == i * 2 ? 0 : 1); |
| 117 | break; |
| 118 | } |
| 119 | } |
| 120 | if (WhichResult == 2) |
| 121 | return false; |
| 122 | |
| 123 | // Check all elements match. |
| 124 | for (unsigned i = 0; i != NumElts; ++i) { |
| 125 | if (M[i] < 0) |
| 126 | continue; // ignore UNDEF indices |
| 127 | if ((unsigned)M[i] != 2 * i + WhichResult) |
| 128 | return false; |
| 129 | } |
| 130 | WhichResultOut = WhichResult; |
| 131 | return true; |
| 132 | } |
| 133 | |
| 134 | /// Return true for trn1 or trn2 masks of the form: |
| 135 | /// <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0, OperandOrderOut = 0) or |
| 136 | /// <1, 9, 3, 11, 5, 13, 7, 15> (WhichResultOut = 1, OperandOrderOut = 0) or |
| 137 | /// <8, 0, 10, 2, 12, 4, 14, 6> (WhichResultOut = 0, OperandOrderOut = 1) or |
| 138 | /// <9, 1, 11, 3, 13, 5, 15, 7> (WhichResultOut = 1, OperandOrderOut = 1) or |
| 139 | inline bool isTRNMask(ArrayRef<int> M, unsigned NumElts, |
| 140 | unsigned &WhichResultOut, unsigned &OperandOrderOut) { |
| 141 | if (NumElts % 2 != 0) |
| 142 | return false; |
| 143 | |
| 144 | // "Result" corresponds to "WhichResultOut", selecting between trn1 and trn2. |
| 145 | // "Order" corresponds to "OperandOrderOut", selecting the order of operands |
| 146 | // for the instruction (flipped or not). |
| 147 | bool Result0Order0 = true; // WhichResultOut = 0, OperandOrderOut = 0 |
| 148 | bool Result1Order0 = true; // WhichResultOut = 1, OperandOrderOut = 0 |
| 149 | bool Result0Order1 = true; // WhichResultOut = 0, OperandOrderOut = 1 |
| 150 | bool Result1Order1 = true; // WhichResultOut = 1, OperandOrderOut = 1 |
| 151 | // Check all elements match. |
| 152 | for (unsigned i = 0; i != NumElts; i += 2) { |
| 153 | if (M[i] >= 0) { |
| 154 | unsigned EvenElt = (unsigned)M[i]; |
| 155 | if (EvenElt != i) |
| 156 | Result0Order0 = false; |
| 157 | if (EvenElt != i + 1) |
| 158 | Result1Order0 = false; |
| 159 | if (EvenElt != NumElts + i) |
| 160 | Result0Order1 = false; |
| 161 | if (EvenElt != NumElts + i + 1) |
| 162 | Result1Order1 = false; |
| 163 | } |
| 164 | if (M[i + 1] >= 0) { |
| 165 | unsigned OddElt = (unsigned)M[i + 1]; |
| 166 | if (OddElt != NumElts + i) |
| 167 | Result0Order0 = false; |
| 168 | if (OddElt != NumElts + i + 1) |
| 169 | Result1Order0 = false; |
| 170 | if (OddElt != i) |
| 171 | Result0Order1 = false; |
| 172 | if (OddElt != i + 1) |
| 173 | Result1Order1 = false; |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | if (Result0Order0 + Result1Order0 + Result0Order1 + Result1Order1 != 1) |
| 178 | return false; |
| 179 | |
| 180 | WhichResultOut = (Result0Order0 || Result0Order1) ? 0 : 1; |
| 181 | OperandOrderOut = (Result0Order0 || Result1Order0) ? 0 : 1; |
| 182 | return true; |
| 183 | } |
| 184 | |
| 185 | /// isREVMask - Check if a vector shuffle corresponds to a REV |
| 186 | /// instruction with the specified blocksize. (The order of the elements |
| 187 | /// within each block of the vector is reversed.) |
| 188 | inline bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts, |
| 189 | unsigned BlockSize) { |
| 190 | assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64 || |
| 191 | BlockSize == 128) && |
| 192 | "Only possible block sizes for REV are: 16, 32, 64, 128" ); |
| 193 | |
| 194 | unsigned BlockElts = M[0] + 1; |
| 195 | // If the first shuffle index is UNDEF, be optimistic. |
| 196 | if (M[0] < 0) |
| 197 | BlockElts = BlockSize / EltSize; |
| 198 | |
| 199 | if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize) |
| 200 | return false; |
| 201 | |
| 202 | for (unsigned i = 0; i < NumElts; ++i) { |
| 203 | if (M[i] < 0) |
| 204 | continue; // ignore UNDEF indices |
| 205 | if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) |
| 206 | return false; |
| 207 | } |
| 208 | |
| 209 | return true; |
| 210 | } |
| 211 | |
| 212 | /// isDUPQMask - matches a splat of equivalent lanes within segments of a given |
| 213 | /// number of elements. |
| 214 | inline std::optional<unsigned> isDUPQMask(ArrayRef<int> Mask, unsigned Segments, |
| 215 | unsigned SegmentSize) { |
| 216 | unsigned Lane = unsigned(Mask[0]); |
| 217 | |
| 218 | // Make sure there's no size changes. |
| 219 | if (SegmentSize * Segments != Mask.size()) |
| 220 | return std::nullopt; |
| 221 | |
| 222 | // Check the first index corresponds to one of the lanes in the first segment. |
| 223 | if (Lane >= SegmentSize) |
| 224 | return std::nullopt; |
| 225 | |
| 226 | // Check that all lanes match the first, adjusted for segment. |
| 227 | // Undef/poison lanes (<0) are also accepted. |
| 228 | if (all_of(Range: enumerate(First&: Mask), P: [&](auto P) { |
| 229 | const unsigned SegmentIndex = P.index() / SegmentSize; |
| 230 | return P.value() < 0 || |
| 231 | unsigned(P.value()) == Lane + SegmentIndex * SegmentSize; |
| 232 | })) |
| 233 | return Lane; |
| 234 | |
| 235 | return std::nullopt; |
| 236 | } |
| 237 | |
| 238 | /// isDUPFirstSegmentMask - matches a splat of the first 128b segment. |
| 239 | inline bool isDUPFirstSegmentMask(ArrayRef<int> Mask, unsigned Segments, |
| 240 | unsigned SegmentSize) { |
| 241 | // Make sure there's no size changes. |
| 242 | if (SegmentSize * Segments != Mask.size()) |
| 243 | return false; |
| 244 | |
| 245 | // Check that all lanes refer to the equivalent lane in the first segment. |
| 246 | // Undef/poison lanes (<0) are also accepted. |
| 247 | return all_of(Range: enumerate(First&: Mask), P: [&](auto P) { |
| 248 | const unsigned IndexWithinSegment = P.index() % SegmentSize; |
| 249 | return P.value() < 0 || unsigned(P.value()) == IndexWithinSegment; |
| 250 | }); |
| 251 | } |
| 252 | |
| 253 | } // namespace llvm |
| 254 | |
| 255 | #endif |
| 256 | |