1 | //===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Define several functions to decode x86 specific shuffle semantics using |
10 | // constants from the constant pool. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "X86ShuffleDecodeConstantPool.h" |
15 | #include "MCTargetDesc/X86ShuffleDecode.h" |
16 | #include "llvm/ADT/APInt.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/IR/Constants.h" |
19 | |
20 | //===----------------------------------------------------------------------===// |
21 | // Vector Mask Decoding |
22 | //===----------------------------------------------------------------------===// |
23 | |
24 | namespace llvm { |
25 | |
26 | static bool (const Constant *C, unsigned MaskEltSizeInBits, |
27 | APInt &UndefElts, |
28 | SmallVectorImpl<uint64_t> &RawMask) { |
29 | // It is not an error for shuffle masks to not be a vector of |
30 | // MaskEltSizeInBits because the constant pool uniques constants by their |
31 | // bit representation. |
32 | // e.g. the following take up the same space in the constant pool: |
33 | // i128 -170141183420855150465331762880109871104 |
34 | // |
35 | // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160> |
36 | // |
37 | // <4 x i32> <i32 -2147483648, i32 -2147483648, |
38 | // i32 -2147483648, i32 -2147483648> |
39 | auto *CstTy = dyn_cast<FixedVectorType>(Val: C->getType()); |
40 | if (!CstTy) |
41 | return false; |
42 | |
43 | Type *CstEltTy = CstTy->getElementType(); |
44 | if (!CstEltTy->isIntegerTy()) |
45 | return false; |
46 | |
47 | unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); |
48 | unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); |
49 | unsigned NumCstElts = CstTy->getNumElements(); |
50 | |
51 | assert((CstSizeInBits % MaskEltSizeInBits) == 0 && |
52 | "Unaligned shuffle mask size" ); |
53 | |
54 | unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits; |
55 | UndefElts = APInt(NumMaskElts, 0); |
56 | RawMask.resize(N: NumMaskElts, NV: 0); |
57 | |
58 | // Fast path - if the constants match the mask size then copy direct. |
59 | if (MaskEltSizeInBits == CstEltSizeInBits) { |
60 | assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size" ); |
61 | for (unsigned i = 0; i != NumMaskElts; ++i) { |
62 | Constant *COp = C->getAggregateElement(Elt: i); |
63 | if (!COp || (!isa<UndefValue>(Val: COp) && !isa<ConstantInt>(Val: COp))) |
64 | return false; |
65 | |
66 | if (isa<UndefValue>(Val: COp)) { |
67 | UndefElts.setBit(i); |
68 | RawMask[i] = 0; |
69 | continue; |
70 | } |
71 | |
72 | auto *Elt = cast<ConstantInt>(Val: COp); |
73 | RawMask[i] = Elt->getValue().getZExtValue(); |
74 | } |
75 | return true; |
76 | } |
77 | |
78 | // Extract all the undef/constant element data and pack into single bitsets. |
79 | APInt UndefBits(CstSizeInBits, 0); |
80 | APInt MaskBits(CstSizeInBits, 0); |
81 | for (unsigned i = 0; i != NumCstElts; ++i) { |
82 | Constant *COp = C->getAggregateElement(Elt: i); |
83 | if (!COp || (!isa<UndefValue>(Val: COp) && !isa<ConstantInt>(Val: COp))) |
84 | return false; |
85 | |
86 | unsigned BitOffset = i * CstEltSizeInBits; |
87 | |
88 | if (isa<UndefValue>(Val: COp)) { |
89 | UndefBits.setBits(loBit: BitOffset, hiBit: BitOffset + CstEltSizeInBits); |
90 | continue; |
91 | } |
92 | |
93 | MaskBits.insertBits(SubBits: cast<ConstantInt>(Val: COp)->getValue(), bitPosition: BitOffset); |
94 | } |
95 | |
96 | // Now extract the undef/constant bit data into the raw shuffle masks. |
97 | for (unsigned i = 0; i != NumMaskElts; ++i) { |
98 | unsigned BitOffset = i * MaskEltSizeInBits; |
99 | APInt EltUndef = UndefBits.extractBits(numBits: MaskEltSizeInBits, bitPosition: BitOffset); |
100 | |
101 | // Only treat the element as UNDEF if all bits are UNDEF, otherwise |
102 | // treat it as zero. |
103 | if (EltUndef.isAllOnes()) { |
104 | UndefElts.setBit(i); |
105 | RawMask[i] = 0; |
106 | continue; |
107 | } |
108 | |
109 | APInt EltBits = MaskBits.extractBits(numBits: MaskEltSizeInBits, bitPosition: BitOffset); |
110 | RawMask[i] = EltBits.getZExtValue(); |
111 | } |
112 | |
113 | return true; |
114 | } |
115 | |
116 | void DecodePSHUFBMask(const Constant *C, unsigned Width, |
117 | SmallVectorImpl<int> &ShuffleMask) { |
118 | assert((Width == 128 || Width == 256 || Width == 512) && |
119 | C->getType()->getPrimitiveSizeInBits() >= Width && |
120 | "Unexpected vector size." ); |
121 | |
122 | // The shuffle mask requires a byte vector. |
123 | APInt UndefElts; |
124 | SmallVector<uint64_t, 64> RawMask; |
125 | if (!extractConstantMask(C, MaskEltSizeInBits: 8, UndefElts, RawMask)) |
126 | return; |
127 | |
128 | unsigned NumElts = Width / 8; |
129 | assert((NumElts == 16 || NumElts == 32 || NumElts == 64) && |
130 | "Unexpected number of vector elements." ); |
131 | |
132 | for (unsigned i = 0; i != NumElts; ++i) { |
133 | if (UndefElts[i]) { |
134 | ShuffleMask.push_back(Elt: SM_SentinelUndef); |
135 | continue; |
136 | } |
137 | |
138 | uint64_t Element = RawMask[i]; |
139 | // If the high bit (7) of the byte is set, the element is zeroed. |
140 | if (Element & (1 << 7)) |
141 | ShuffleMask.push_back(Elt: SM_SentinelZero); |
142 | else { |
143 | // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte |
144 | // lane of the vector we're inside. |
145 | unsigned Base = i & ~0xf; |
146 | |
147 | // Only the least significant 4 bits of the byte are used. |
148 | int Index = Base + (Element & 0xf); |
149 | ShuffleMask.push_back(Elt: Index); |
150 | } |
151 | } |
152 | } |
153 | |
154 | void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width, |
155 | SmallVectorImpl<int> &ShuffleMask) { |
156 | assert((Width == 128 || Width == 256 || Width == 512) && |
157 | C->getType()->getPrimitiveSizeInBits() >= Width && |
158 | "Unexpected vector size." ); |
159 | assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size." ); |
160 | |
161 | // The shuffle mask requires elements the same size as the target. |
162 | APInt UndefElts; |
163 | SmallVector<uint64_t, 16> RawMask; |
164 | if (!extractConstantMask(C, MaskEltSizeInBits: ElSize, UndefElts, RawMask)) |
165 | return; |
166 | |
167 | unsigned NumElts = Width / ElSize; |
168 | unsigned NumEltsPerLane = 128 / ElSize; |
169 | assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) && |
170 | "Unexpected number of vector elements." ); |
171 | |
172 | for (unsigned i = 0; i != NumElts; ++i) { |
173 | if (UndefElts[i]) { |
174 | ShuffleMask.push_back(Elt: SM_SentinelUndef); |
175 | continue; |
176 | } |
177 | |
178 | int Index = i & ~(NumEltsPerLane - 1); |
179 | uint64_t Element = RawMask[i]; |
180 | if (ElSize == 64) |
181 | Index += (Element >> 1) & 0x1; |
182 | else |
183 | Index += Element & 0x3; |
184 | |
185 | ShuffleMask.push_back(Elt: Index); |
186 | } |
187 | } |
188 | |
189 | void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize, |
190 | unsigned Width, SmallVectorImpl<int> &ShuffleMask) { |
191 | Type *MaskTy = C->getType(); |
192 | unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); |
193 | (void)MaskTySize; |
194 | assert((MaskTySize == 128 || MaskTySize == 256) && Width >= MaskTySize && |
195 | "Unexpected vector size." ); |
196 | |
197 | // The shuffle mask requires elements the same size as the target. |
198 | APInt UndefElts; |
199 | SmallVector<uint64_t, 8> RawMask; |
200 | if (!extractConstantMask(C, MaskEltSizeInBits: ElSize, UndefElts, RawMask)) |
201 | return; |
202 | |
203 | unsigned NumElts = Width / ElSize; |
204 | unsigned NumEltsPerLane = 128 / ElSize; |
205 | assert((NumElts == 2 || NumElts == 4 || NumElts == 8) && |
206 | "Unexpected number of vector elements." ); |
207 | |
208 | for (unsigned i = 0; i != NumElts; ++i) { |
209 | if (UndefElts[i]) { |
210 | ShuffleMask.push_back(Elt: SM_SentinelUndef); |
211 | continue; |
212 | } |
213 | |
214 | // VPERMIL2 Operation. |
215 | // Bits[3] - Match Bit. |
216 | // Bits[2:1] - (Per Lane) PD Shuffle Mask. |
217 | // Bits[2:0] - (Per Lane) PS Shuffle Mask. |
218 | uint64_t Selector = RawMask[i]; |
219 | unsigned MatchBit = (Selector >> 3) & 0x1; |
220 | |
221 | // M2Z[0:1] MatchBit |
222 | // 0Xb X Source selected by Selector index. |
223 | // 10b 0 Source selected by Selector index. |
224 | // 10b 1 Zero. |
225 | // 11b 0 Zero. |
226 | // 11b 1 Source selected by Selector index. |
227 | if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) { |
228 | ShuffleMask.push_back(Elt: SM_SentinelZero); |
229 | continue; |
230 | } |
231 | |
232 | int Index = i & ~(NumEltsPerLane - 1); |
233 | if (ElSize == 64) |
234 | Index += (Selector >> 1) & 0x1; |
235 | else |
236 | Index += Selector & 0x3; |
237 | |
238 | int Src = (Selector >> 2) & 0x1; |
239 | Index += Src * NumElts; |
240 | ShuffleMask.push_back(Elt: Index); |
241 | } |
242 | } |
243 | |
244 | void DecodeVPPERMMask(const Constant *C, unsigned Width, |
245 | SmallVectorImpl<int> &ShuffleMask) { |
246 | Type *MaskTy = C->getType(); |
247 | unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); |
248 | (void)MaskTySize; |
249 | assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size." ); |
250 | |
251 | // The shuffle mask requires a byte vector. |
252 | APInt UndefElts; |
253 | SmallVector<uint64_t, 16> RawMask; |
254 | if (!extractConstantMask(C, MaskEltSizeInBits: 8, UndefElts, RawMask)) |
255 | return; |
256 | |
257 | unsigned NumElts = Width / 8; |
258 | assert(NumElts == 16 && "Unexpected number of vector elements." ); |
259 | |
260 | for (unsigned i = 0; i != NumElts; ++i) { |
261 | if (UndefElts[i]) { |
262 | ShuffleMask.push_back(Elt: SM_SentinelUndef); |
263 | continue; |
264 | } |
265 | |
266 | // VPPERM Operation |
267 | // Bits[4:0] - Byte Index (0 - 31) |
268 | // Bits[7:5] - Permute Operation |
269 | // |
270 | // Permute Operation: |
271 | // 0 - Source byte (no logical operation). |
272 | // 1 - Invert source byte. |
273 | // 2 - Bit reverse of source byte. |
274 | // 3 - Bit reverse of inverted source byte. |
275 | // 4 - 00h (zero - fill). |
276 | // 5 - FFh (ones - fill). |
277 | // 6 - Most significant bit of source byte replicated in all bit positions. |
278 | // 7 - Invert most significant bit of source byte and replicate in all bit |
279 | // positions. |
280 | uint64_t Element = RawMask[i]; |
281 | uint64_t Index = Element & 0x1F; |
282 | uint64_t PermuteOp = (Element >> 5) & 0x7; |
283 | |
284 | if (PermuteOp == 4) { |
285 | ShuffleMask.push_back(Elt: SM_SentinelZero); |
286 | continue; |
287 | } |
288 | if (PermuteOp != 0) { |
289 | ShuffleMask.clear(); |
290 | return; |
291 | } |
292 | ShuffleMask.push_back(Elt: (int)Index); |
293 | } |
294 | } |
295 | |
296 | } // namespace llvm |
297 | |