X86ShuffleDecodeConstantPool.cpp source code [llvm_projects/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp]

1	//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Define several functions to decode x86 specific shuffle semantics using
10	// constants from the constant pool.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "X86ShuffleDecodeConstantPool.h"
15	#include "MCTargetDesc/X86ShuffleDecode.h"
16	#include "llvm/ADT/APInt.h"
17	#include "llvm/ADT/SmallVector.h"
18	#include "llvm/IR/Constants.h"
19
20	//===----------------------------------------------------------------------===//
21	// Vector Mask Decoding
22	//===----------------------------------------------------------------------===//
23
24	namespace llvm {
25
26	static bool extractConstantMask(const Constant C, unsigned* MaskEltSizeInBits,
27	APInt &UndefElts,
28	SmallVectorImpl<uint64_t> &RawMask) {
29	// It is not an error for shuffle masks to not be a vector of
30	// MaskEltSizeInBits because the constant pool uniques constants by their
31	// bit representation.
32	// e.g. the following take up the same space in the constant pool:
33	// i128 -170141183420855150465331762880109871104
34	//
35	// <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
36	//
37	// <4 x i32> <i32 -2147483648, i32 -2147483648,
38	// i32 -2147483648, i32 -2147483648>
39	auto *CstTy = dyn_cast<FixedVectorType>(Val: C->getType());
40	if (!CstTy)
41	return false;
42
43	Type *CstEltTy = CstTy->getElementType();
44	if (!CstEltTy->isIntegerTy())
45	return false;
46
47	unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
48	unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
49	unsigned NumCstElts = CstTy->getNumElements();
50
51	assert((CstSizeInBits % MaskEltSizeInBits) == `0` &&
52	"Unaligned shuffle mask size");
53
54	unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
55	UndefElts = APInt (NumMaskElts, `0`);
56	RawMask.resize(N: NumMaskElts, NV: `0`);
57
58	// Fast path - if the constants match the mask size then copy direct.
59	if (MaskEltSizeInBits == CstEltSizeInBits) {
60	assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
61	for (unsigned i = `0`; i != NumMaskElts; ++i) {
62	Constant *COp = C->getAggregateElement(Elt: i);
63	if (!COp \|\| (!isa<UndefValue>(Val: COp) && !isa<ConstantInt>(Val: COp)))
64	return false;
65
66	if (isa<UndefValue>(Val: COp)) {
67	UndefElts.setBit(i);
68	RawMask [i] = `0`;
69	continue;
70	}
71
72	auto *Elt = cast<ConstantInt>(Val: COp);
73	RawMask [i] = Elt->getValue().getZExtValue();
74	}
75	return true;
76	}
77
78	// Extract all the undef/constant element data and pack into single bitsets.
79	APInt UndefBits(CstSizeInBits, `0`);
80	APInt MaskBits(CstSizeInBits, `0`);
81	for (unsigned i = `0`; i != NumCstElts; ++i) {
82	Constant *COp = C->getAggregateElement(Elt: i);
83	if (!COp \|\| (!isa<UndefValue>(Val: COp) && !isa<ConstantInt>(Val: COp)))
84	return false;
85
86	unsigned BitOffset = i * CstEltSizeInBits;
87
88	if (isa<UndefValue>(Val: COp)) {
89	UndefBits.setBits(loBit: BitOffset, hiBit: BitOffset + CstEltSizeInBits);
90	continue;
91	}
92
93	MaskBits.insertBits(SubBits: cast<ConstantInt>(Val: COp)->getValue(), bitPosition: BitOffset);
94	}
95
96	// Now extract the undef/constant bit data into the raw shuffle masks.
97	for (unsigned i = `0`; i != NumMaskElts; ++i) {
98	unsigned BitOffset = i * MaskEltSizeInBits;
99	APInt EltUndef = UndefBits.extractBits(numBits: MaskEltSizeInBits, bitPosition: BitOffset);
100
101	// Only treat the element as UNDEF if all bits are UNDEF, otherwise
102	// treat it as zero.
103	if (EltUndef.isAllOnes()) {
104	UndefElts.setBit(i);
105	RawMask [i] = `0`;
106	continue;
107	}
108
109	APInt EltBits = MaskBits.extractBits(numBits: MaskEltSizeInBits, bitPosition: BitOffset);
110	RawMask [i] = EltBits.getZExtValue();
111	}
112
113	return true;
114	}
115
116	void DecodePSHUFBMask(const Constant C, unsigned* Width,
117	SmallVectorImpl<int> &ShuffleMask) {
118	assert((Width == `128` \|\| Width == `256` \|\| Width == `512`) &&
119	C->getType()->getPrimitiveSizeInBits() >= Width &&
120	"Unexpected vector size.");
121
122	// The shuffle mask requires a byte vector.
123	APInt UndefElts;
124	SmallVector<uint64_t, `64`> RawMask;
125	if (!extractConstantMask(C, MaskEltSizeInBits: `8`, UndefElts, RawMask))
126	return;
127
128	unsigned NumElts = Width / `8`;
129	assert((NumElts == `16` \|\| NumElts == `32` \|\| NumElts == `64`) &&
130	"Unexpected number of vector elements.");
131
132	for (unsigned i = `0`; i != NumElts; ++i) {
133	if (UndefElts [i]) {
134	ShuffleMask.push_back(Elt: SM_SentinelUndef);
135	continue;
136	}
137
138	uint64_t Element = RawMask [i];
139	// If the high bit (7) of the byte is set, the element is zeroed.
140	if (Element & (`1` << `7`))
141	ShuffleMask.push_back(Elt: SM_SentinelZero);
142	else {
143	// For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
144	// lane of the vector we're inside.
145	unsigned Base = i & ~`0xf`;
146
147	// Only the least significant 4 bits of the byte are used.
148	int Index = Base + (Element & `0xf`);
149	ShuffleMask.push_back(Elt: Index);
150	}
151	}
152	}
153
154	void DecodeVPERMILPMask(const Constant C, unsigned* ElSize, unsigned Width,
155	SmallVectorImpl<int> &ShuffleMask) {
156	assert((Width == `128` \|\| Width == `256` \|\| Width == `512`) &&
157	C->getType()->getPrimitiveSizeInBits() >= Width &&
158	"Unexpected vector size.");
159	assert((ElSize == `32` \|\| ElSize == `64`) && "Unexpected vector element size.");
160
161	// The shuffle mask requires elements the same size as the target.
162	APInt UndefElts;
163	SmallVector<uint64_t, `16`> RawMask;
164	if (!extractConstantMask(C, MaskEltSizeInBits: ElSize, UndefElts, RawMask))
165	return;
166
167	unsigned NumElts = Width / ElSize;
168	unsigned NumEltsPerLane = `128` / ElSize;
169	assert((NumElts == `2` \|\| NumElts == `4` \|\| NumElts == `8` \|\| NumElts == `16`) &&
170	"Unexpected number of vector elements.");
171
172	for (unsigned i = `0`; i != NumElts; ++i) {
173	if (UndefElts [i]) {
174	ShuffleMask.push_back(Elt: SM_SentinelUndef);
175	continue;
176	}
177
178	int Index = i & ~(NumEltsPerLane - `1`);
179	uint64_t Element = RawMask [i];
180	if (ElSize == `64`)
181	Index += (Element >> `1`) & `0x1`;
182	else
183	Index += Element & `0x3`;
184
185	ShuffleMask.push_back(Elt: Index);
186	}
187	}
188
189	void DecodeVPERMIL2PMask(const Constant C, unsigned* M2Z, unsigned ElSize,
190	unsigned Width, SmallVectorImpl<int> &ShuffleMask) {
191	Type *MaskTy = C->getType();
192	unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
193	(void)MaskTySize;
194	assert((MaskTySize == `128` \|\| MaskTySize == `256`) && Width >= MaskTySize &&
195	"Unexpected vector size.");
196
197	// The shuffle mask requires elements the same size as the target.
198	APInt UndefElts;
199	SmallVector<uint64_t, `8`> RawMask;
200	if (!extractConstantMask(C, MaskEltSizeInBits: ElSize, UndefElts, RawMask))
201	return;
202
203	unsigned NumElts = Width / ElSize;
204	unsigned NumEltsPerLane = `128` / ElSize;
205	assert((NumElts == `2` \|\| NumElts == `4` \|\| NumElts == `8`) &&
206	"Unexpected number of vector elements.");
207
208	for (unsigned i = `0`; i != NumElts; ++i) {
209	if (UndefElts [i]) {
210	ShuffleMask.push_back(Elt: SM_SentinelUndef);
211	continue;
212	}
213
214	// VPERMIL2 Operation.
215	// Bits[3] - Match Bit.
216	// Bits[2:1] - (Per Lane) PD Shuffle Mask.
217	// Bits[2:0] - (Per Lane) PS Shuffle Mask.
218	uint64_t Selector = RawMask [i];
219	unsigned MatchBit = (Selector >> `3`) & `0x1`;
220
221	// M2Z[0:1] MatchBit
222	// 0Xb X Source selected by Selector index.
223	// 10b 0 Source selected by Selector index.
224	// 10b 1 Zero.
225	// 11b 0 Zero.
226	// 11b 1 Source selected by Selector index.
227	if ((M2Z & `0x2`) != `0u` && MatchBit != (M2Z & `0x1`)) {
228	ShuffleMask.push_back(Elt: SM_SentinelZero);
229	continue;
230	}
231
232	int Index = i & ~(NumEltsPerLane - `1`);
233	if (ElSize == `64`)
234	Index += (Selector >> `1`) & `0x1`;
235	else
236	Index += Selector & `0x3`;
237
238	int Src = (Selector >> `2`) & `0x1`;
239	Index += Src * NumElts;
240	ShuffleMask.push_back(Elt: Index);
241	}
242	}
243
244	void DecodeVPPERMMask(const Constant C, unsigned* Width,
245	SmallVectorImpl<int> &ShuffleMask) {
246	Type *MaskTy = C->getType();
247	unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
248	(void)MaskTySize;
249	assert(Width == `128` && Width >= MaskTySize && "Unexpected vector size.");
250
251	// The shuffle mask requires a byte vector.
252	APInt UndefElts;
253	SmallVector<uint64_t, `16`> RawMask;
254	if (!extractConstantMask(C, MaskEltSizeInBits: `8`, UndefElts, RawMask))
255	return;
256
257	unsigned NumElts = Width / `8`;
258	assert(NumElts == `16` && "Unexpected number of vector elements.");
259
260	for (unsigned i = `0`; i != NumElts; ++i) {
261	if (UndefElts [i]) {
262	ShuffleMask.push_back(Elt: SM_SentinelUndef);
263	continue;
264	}
265
266	// VPPERM Operation
267	// Bits[4:0] - Byte Index (0 - 31)
268	// Bits[7:5] - Permute Operation
269	//
270	// Permute Operation:
271	// 0 - Source byte (no logical operation).
272	// 1 - Invert source byte.
273	// 2 - Bit reverse of source byte.
274	// 3 - Bit reverse of inverted source byte.
275	// 4 - 00h (zero - fill).
276	// 5 - FFh (ones - fill).
277	// 6 - Most significant bit of source byte replicated in all bit positions.
278	// 7 - Invert most significant bit of source byte and replicate in all bit
279	// positions.
280	uint64_t Element = RawMask [i];
281	uint64_t Index = Element & `0x1F`;
282	uint64_t PermuteOp = (Element >> `5`) & `0x7`;
283
284	if (PermuteOp == `4`) {
285	ShuffleMask.push_back(Elt: SM_SentinelZero);
286	continue;
287	}
288	if (PermuteOp != `0`) {
289	ShuffleMask.clear();
290	return;
291	}
292	ShuffleMask.push_back(Elt: (int)Index);
293	}
294	}
295
296	} // namespace llvm
297

Browse the source code of llvm_projects/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp