1//===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file computes an optimal sequence of instructions for doing all shuffles
10// of two 4-element vectors. With a release build and when configured to emit
11// an altivec instruction table, this takes about 30s to run on a 2.7Ghz
12// PowerPC G5.
13//
14//===----------------------------------------------------------------------===//
15
16#include <cassert>
17#include <cstdlib>
18#include <iomanip>
19#include <iostream>
20#include <vector>
21
22#define GENERATE_NEON
23#define GENERATE_NEON_INS
24
25struct Operator;
26
27// Masks are 4-nibble hex numbers. Values 0-7 in any nibble means that it takes
28// an element from that value of the input vectors. A value of 8 means the
29// entry is undefined.
30
31// Mask manipulation functions.
32static inline unsigned short MakeMask(unsigned V0, unsigned V1,
33 unsigned V2, unsigned V3) {
34 return (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) | (V3 << (0*4));
35}
36
37/// getMaskElt - Return element N of the specified mask.
38static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
39 return (Mask >> ((3-Elt)*4)) & 0xF;
40}
41
42static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
43 unsigned FieldShift = ((3-Elt)*4);
44 return (Mask & ~(0xF << FieldShift)) | (NewVal << FieldShift);
45}
46
47// Reject elements where the values are 9-15.
48static bool isValidMask(unsigned short Mask) {
49 unsigned short UndefBits = Mask & 0x8888;
50 return (Mask & ((UndefBits >> 1)|(UndefBits>>2)|(UndefBits>>3))) == 0;
51}
52
53/// hasUndefElements - Return true if any of the elements in the mask are undefs
54///
55static bool hasUndefElements(unsigned short Mask) {
56 return (Mask & 0x8888) != 0;
57}
58
59/// isOnlyLHSMask - Return true if this mask only refers to its LHS, not
60/// including undef values..
61static bool isOnlyLHSMask(unsigned short Mask) {
62 return (Mask & 0x4444) == 0;
63}
64
65/// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to
66/// refer to the LHS only (for when one argument value is passed into the same
67/// function twice).
68#if 0
69static unsigned short getLHSOnlyMask(unsigned short Mask) {
70 return Mask & 0xBBBB; // Keep only LHS and Undefs.
71}
72#endif
73
74/// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4
75/// bits) into a compressed 13-bit mask, where each elt is multiplied by 9.
76static unsigned getCompressedMask(unsigned short Mask) {
77 return getMaskElt(Mask, Elt: 0)*9*9*9 + getMaskElt(Mask, Elt: 1)*9*9 +
78 getMaskElt(Mask, Elt: 2)*9 + getMaskElt(Mask, Elt: 3);
79}
80
81static void PrintMask(unsigned i, std::ostream &OS) {
82 OS << "<" << (char)(getMaskElt(Mask: i, Elt: 0) == 8 ? 'u' : ('0'+getMaskElt(Mask: i, Elt: 0)))
83 << "," << (char)(getMaskElt(Mask: i, Elt: 1) == 8 ? 'u' : ('0'+getMaskElt(Mask: i, Elt: 1)))
84 << "," << (char)(getMaskElt(Mask: i, Elt: 2) == 8 ? 'u' : ('0'+getMaskElt(Mask: i, Elt: 2)))
85 << "," << (char)(getMaskElt(Mask: i, Elt: 3) == 8 ? 'u' : ('0'+getMaskElt(Mask: i, Elt: 3)))
86 << ">";
87}
88
89/// ShuffleVal - This represents a shufflevector operation.
90struct ShuffleVal {
91 Operator *Op; // The Operation used to generate this value.
92 unsigned Cost; // Number of instrs used to generate this value.
93 unsigned short Arg0, Arg1; // Input operands for this value.
94
95 ShuffleVal() : Cost(1000000) {}
96};
97
98
99/// ShufTab - This is the actual shuffle table that we are trying to generate.
100///
101static ShuffleVal ShufTab[65536];
102
103/// TheOperators - All of the operators that this target supports.
104static std::vector<Operator*> TheOperators;
105
106/// Operator - This is a vector operation that is available for use.
107struct Operator {
108 const char *Name;
109 unsigned short ShuffleMask;
110 unsigned short OpNum;
111 unsigned Cost;
112
113 Operator(unsigned short shufflemask, const char *name, unsigned opnum,
114 unsigned cost = 1)
115 : Name(name), ShuffleMask(shufflemask), OpNum(opnum),Cost(cost) {
116 TheOperators.push_back(x: this);
117 }
118 ~Operator() {
119 assert(TheOperators.back() == this);
120 TheOperators.pop_back();
121 }
122
123 bool isOnlyLHSOperator() const {
124 return isOnlyLHSMask(Mask: ShuffleMask);
125 }
126
127 const char *getName() const { return Name; }
128 unsigned getCost() const { return Cost; }
129
130 unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) {
131 // Extract the elements from LHSMask and RHSMask, as appropriate.
132 unsigned Result = 0;
133 for (unsigned i = 0; i != 4; ++i) {
134 unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF;
135 unsigned ResElt;
136 if (SrcElt < 4)
137 ResElt = getMaskElt(Mask: LHSMask, Elt: SrcElt);
138 else if (SrcElt < 8)
139 ResElt = getMaskElt(Mask: RHSMask, Elt: SrcElt-4);
140 else {
141 assert(SrcElt == 8 && "Bad src elt!");
142 ResElt = 8;
143 }
144 Result |= ResElt << (4*i);
145 }
146 return Result;
147 }
148};
149
150#ifdef GENERATE_NEON_INS
151// Special case "insert" op identifier used below
152static Operator InsOp(0, "ins", 15, 1);
153#endif
154
155static const char *getZeroCostOpName(unsigned short Op) {
156 if (ShufTab[Op].Arg0 == 0x0123)
157 return "LHS";
158 else if (ShufTab[Op].Arg0 == 0x4567)
159 return "RHS";
160 else {
161 assert(0 && "bad zero cost operation");
162 abort();
163 }
164}
165
166static void PrintOperation(unsigned ValNo, unsigned short Vals[]) {
167 unsigned short ThisOp = Vals[ValNo];
168 std::cerr << "t" << ValNo;
169 PrintMask(i: ThisOp, OS&: std::cerr);
170 std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(";
171
172 if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) {
173 std::cerr << getZeroCostOpName(Op: ShufTab[ThisOp].Arg0);
174 PrintMask(i: ShufTab[ThisOp].Arg0, OS&: std::cerr);
175 } else {
176 // Figure out what tmp # it is.
177 for (unsigned i = 0; ; ++i)
178 if (Vals[i] == ShufTab[ThisOp].Arg0) {
179 std::cerr << "t" << i;
180 break;
181 }
182 }
183
184#ifdef GENERATE_NEON_INS
185 if (ShufTab[ThisOp].Op == &InsOp) {
186 std::cerr << ", lane " << ShufTab[ThisOp].Arg1;
187 } else
188#endif
189 if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) {
190 std::cerr << ", ";
191 if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) {
192 std::cerr << getZeroCostOpName(Op: ShufTab[ThisOp].Arg1);
193 PrintMask(i: ShufTab[ThisOp].Arg1, OS&: std::cerr);
194 } else {
195 // Figure out what tmp # it is.
196 for (unsigned i = 0; ; ++i)
197 if (Vals[i] == ShufTab[ThisOp].Arg1) {
198 std::cerr << "t" << i;
199 break;
200 }
201 }
202 }
203 std::cerr << ") ";
204}
205
206static unsigned getNumEntered() {
207 unsigned Count = 0;
208 for (unsigned i = 0; i != 65536; ++i)
209 Count += ShufTab[i].Cost < 100;
210 return Count;
211}
212
213static void EvaluateOps(unsigned short Elt, unsigned short Vals[],
214 unsigned &NumVals) {
215 if (ShufTab[Elt].Cost == 0) return;
216#ifdef GENERATE_NEON_INS
217 if (ShufTab[Elt].Op == &InsOp) {
218 EvaluateOps(Elt: ShufTab[Elt].Arg0, Vals, NumVals);
219 Vals[NumVals++] = Elt;
220 return;
221 }
222#endif
223
224 // If this value has already been evaluated, it is free. FIXME: match undefs.
225 for (unsigned i = 0, e = NumVals; i != e; ++i)
226 if (Vals[i] == Elt) return;
227
228 // Otherwise, get the operands of the value, then add it.
229 unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1;
230 if (ShufTab[Arg0].Cost)
231 EvaluateOps(Elt: Arg0, Vals, NumVals);
232 if (Arg0 != Arg1 && ShufTab[Arg1].Cost)
233 EvaluateOps(Elt: Arg1, Vals, NumVals);
234
235 Vals[NumVals++] = Elt;
236}
237
238
239int main() {
240 // Seed the table with accesses to the LHS and RHS.
241 ShufTab[0x0123].Cost = 0;
242 ShufTab[0x0123].Op = nullptr;
243 ShufTab[0x0123].Arg0 = 0x0123;
244 ShufTab[0x4567].Cost = 0;
245 ShufTab[0x4567].Op = nullptr;
246 ShufTab[0x4567].Arg0 = 0x4567;
247
248 // Seed the first-level of shuffles, shuffles whose inputs are the input to
249 // the vectorshuffle operation.
250 bool MadeChange = true;
251 unsigned OpCount = 0;
252 while (MadeChange) {
253 MadeChange = false;
254 ++OpCount;
255 std::cerr << "Starting iteration #" << OpCount << " with "
256 << getNumEntered() << " entries established.\n";
257
258 // Scan the table for two reasons: First, compute the maximum cost of any
259 // operation left in the table. Second, make sure that values with undefs
260 // have the cheapest alternative that they match.
261 unsigned MaxCost = ShufTab[0].Cost;
262 for (unsigned i = 1; i != 0x8889; ++i) {
263 if (!isValidMask(Mask: i)) continue;
264 if (ShufTab[i].Cost > MaxCost)
265 MaxCost = ShufTab[i].Cost;
266
267 // If this value has an undef, make it be computed the cheapest possible
268 // way of any of the things that it matches.
269 if (hasUndefElements(Mask: i)) {
270 // This code is a little bit tricky, so here's the idea: consider some
271 // permutation, like 7u4u. To compute the lowest cost for 7u4u, we
272 // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries. If
273 // there are 3 undefs, the number rises to 729 entries we have to scan,
274 // and for the 4 undef case, we have to scan the whole table.
275 //
276 // Instead of doing this huge amount of scanning, we process the table
277 // entries *in order*, and use the fact that 'u' is 8, larger than any
278 // valid index. Given an entry like 7u4u then, we only need to scan
279 // 7[0-7]4u - 8 entries. We can get away with this, because we already
280 // know that each of 704u, 714u, 724u, etc contain the minimum value of
281 // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively.
282 unsigned UndefIdx;
283 if (i & 0x8000)
284 UndefIdx = 0;
285 else if (i & 0x0800)
286 UndefIdx = 1;
287 else if (i & 0x0080)
288 UndefIdx = 2;
289 else if (i & 0x0008)
290 UndefIdx = 3;
291 else
292 abort();
293
294 unsigned MinVal = i;
295 unsigned MinCost = ShufTab[i].Cost;
296
297 // Scan the 8 entries.
298 for (unsigned j = 0; j != 8; ++j) {
299 unsigned NewElt = setMaskElt(Mask: i, Elt: UndefIdx, NewVal: j);
300 if (ShufTab[NewElt].Cost < MinCost) {
301 MinCost = ShufTab[NewElt].Cost;
302 MinVal = NewElt;
303 }
304 }
305
306 // If we found something cheaper than what was here before, use it.
307 if (i != MinVal) {
308 MadeChange = true;
309 ShufTab[i] = ShufTab[MinVal];
310 }
311 }
312#ifdef GENERATE_NEON_INS
313 else {
314 // Similarly, if we take the mask (eg 3,6,1,0) and take the cost with
315 // undef for each lane (eg u,6,1,0 or 3,u,1,0 etc), we can use a single
316 // lane insert to fixup the result.
317 for (unsigned LaneIdx = 0; LaneIdx < 4; LaneIdx++) {
318 if (getMaskElt(Mask: i, Elt: LaneIdx) == 8)
319 continue;
320 unsigned NewElt = setMaskElt(Mask: i, Elt: LaneIdx, NewVal: 8);
321 if (ShufTab[NewElt].Cost + 1 < ShufTab[i].Cost) {
322 MadeChange = true;
323 ShufTab[i].Cost = ShufTab[NewElt].Cost + 1;
324 ShufTab[i].Op = &InsOp;
325 ShufTab[i].Arg0 = NewElt;
326 ShufTab[i].Arg1 = LaneIdx;
327 }
328 }
329
330 // Similar idea for using a D register mov, masking out 2 lanes to undef
331 for (unsigned LaneIdx = 0; LaneIdx < 4; LaneIdx += 2) {
332 unsigned Ln0 = getMaskElt(Mask: i, Elt: LaneIdx);
333 unsigned Ln1 = getMaskElt(Mask: i, Elt: LaneIdx + 1);
334 if ((Ln0 == 0 && Ln1 == 1) || (Ln0 == 2 && Ln1 == 3) ||
335 (Ln0 == 4 && Ln1 == 5) || (Ln0 == 6 && Ln1 == 7)) {
336 unsigned NewElt = setMaskElt(Mask: i, Elt: LaneIdx, NewVal: 8);
337 NewElt = setMaskElt(Mask: NewElt, Elt: LaneIdx + 1, NewVal: 8);
338 if (ShufTab[NewElt].Cost + 1 < ShufTab[i].Cost) {
339 MadeChange = true;
340 ShufTab[i].Cost = ShufTab[NewElt].Cost + 1;
341 ShufTab[i].Op = &InsOp;
342 ShufTab[i].Arg0 = NewElt;
343 ShufTab[i].Arg1 = (LaneIdx >> 1) | 0x4;
344 }
345 }
346 }
347 }
348#endif
349 }
350
351 for (unsigned LHS = 0; LHS != 0x8889; ++LHS) {
352 if (!isValidMask(Mask: LHS)) continue;
353 if (ShufTab[LHS].Cost > 1000) continue;
354
355 // If nothing involving this operand could possibly be cheaper than what
356 // we already have, don't consider it.
357 if (ShufTab[LHS].Cost + 1 >= MaxCost)
358 continue;
359
360 for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) {
361 Operator *Op = TheOperators[opnum];
362#ifdef GENERATE_NEON_INS
363 if (Op == &InsOp)
364 continue;
365#endif
366
367 // Evaluate op(LHS,LHS)
368 unsigned ResultMask = Op->getTransformedMask(LHSMask: LHS, RHSMask: LHS);
369
370 unsigned Cost = ShufTab[LHS].Cost + Op->getCost();
371 if (Cost < ShufTab[ResultMask].Cost) {
372 ShufTab[ResultMask].Cost = Cost;
373 ShufTab[ResultMask].Op = Op;
374 ShufTab[ResultMask].Arg0 = LHS;
375 ShufTab[ResultMask].Arg1 = LHS;
376 MadeChange = true;
377 }
378
379 // If this is a two input instruction, include the op(x,y) cases. If
380 // this is a one input instruction, skip this.
381 if (Op->isOnlyLHSOperator()) continue;
382
383 for (unsigned RHS = 0; RHS != 0x8889; ++RHS) {
384 if (!isValidMask(Mask: RHS)) continue;
385 if (ShufTab[RHS].Cost > 1000) continue;
386
387 // If nothing involving this operand could possibly be cheaper than
388 // what we already have, don't consider it.
389 if (ShufTab[RHS].Cost + 1 >= MaxCost)
390 continue;
391
392
393 // Evaluate op(LHS,RHS)
394 unsigned ResultMask = Op->getTransformedMask(LHSMask: LHS, RHSMask: RHS);
395
396 if (ShufTab[ResultMask].Cost <= OpCount ||
397 ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost ||
398 ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost)
399 continue;
400
401 // Figure out the cost to evaluate this, knowing that CSE's only need
402 // to be evaluated once.
403 unsigned short Vals[30];
404 unsigned NumVals = 0;
405 EvaluateOps(Elt: LHS, Vals, NumVals);
406 EvaluateOps(Elt: RHS, Vals, NumVals);
407
408 unsigned Cost = NumVals + Op->getCost();
409 if (Cost < ShufTab[ResultMask].Cost) {
410 ShufTab[ResultMask].Cost = Cost;
411 ShufTab[ResultMask].Op = Op;
412 ShufTab[ResultMask].Arg0 = LHS;
413 ShufTab[ResultMask].Arg1 = RHS;
414 MadeChange = true;
415 }
416 }
417 }
418 }
419 }
420
421 std::cerr << "Finished Table has " << getNumEntered()
422 << " entries established.\n";
423
424 unsigned CostArray[10] = { 0 };
425
426 // Compute a cost histogram.
427 for (unsigned i = 0; i != 65536; ++i) {
428 if (!isValidMask(Mask: i)) continue;
429 if (ShufTab[i].Cost > 9)
430 ++CostArray[9];
431 else
432 ++CostArray[ShufTab[i].Cost];
433 }
434
435 for (unsigned i = 0; i != 9; ++i)
436 if (CostArray[i])
437 std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n";
438 if (CostArray[9])
439 std::cout << "// " << CostArray[9] << " entries have higher cost!\n";
440
441
442 // Build up the table to emit.
443 std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n";
444 std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n";
445
446 for (unsigned i = 0; i != 0x8889; ++i) {
447 if (!isValidMask(Mask: i)) continue;
448
449 // CostSat - The cost of this operation saturated to two bits.
450 unsigned CostSat = ShufTab[i].Cost;
451 if (CostSat > 4) CostSat = 4;
452 if (CostSat == 0) CostSat = 1;
453 --CostSat; // Cost is now between 0-3.
454
455 unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0;
456 assert(OpNum < 16 && "Too few bits to encode operation!");
457
458 unsigned LHS = getCompressedMask(Mask: ShufTab[i].Arg0);
459 unsigned RHS = getCompressedMask(Mask: ShufTab[i].Arg1);
460
461 // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of
462 // LHS, and 13 bits of RHS = 32 bits.
463 unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS;
464
465 std::cout << " " << std::setw(10) << Val << "U, // ";
466 PrintMask(i, OS&: std::cout);
467 std::cout << ": Cost " << ShufTab[i].Cost;
468 std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy");
469 std::cout << " ";
470 if (ShufTab[ShufTab[i].Arg0].Cost == 0) {
471 std::cout << getZeroCostOpName(Op: ShufTab[i].Arg0);
472 } else {
473 PrintMask(i: ShufTab[i].Arg0, OS&: std::cout);
474 }
475
476 if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) {
477 std::cout << ", ";
478 if (ShufTab[ShufTab[i].Arg1].Cost == 0) {
479 std::cout << getZeroCostOpName(Op: ShufTab[i].Arg1);
480 } else {
481 PrintMask(i: ShufTab[i].Arg1, OS&: std::cout);
482 }
483 }
484#ifdef GENERATE_NEON_INS
485 else if (ShufTab[i].Op == &InsOp) {
486 std::cout << ", lane " << ShufTab[i].Arg1;
487 }
488#endif
489
490 std::cout << "\n";
491 }
492 std::cout << " 0\n};\n";
493
494 if (false) {
495 // Print out the table.
496 for (unsigned i = 0; i != 0x8889; ++i) {
497 if (!isValidMask(Mask: i)) continue;
498 if (ShufTab[i].Cost < 1000) {
499 PrintMask(i, OS&: std::cerr);
500 std::cerr << " - Cost " << ShufTab[i].Cost << " - ";
501
502 unsigned short Vals[30];
503 unsigned NumVals = 0;
504 EvaluateOps(Elt: i, Vals, NumVals);
505
506 for (unsigned j = 0, e = NumVals; j != e; ++j)
507 PrintOperation(ValNo: j, Vals);
508 std::cerr << "\n";
509 }
510 }
511 }
512}
513
514
515#ifdef GENERATE_ALTIVEC
516
517///===---------------------------------------------------------------------===//
518/// The altivec instruction definitions. This is the altivec-specific part of
519/// this file.
520///===---------------------------------------------------------------------===//
521
522// Note that the opcode numbers here must match those in the PPC backend.
523enum {
524 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
525 OP_VMRGHW,
526 OP_VMRGLW,
527 OP_VSPLTISW0,
528 OP_VSPLTISW1,
529 OP_VSPLTISW2,
530 OP_VSPLTISW3,
531 OP_VSLDOI4,
532 OP_VSLDOI8,
533 OP_VSLDOI12
534};
535
536struct vmrghw : public Operator {
537 vmrghw() : Operator(0x0415, "vmrghw", OP_VMRGHW) {}
538} the_vmrghw;
539
540struct vmrglw : public Operator {
541 vmrglw() : Operator(0x2637, "vmrglw", OP_VMRGLW) {}
542} the_vmrglw;
543
544template<unsigned Elt>
545struct vspltisw : public Operator {
546 vspltisw(const char *N, unsigned Opc)
547 : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
548};
549
550vspltisw<0> the_vspltisw0("vspltisw0", OP_VSPLTISW0);
551vspltisw<1> the_vspltisw1("vspltisw1", OP_VSPLTISW1);
552vspltisw<2> the_vspltisw2("vspltisw2", OP_VSPLTISW2);
553vspltisw<3> the_vspltisw3("vspltisw3", OP_VSPLTISW3);
554
555template<unsigned N>
556struct vsldoi : public Operator {
557 vsldoi(const char *Name, unsigned Opc)
558 : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
559 }
560};
561
562vsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4);
563vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8);
564vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12);
565
566#endif
567
568#ifdef GENERATE_NEON
569enum {
570 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
571 OP_VREV,
572 OP_VDUP0,
573 OP_VDUP1,
574 OP_VDUP2,
575 OP_VDUP3,
576 OP_VEXT1,
577 OP_VEXT2,
578 OP_VEXT3,
579 OP_VUZPL, // VUZP, left result
580 OP_VUZPR, // VUZP, right result
581 OP_VZIPL, // VZIP, left result
582 OP_VZIPR, // VZIP, right result
583 OP_VTRNL, // VTRN, left result
584 OP_VTRNR // VTRN, right result
585};
586
587struct vrev : public Operator {
588 vrev() : Operator(0x1032, "vrev", OP_VREV) {}
589} the_vrev;
590
591template<unsigned Elt>
592struct vdup : public Operator {
593 vdup(const char *N, unsigned Opc)
594 : Operator(MakeMask(V0: Elt, V1: Elt, V2: Elt, V3: Elt), N, Opc) {}
595};
596
597vdup<0> the_vdup0("vdup0", OP_VDUP0);
598vdup<1> the_vdup1("vdup1", OP_VDUP1);
599vdup<2> the_vdup2("vdup2", OP_VDUP2);
600vdup<3> the_vdup3("vdup3", OP_VDUP3);
601
602template<unsigned N>
603struct vext : public Operator {
604 vext(const char *Name, unsigned Opc)
605 : Operator(MakeMask(V0: N&7, V1: (N+1)&7, V2: (N+2)&7, V3: (N+3)&7), Name, Opc) {
606 }
607};
608
609vext<1> the_vext1("vext1", OP_VEXT1);
610vext<2> the_vext2("vext2", OP_VEXT2);
611vext<3> the_vext3("vext3", OP_VEXT3);
612
613struct vuzpl : public Operator {
614 vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 1) {}
615} the_vuzpl;
616
617struct vuzpr : public Operator {
618 vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 1) {}
619} the_vuzpr;
620
621struct vzipl : public Operator {
622 vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 1) {}
623} the_vzipl;
624
625struct vzipr : public Operator {
626 vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 1) {}
627} the_vzipr;
628
629struct vtrnl : public Operator {
630 vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 1) {}
631} the_vtrnl;
632
633struct vtrnr : public Operator {
634 vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 1) {}
635} the_vtrnr;
636
637#endif
638