1 | //===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file computes an optimal sequence of instructions for doing all shuffles |
10 | // of two 4-element vectors. With a release build and when configured to emit |
11 | // an altivec instruction table, this takes about 30s to run on a 2.7Ghz |
12 | // PowerPC G5. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include <cassert> |
17 | #include <cstdlib> |
18 | #include <iomanip> |
19 | #include <iostream> |
20 | #include <vector> |
21 | |
22 | #define GENERATE_NEON |
23 | #define GENERATE_NEON_INS |
24 | |
25 | struct Operator; |
26 | |
27 | // Masks are 4-nibble hex numbers. Values 0-7 in any nibble means that it takes |
28 | // an element from that value of the input vectors. A value of 8 means the |
29 | // entry is undefined. |
30 | |
31 | // Mask manipulation functions. |
32 | static inline unsigned short MakeMask(unsigned V0, unsigned V1, |
33 | unsigned V2, unsigned V3) { |
34 | return (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) | (V3 << (0*4)); |
35 | } |
36 | |
37 | /// getMaskElt - Return element N of the specified mask. |
38 | static unsigned getMaskElt(unsigned Mask, unsigned Elt) { |
39 | return (Mask >> ((3-Elt)*4)) & 0xF; |
40 | } |
41 | |
42 | static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) { |
43 | unsigned FieldShift = ((3-Elt)*4); |
44 | return (Mask & ~(0xF << FieldShift)) | (NewVal << FieldShift); |
45 | } |
46 | |
47 | // Reject elements where the values are 9-15. |
48 | static bool isValidMask(unsigned short Mask) { |
49 | unsigned short UndefBits = Mask & 0x8888; |
50 | return (Mask & ((UndefBits >> 1)|(UndefBits>>2)|(UndefBits>>3))) == 0; |
51 | } |
52 | |
53 | /// hasUndefElements - Return true if any of the elements in the mask are undefs |
54 | /// |
55 | static bool hasUndefElements(unsigned short Mask) { |
56 | return (Mask & 0x8888) != 0; |
57 | } |
58 | |
59 | /// isOnlyLHSMask - Return true if this mask only refers to its LHS, not |
60 | /// including undef values.. |
61 | static bool isOnlyLHSMask(unsigned short Mask) { |
62 | return (Mask & 0x4444) == 0; |
63 | } |
64 | |
65 | /// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to |
66 | /// refer to the LHS only (for when one argument value is passed into the same |
67 | /// function twice). |
68 | #if 0 |
69 | static unsigned short getLHSOnlyMask(unsigned short Mask) { |
70 | return Mask & 0xBBBB; // Keep only LHS and Undefs. |
71 | } |
72 | #endif |
73 | |
74 | /// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4 |
75 | /// bits) into a compressed 13-bit mask, where each elt is multiplied by 9. |
76 | static unsigned getCompressedMask(unsigned short Mask) { |
77 | return getMaskElt(Mask, Elt: 0)*9*9*9 + getMaskElt(Mask, Elt: 1)*9*9 + |
78 | getMaskElt(Mask, Elt: 2)*9 + getMaskElt(Mask, Elt: 3); |
79 | } |
80 | |
81 | static void PrintMask(unsigned i, std::ostream &OS) { |
82 | OS << "<" << (char)(getMaskElt(Mask: i, Elt: 0) == 8 ? 'u' : ('0'+getMaskElt(Mask: i, Elt: 0))) |
83 | << "," << (char)(getMaskElt(Mask: i, Elt: 1) == 8 ? 'u' : ('0'+getMaskElt(Mask: i, Elt: 1))) |
84 | << "," << (char)(getMaskElt(Mask: i, Elt: 2) == 8 ? 'u' : ('0'+getMaskElt(Mask: i, Elt: 2))) |
85 | << "," << (char)(getMaskElt(Mask: i, Elt: 3) == 8 ? 'u' : ('0'+getMaskElt(Mask: i, Elt: 3))) |
86 | << ">" ; |
87 | } |
88 | |
89 | /// ShuffleVal - This represents a shufflevector operation. |
90 | struct ShuffleVal { |
91 | Operator *Op; // The Operation used to generate this value. |
92 | unsigned Cost; // Number of instrs used to generate this value. |
93 | unsigned short Arg0, Arg1; // Input operands for this value. |
94 | |
95 | ShuffleVal() : Cost(1000000) {} |
96 | }; |
97 | |
98 | |
99 | /// ShufTab - This is the actual shuffle table that we are trying to generate. |
100 | /// |
101 | static ShuffleVal ShufTab[65536]; |
102 | |
103 | /// TheOperators - All of the operators that this target supports. |
104 | static std::vector<Operator*> TheOperators; |
105 | |
106 | /// Operator - This is a vector operation that is available for use. |
107 | struct Operator { |
108 | const char *Name; |
109 | unsigned short ShuffleMask; |
110 | unsigned short OpNum; |
111 | unsigned Cost; |
112 | |
113 | Operator(unsigned short shufflemask, const char *name, unsigned opnum, |
114 | unsigned cost = 1) |
115 | : Name(name), ShuffleMask(shufflemask), OpNum(opnum),Cost(cost) { |
116 | TheOperators.push_back(x: this); |
117 | } |
118 | ~Operator() { |
119 | assert(TheOperators.back() == this); |
120 | TheOperators.pop_back(); |
121 | } |
122 | |
123 | bool isOnlyLHSOperator() const { |
124 | return isOnlyLHSMask(Mask: ShuffleMask); |
125 | } |
126 | |
127 | const char *getName() const { return Name; } |
128 | unsigned getCost() const { return Cost; } |
129 | |
130 | unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) { |
131 | // Extract the elements from LHSMask and RHSMask, as appropriate. |
132 | unsigned Result = 0; |
133 | for (unsigned i = 0; i != 4; ++i) { |
134 | unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF; |
135 | unsigned ResElt; |
136 | if (SrcElt < 4) |
137 | ResElt = getMaskElt(Mask: LHSMask, Elt: SrcElt); |
138 | else if (SrcElt < 8) |
139 | ResElt = getMaskElt(Mask: RHSMask, Elt: SrcElt-4); |
140 | else { |
141 | assert(SrcElt == 8 && "Bad src elt!" ); |
142 | ResElt = 8; |
143 | } |
144 | Result |= ResElt << (4*i); |
145 | } |
146 | return Result; |
147 | } |
148 | }; |
149 | |
150 | #ifdef GENERATE_NEON_INS |
151 | // Special case "insert" op identifier used below |
152 | static Operator InsOp(0, "ins" , 15, 1); |
153 | #endif |
154 | |
155 | static const char *getZeroCostOpName(unsigned short Op) { |
156 | if (ShufTab[Op].Arg0 == 0x0123) |
157 | return "LHS" ; |
158 | else if (ShufTab[Op].Arg0 == 0x4567) |
159 | return "RHS" ; |
160 | else { |
161 | assert(0 && "bad zero cost operation" ); |
162 | abort(); |
163 | } |
164 | } |
165 | |
166 | static void PrintOperation(unsigned ValNo, unsigned short Vals[]) { |
167 | unsigned short ThisOp = Vals[ValNo]; |
168 | std::cerr << "t" << ValNo; |
169 | PrintMask(i: ThisOp, OS&: std::cerr); |
170 | std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(" ; |
171 | |
172 | if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) { |
173 | std::cerr << getZeroCostOpName(Op: ShufTab[ThisOp].Arg0); |
174 | PrintMask(i: ShufTab[ThisOp].Arg0, OS&: std::cerr); |
175 | } else { |
176 | // Figure out what tmp # it is. |
177 | for (unsigned i = 0; ; ++i) |
178 | if (Vals[i] == ShufTab[ThisOp].Arg0) { |
179 | std::cerr << "t" << i; |
180 | break; |
181 | } |
182 | } |
183 | |
184 | #ifdef GENERATE_NEON_INS |
185 | if (ShufTab[ThisOp].Op == &InsOp) { |
186 | std::cerr << ", lane " << ShufTab[ThisOp].Arg1; |
187 | } else |
188 | #endif |
189 | if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) { |
190 | std::cerr << ", " ; |
191 | if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) { |
192 | std::cerr << getZeroCostOpName(Op: ShufTab[ThisOp].Arg1); |
193 | PrintMask(i: ShufTab[ThisOp].Arg1, OS&: std::cerr); |
194 | } else { |
195 | // Figure out what tmp # it is. |
196 | for (unsigned i = 0; ; ++i) |
197 | if (Vals[i] == ShufTab[ThisOp].Arg1) { |
198 | std::cerr << "t" << i; |
199 | break; |
200 | } |
201 | } |
202 | } |
203 | std::cerr << ") " ; |
204 | } |
205 | |
206 | static unsigned getNumEntered() { |
207 | unsigned Count = 0; |
208 | for (unsigned i = 0; i != 65536; ++i) |
209 | Count += ShufTab[i].Cost < 100; |
210 | return Count; |
211 | } |
212 | |
213 | static void EvaluateOps(unsigned short Elt, unsigned short Vals[], |
214 | unsigned &NumVals) { |
215 | if (ShufTab[Elt].Cost == 0) return; |
216 | #ifdef GENERATE_NEON_INS |
217 | if (ShufTab[Elt].Op == &InsOp) { |
218 | EvaluateOps(Elt: ShufTab[Elt].Arg0, Vals, NumVals); |
219 | Vals[NumVals++] = Elt; |
220 | return; |
221 | } |
222 | #endif |
223 | |
224 | // If this value has already been evaluated, it is free. FIXME: match undefs. |
225 | for (unsigned i = 0, e = NumVals; i != e; ++i) |
226 | if (Vals[i] == Elt) return; |
227 | |
228 | // Otherwise, get the operands of the value, then add it. |
229 | unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1; |
230 | if (ShufTab[Arg0].Cost) |
231 | EvaluateOps(Elt: Arg0, Vals, NumVals); |
232 | if (Arg0 != Arg1 && ShufTab[Arg1].Cost) |
233 | EvaluateOps(Elt: Arg1, Vals, NumVals); |
234 | |
235 | Vals[NumVals++] = Elt; |
236 | } |
237 | |
238 | |
239 | int main() { |
240 | // Seed the table with accesses to the LHS and RHS. |
241 | ShufTab[0x0123].Cost = 0; |
242 | ShufTab[0x0123].Op = nullptr; |
243 | ShufTab[0x0123].Arg0 = 0x0123; |
244 | ShufTab[0x4567].Cost = 0; |
245 | ShufTab[0x4567].Op = nullptr; |
246 | ShufTab[0x4567].Arg0 = 0x4567; |
247 | |
248 | // Seed the first-level of shuffles, shuffles whose inputs are the input to |
249 | // the vectorshuffle operation. |
250 | bool MadeChange = true; |
251 | unsigned OpCount = 0; |
252 | while (MadeChange) { |
253 | MadeChange = false; |
254 | ++OpCount; |
255 | std::cerr << "Starting iteration #" << OpCount << " with " |
256 | << getNumEntered() << " entries established.\n" ; |
257 | |
258 | // Scan the table for two reasons: First, compute the maximum cost of any |
259 | // operation left in the table. Second, make sure that values with undefs |
260 | // have the cheapest alternative that they match. |
261 | unsigned MaxCost = ShufTab[0].Cost; |
262 | for (unsigned i = 1; i != 0x8889; ++i) { |
263 | if (!isValidMask(Mask: i)) continue; |
264 | if (ShufTab[i].Cost > MaxCost) |
265 | MaxCost = ShufTab[i].Cost; |
266 | |
267 | // If this value has an undef, make it be computed the cheapest possible |
268 | // way of any of the things that it matches. |
269 | if (hasUndefElements(Mask: i)) { |
270 | // This code is a little bit tricky, so here's the idea: consider some |
271 | // permutation, like 7u4u. To compute the lowest cost for 7u4u, we |
272 | // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries. If |
273 | // there are 3 undefs, the number rises to 729 entries we have to scan, |
274 | // and for the 4 undef case, we have to scan the whole table. |
275 | // |
276 | // Instead of doing this huge amount of scanning, we process the table |
277 | // entries *in order*, and use the fact that 'u' is 8, larger than any |
278 | // valid index. Given an entry like 7u4u then, we only need to scan |
279 | // 7[0-7]4u - 8 entries. We can get away with this, because we already |
280 | // know that each of 704u, 714u, 724u, etc contain the minimum value of |
281 | // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively. |
282 | unsigned UndefIdx; |
283 | if (i & 0x8000) |
284 | UndefIdx = 0; |
285 | else if (i & 0x0800) |
286 | UndefIdx = 1; |
287 | else if (i & 0x0080) |
288 | UndefIdx = 2; |
289 | else if (i & 0x0008) |
290 | UndefIdx = 3; |
291 | else |
292 | abort(); |
293 | |
294 | unsigned MinVal = i; |
295 | unsigned MinCost = ShufTab[i].Cost; |
296 | |
297 | // Scan the 8 entries. |
298 | for (unsigned j = 0; j != 8; ++j) { |
299 | unsigned NewElt = setMaskElt(Mask: i, Elt: UndefIdx, NewVal: j); |
300 | if (ShufTab[NewElt].Cost < MinCost) { |
301 | MinCost = ShufTab[NewElt].Cost; |
302 | MinVal = NewElt; |
303 | } |
304 | } |
305 | |
306 | // If we found something cheaper than what was here before, use it. |
307 | if (i != MinVal) { |
308 | MadeChange = true; |
309 | ShufTab[i] = ShufTab[MinVal]; |
310 | } |
311 | } |
312 | #ifdef GENERATE_NEON_INS |
313 | else { |
314 | // Similarly, if we take the mask (eg 3,6,1,0) and take the cost with |
315 | // undef for each lane (eg u,6,1,0 or 3,u,1,0 etc), we can use a single |
316 | // lane insert to fixup the result. |
317 | for (unsigned LaneIdx = 0; LaneIdx < 4; LaneIdx++) { |
318 | if (getMaskElt(Mask: i, Elt: LaneIdx) == 8) |
319 | continue; |
320 | unsigned NewElt = setMaskElt(Mask: i, Elt: LaneIdx, NewVal: 8); |
321 | if (ShufTab[NewElt].Cost + 1 < ShufTab[i].Cost) { |
322 | MadeChange = true; |
323 | ShufTab[i].Cost = ShufTab[NewElt].Cost + 1; |
324 | ShufTab[i].Op = &InsOp; |
325 | ShufTab[i].Arg0 = NewElt; |
326 | ShufTab[i].Arg1 = LaneIdx; |
327 | } |
328 | } |
329 | |
330 | // Similar idea for using a D register mov, masking out 2 lanes to undef |
331 | for (unsigned LaneIdx = 0; LaneIdx < 4; LaneIdx += 2) { |
332 | unsigned Ln0 = getMaskElt(Mask: i, Elt: LaneIdx); |
333 | unsigned Ln1 = getMaskElt(Mask: i, Elt: LaneIdx + 1); |
334 | if ((Ln0 == 0 && Ln1 == 1) || (Ln0 == 2 && Ln1 == 3) || |
335 | (Ln0 == 4 && Ln1 == 5) || (Ln0 == 6 && Ln1 == 7)) { |
336 | unsigned NewElt = setMaskElt(Mask: i, Elt: LaneIdx, NewVal: 8); |
337 | NewElt = setMaskElt(Mask: NewElt, Elt: LaneIdx + 1, NewVal: 8); |
338 | if (ShufTab[NewElt].Cost + 1 < ShufTab[i].Cost) { |
339 | MadeChange = true; |
340 | ShufTab[i].Cost = ShufTab[NewElt].Cost + 1; |
341 | ShufTab[i].Op = &InsOp; |
342 | ShufTab[i].Arg0 = NewElt; |
343 | ShufTab[i].Arg1 = (LaneIdx >> 1) | 0x4; |
344 | } |
345 | } |
346 | } |
347 | } |
348 | #endif |
349 | } |
350 | |
351 | for (unsigned LHS = 0; LHS != 0x8889; ++LHS) { |
352 | if (!isValidMask(Mask: LHS)) continue; |
353 | if (ShufTab[LHS].Cost > 1000) continue; |
354 | |
355 | // If nothing involving this operand could possibly be cheaper than what |
356 | // we already have, don't consider it. |
357 | if (ShufTab[LHS].Cost + 1 >= MaxCost) |
358 | continue; |
359 | |
360 | for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) { |
361 | Operator *Op = TheOperators[opnum]; |
362 | #ifdef GENERATE_NEON_INS |
363 | if (Op == &InsOp) |
364 | continue; |
365 | #endif |
366 | |
367 | // Evaluate op(LHS,LHS) |
368 | unsigned ResultMask = Op->getTransformedMask(LHSMask: LHS, RHSMask: LHS); |
369 | |
370 | unsigned Cost = ShufTab[LHS].Cost + Op->getCost(); |
371 | if (Cost < ShufTab[ResultMask].Cost) { |
372 | ShufTab[ResultMask].Cost = Cost; |
373 | ShufTab[ResultMask].Op = Op; |
374 | ShufTab[ResultMask].Arg0 = LHS; |
375 | ShufTab[ResultMask].Arg1 = LHS; |
376 | MadeChange = true; |
377 | } |
378 | |
379 | // If this is a two input instruction, include the op(x,y) cases. If |
380 | // this is a one input instruction, skip this. |
381 | if (Op->isOnlyLHSOperator()) continue; |
382 | |
383 | for (unsigned RHS = 0; RHS != 0x8889; ++RHS) { |
384 | if (!isValidMask(Mask: RHS)) continue; |
385 | if (ShufTab[RHS].Cost > 1000) continue; |
386 | |
387 | // If nothing involving this operand could possibly be cheaper than |
388 | // what we already have, don't consider it. |
389 | if (ShufTab[RHS].Cost + 1 >= MaxCost) |
390 | continue; |
391 | |
392 | |
393 | // Evaluate op(LHS,RHS) |
394 | unsigned ResultMask = Op->getTransformedMask(LHSMask: LHS, RHSMask: RHS); |
395 | |
396 | if (ShufTab[ResultMask].Cost <= OpCount || |
397 | ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost || |
398 | ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost) |
399 | continue; |
400 | |
401 | // Figure out the cost to evaluate this, knowing that CSE's only need |
402 | // to be evaluated once. |
403 | unsigned short Vals[30]; |
404 | unsigned NumVals = 0; |
405 | EvaluateOps(Elt: LHS, Vals, NumVals); |
406 | EvaluateOps(Elt: RHS, Vals, NumVals); |
407 | |
408 | unsigned Cost = NumVals + Op->getCost(); |
409 | if (Cost < ShufTab[ResultMask].Cost) { |
410 | ShufTab[ResultMask].Cost = Cost; |
411 | ShufTab[ResultMask].Op = Op; |
412 | ShufTab[ResultMask].Arg0 = LHS; |
413 | ShufTab[ResultMask].Arg1 = RHS; |
414 | MadeChange = true; |
415 | } |
416 | } |
417 | } |
418 | } |
419 | } |
420 | |
421 | std::cerr << "Finished Table has " << getNumEntered() |
422 | << " entries established.\n" ; |
423 | |
424 | unsigned CostArray[10] = { 0 }; |
425 | |
426 | // Compute a cost histogram. |
427 | for (unsigned i = 0; i != 65536; ++i) { |
428 | if (!isValidMask(Mask: i)) continue; |
429 | if (ShufTab[i].Cost > 9) |
430 | ++CostArray[9]; |
431 | else |
432 | ++CostArray[ShufTab[i].Cost]; |
433 | } |
434 | |
435 | for (unsigned i = 0; i != 9; ++i) |
436 | if (CostArray[i]) |
437 | std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n" ; |
438 | if (CostArray[9]) |
439 | std::cout << "// " << CostArray[9] << " entries have higher cost!\n" ; |
440 | |
441 | |
442 | // Build up the table to emit. |
443 | std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n" ; |
444 | std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n" ; |
445 | |
446 | for (unsigned i = 0; i != 0x8889; ++i) { |
447 | if (!isValidMask(Mask: i)) continue; |
448 | |
449 | // CostSat - The cost of this operation saturated to two bits. |
450 | unsigned CostSat = ShufTab[i].Cost; |
451 | if (CostSat > 4) CostSat = 4; |
452 | if (CostSat == 0) CostSat = 1; |
453 | --CostSat; // Cost is now between 0-3. |
454 | |
455 | unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0; |
456 | assert(OpNum < 16 && "Too few bits to encode operation!" ); |
457 | |
458 | unsigned LHS = getCompressedMask(Mask: ShufTab[i].Arg0); |
459 | unsigned RHS = getCompressedMask(Mask: ShufTab[i].Arg1); |
460 | |
461 | // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of |
462 | // LHS, and 13 bits of RHS = 32 bits. |
463 | unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS; |
464 | |
465 | std::cout << " " << std::setw(10) << Val << "U, // " ; |
466 | PrintMask(i, OS&: std::cout); |
467 | std::cout << ": Cost " << ShufTab[i].Cost; |
468 | std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy" ); |
469 | std::cout << " " ; |
470 | if (ShufTab[ShufTab[i].Arg0].Cost == 0) { |
471 | std::cout << getZeroCostOpName(Op: ShufTab[i].Arg0); |
472 | } else { |
473 | PrintMask(i: ShufTab[i].Arg0, OS&: std::cout); |
474 | } |
475 | |
476 | if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) { |
477 | std::cout << ", " ; |
478 | if (ShufTab[ShufTab[i].Arg1].Cost == 0) { |
479 | std::cout << getZeroCostOpName(Op: ShufTab[i].Arg1); |
480 | } else { |
481 | PrintMask(i: ShufTab[i].Arg1, OS&: std::cout); |
482 | } |
483 | } |
484 | #ifdef GENERATE_NEON_INS |
485 | else if (ShufTab[i].Op == &InsOp) { |
486 | std::cout << ", lane " << ShufTab[i].Arg1; |
487 | } |
488 | #endif |
489 | |
490 | std::cout << "\n" ; |
491 | } |
492 | std::cout << " 0\n};\n" ; |
493 | |
494 | if (false) { |
495 | // Print out the table. |
496 | for (unsigned i = 0; i != 0x8889; ++i) { |
497 | if (!isValidMask(Mask: i)) continue; |
498 | if (ShufTab[i].Cost < 1000) { |
499 | PrintMask(i, OS&: std::cerr); |
500 | std::cerr << " - Cost " << ShufTab[i].Cost << " - " ; |
501 | |
502 | unsigned short Vals[30]; |
503 | unsigned NumVals = 0; |
504 | EvaluateOps(Elt: i, Vals, NumVals); |
505 | |
506 | for (unsigned j = 0, e = NumVals; j != e; ++j) |
507 | PrintOperation(ValNo: j, Vals); |
508 | std::cerr << "\n" ; |
509 | } |
510 | } |
511 | } |
512 | } |
513 | |
514 | |
515 | #ifdef GENERATE_ALTIVEC |
516 | |
517 | ///===---------------------------------------------------------------------===// |
518 | /// The altivec instruction definitions. This is the altivec-specific part of |
519 | /// this file. |
520 | ///===---------------------------------------------------------------------===// |
521 | |
522 | // Note that the opcode numbers here must match those in the PPC backend. |
523 | enum { |
524 | OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> |
525 | OP_VMRGHW, |
526 | OP_VMRGLW, |
527 | OP_VSPLTISW0, |
528 | OP_VSPLTISW1, |
529 | OP_VSPLTISW2, |
530 | OP_VSPLTISW3, |
531 | OP_VSLDOI4, |
532 | OP_VSLDOI8, |
533 | OP_VSLDOI12 |
534 | }; |
535 | |
536 | struct vmrghw : public Operator { |
537 | vmrghw() : Operator(0x0415, "vmrghw" , OP_VMRGHW) {} |
538 | } the_vmrghw; |
539 | |
540 | struct vmrglw : public Operator { |
541 | vmrglw() : Operator(0x2637, "vmrglw" , OP_VMRGLW) {} |
542 | } the_vmrglw; |
543 | |
544 | template<unsigned Elt> |
545 | struct vspltisw : public Operator { |
546 | vspltisw(const char *N, unsigned Opc) |
547 | : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {} |
548 | }; |
549 | |
550 | vspltisw<0> the_vspltisw0("vspltisw0" , OP_VSPLTISW0); |
551 | vspltisw<1> the_vspltisw1("vspltisw1" , OP_VSPLTISW1); |
552 | vspltisw<2> the_vspltisw2("vspltisw2" , OP_VSPLTISW2); |
553 | vspltisw<3> the_vspltisw3("vspltisw3" , OP_VSPLTISW3); |
554 | |
555 | template<unsigned N> |
556 | struct vsldoi : public Operator { |
557 | vsldoi(const char *Name, unsigned Opc) |
558 | : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) { |
559 | } |
560 | }; |
561 | |
562 | vsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4); |
563 | vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8); |
564 | vsldoi<3> the_vsldoi3("vsldoi12" , OP_VSLDOI12); |
565 | |
566 | #endif |
567 | |
568 | #ifdef GENERATE_NEON |
569 | enum { |
570 | OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> |
571 | OP_VREV, |
572 | OP_VDUP0, |
573 | OP_VDUP1, |
574 | OP_VDUP2, |
575 | OP_VDUP3, |
576 | OP_VEXT1, |
577 | OP_VEXT2, |
578 | OP_VEXT3, |
579 | OP_VUZPL, // VUZP, left result |
580 | OP_VUZPR, // VUZP, right result |
581 | OP_VZIPL, // VZIP, left result |
582 | OP_VZIPR, // VZIP, right result |
583 | OP_VTRNL, // VTRN, left result |
584 | OP_VTRNR // VTRN, right result |
585 | }; |
586 | |
587 | struct vrev : public Operator { |
588 | vrev() : Operator(0x1032, "vrev" , OP_VREV) {} |
589 | } the_vrev; |
590 | |
591 | template<unsigned Elt> |
592 | struct vdup : public Operator { |
593 | vdup(const char *N, unsigned Opc) |
594 | : Operator(MakeMask(V0: Elt, V1: Elt, V2: Elt, V3: Elt), N, Opc) {} |
595 | }; |
596 | |
597 | vdup<0> the_vdup0("vdup0" , OP_VDUP0); |
598 | vdup<1> the_vdup1("vdup1" , OP_VDUP1); |
599 | vdup<2> the_vdup2("vdup2" , OP_VDUP2); |
600 | vdup<3> the_vdup3("vdup3" , OP_VDUP3); |
601 | |
602 | template<unsigned N> |
603 | struct vext : public Operator { |
604 | vext(const char *Name, unsigned Opc) |
605 | : Operator(MakeMask(V0: N&7, V1: (N+1)&7, V2: (N+2)&7, V3: (N+3)&7), Name, Opc) { |
606 | } |
607 | }; |
608 | |
609 | vext<1> the_vext1("vext1" , OP_VEXT1); |
610 | vext<2> the_vext2("vext2" , OP_VEXT2); |
611 | vext<3> the_vext3("vext3" , OP_VEXT3); |
612 | |
613 | struct vuzpl : public Operator { |
614 | vuzpl() : Operator(0x0246, "vuzpl" , OP_VUZPL, 1) {} |
615 | } the_vuzpl; |
616 | |
617 | struct vuzpr : public Operator { |
618 | vuzpr() : Operator(0x1357, "vuzpr" , OP_VUZPR, 1) {} |
619 | } the_vuzpr; |
620 | |
621 | struct vzipl : public Operator { |
622 | vzipl() : Operator(0x0415, "vzipl" , OP_VZIPL, 1) {} |
623 | } the_vzipl; |
624 | |
625 | struct vzipr : public Operator { |
626 | vzipr() : Operator(0x2637, "vzipr" , OP_VZIPR, 1) {} |
627 | } the_vzipr; |
628 | |
629 | struct vtrnl : public Operator { |
630 | vtrnl() : Operator(0x0426, "vtrnl" , OP_VTRNL, 1) {} |
631 | } the_vtrnl; |
632 | |
633 | struct vtrnr : public Operator { |
634 | vtrnr() : Operator(0x1537, "vtrnr" , OP_VTRNR, 1) {} |
635 | } the_vtrnr; |
636 | |
637 | #endif |
638 | |