1 | //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that X86 uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
16 | |
17 | #include "llvm/CodeGen/MachineFunction.h" |
18 | #include "llvm/CodeGen/TargetLowering.h" |
19 | |
20 | namespace llvm { |
21 | class X86Subtarget; |
22 | class X86TargetMachine; |
23 | |
24 | namespace X86ISD { |
25 | // X86 Specific DAG Nodes |
26 | enum NodeType : unsigned { |
27 | // Start the numbering where the builtin ops leave off. |
28 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
29 | |
30 | /// Bit scan forward. |
31 | BSF, |
32 | /// Bit scan reverse. |
33 | BSR, |
34 | |
35 | /// X86 funnel/double shift i16 instructions. These correspond to |
36 | /// X86::SHLDW and X86::SHRDW instructions which have different amt |
37 | /// modulo rules to generic funnel shifts. |
38 | /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. |
39 | FSHL, |
40 | FSHR, |
41 | |
42 | /// Bitwise logical AND of floating point values. This corresponds |
43 | /// to X86::ANDPS or X86::ANDPD. |
44 | FAND, |
45 | |
46 | /// Bitwise logical OR of floating point values. This corresponds |
47 | /// to X86::ORPS or X86::ORPD. |
48 | FOR, |
49 | |
50 | /// Bitwise logical XOR of floating point values. This corresponds |
51 | /// to X86::XORPS or X86::XORPD. |
52 | FXOR, |
53 | |
54 | /// Bitwise logical ANDNOT of floating point values. This |
55 | /// corresponds to X86::ANDNPS or X86::ANDNPD. |
56 | FANDN, |
57 | |
58 | /// These operations represent an abstract X86 call |
59 | /// instruction, which includes a bunch of information. In particular the |
60 | /// operands of these node are: |
61 | /// |
62 | /// #0 - The incoming token chain |
63 | /// #1 - The callee |
64 | /// #2 - The number of arg bytes the caller pushes on the stack. |
65 | /// #3 - The number of arg bytes the callee pops off the stack. |
66 | /// #4 - The value to pass in AL/AX/EAX (optional) |
67 | /// #5 - The value to pass in DL/DX/EDX (optional) |
68 | /// |
69 | /// The result values of these nodes are: |
70 | /// |
71 | /// #0 - The outgoing token chain |
72 | /// #1 - The first register result value (optional) |
73 | /// #2 - The second register result value (optional) |
74 | /// |
75 | CALL, |
76 | |
77 | /// Same as call except it adds the NoTrack prefix. |
78 | NT_CALL, |
79 | |
80 | // Pseudo for a OBJC call that gets emitted together with a special |
81 | // marker instruction. |
82 | CALL_RVMARKER, |
83 | |
84 | /// The same as ISD::CopyFromReg except that this node makes it explicit |
85 | /// that it may lower to an x87 FPU stack pop. Optimizations should be more |
86 | /// cautious when handling this node than a normal CopyFromReg to avoid |
87 | /// removing a required FPU stack pop. A key requirement is optimizations |
88 | /// should not optimize any users of a chain that contains a |
89 | /// POP_FROM_X87_REG to use a chain from a point earlier than the |
90 | /// POP_FROM_X87_REG (which may remove a required FPU stack pop). |
91 | POP_FROM_X87_REG, |
92 | |
93 | // Pseudo for a call to an imported function to ensure the correct machine |
94 | // instruction is emitted for Import Call Optimization. |
95 | IMP_CALL, |
96 | |
97 | /// X86 compare and logical compare instructions. |
98 | CMP, |
99 | FCMP, |
100 | COMI, |
101 | UCOMI, |
102 | |
103 | // X86 compare with Intrinsics similar to COMI. |
104 | COMX, |
105 | UCOMX, |
106 | |
107 | /// X86 bit-test instructions. |
108 | BT, |
109 | |
110 | /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS |
111 | /// operand, usually produced by a CMP instruction. |
112 | SETCC, |
113 | |
114 | /// X86 Select |
115 | SELECTS, |
116 | |
117 | // Same as SETCC except it's materialized with a sbb and the value is all |
118 | // one's or all zero's. |
119 | SETCC_CARRY, // R = carry_bit ? ~0 : 0 |
120 | |
121 | /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. |
122 | /// Operands are two FP values to compare; result is a mask of |
123 | /// 0s or 1s. Generally DTRT for C/C++ with NaNs. |
124 | FSETCC, |
125 | |
126 | /// X86 FP SETCC, similar to above, but with output as an i1 mask and |
127 | /// and a version with SAE. |
128 | FSETCCM, |
129 | FSETCCM_SAE, |
130 | |
131 | /// X86 conditional moves. Operand 0 and operand 1 are the two values |
132 | /// to select from. Operand 2 is the condition code, and operand 3 is the |
133 | /// flag operand produced by a CMP or TEST instruction. |
134 | CMOV, |
135 | |
136 | /// X86 conditional branches. Operand 0 is the chain operand, operand 1 |
137 | /// is the block to branch if condition is true, operand 2 is the |
138 | /// condition code, and operand 3 is the flag operand produced by a CMP |
139 | /// or TEST instruction. |
140 | BRCOND, |
141 | |
142 | /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and |
143 | /// operand 1 is the target address. |
144 | NT_BRIND, |
145 | |
146 | /// Return with a glue operand. Operand 0 is the chain operand, operand |
147 | /// 1 is the number of bytes of stack to pop. |
148 | RET_GLUE, |
149 | |
150 | /// Return from interrupt. Operand 0 is the number of bytes to pop. |
151 | IRET, |
152 | |
153 | /// Repeat fill, corresponds to X86::REP_STOSx. |
154 | REP_STOS, |
155 | |
156 | /// Repeat move, corresponds to X86::REP_MOVSx. |
157 | REP_MOVS, |
158 | |
159 | /// On Darwin, this node represents the result of the popl |
160 | /// at function entry, used for PIC code. |
161 | GlobalBaseReg, |
162 | |
163 | /// A wrapper node for TargetConstantPool, TargetJumpTable, |
164 | /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, |
165 | /// MCSymbol and TargetBlockAddress. |
166 | Wrapper, |
167 | |
168 | /// Special wrapper used under X86-64 PIC mode for RIP |
169 | /// relative displacements. |
170 | WrapperRIP, |
171 | |
172 | /// Copies a 64-bit value from an MMX vector to the low word |
173 | /// of an XMM vector, with the high word zero filled. |
174 | MOVQ2DQ, |
175 | |
176 | /// Copies a 64-bit value from the low word of an XMM vector |
177 | /// to an MMX vector. |
178 | MOVDQ2Q, |
179 | |
180 | /// Copies a 32-bit value from the low word of a MMX |
181 | /// vector to a GPR. |
182 | MMX_MOVD2W, |
183 | |
184 | /// Copies a GPR into the low 32-bit word of a MMX vector |
185 | /// and zero out the high word. |
186 | MMX_MOVW2D, |
187 | |
188 | /// Extract an 8-bit value from a vector and zero extend it to |
189 | /// i32, corresponds to X86::PEXTRB. |
190 | PEXTRB, |
191 | |
192 | /// Extract a 16-bit value from a vector and zero extend it to |
193 | /// i32, corresponds to X86::PEXTRW. |
194 | PEXTRW, |
195 | |
196 | /// Insert any element of a 4 x float vector into any element |
197 | /// of a destination 4 x floatvector. |
198 | INSERTPS, |
199 | |
200 | /// Insert the lower 8-bits of a 32-bit value to a vector, |
201 | /// corresponds to X86::PINSRB. |
202 | PINSRB, |
203 | |
204 | /// Insert the lower 16-bits of a 32-bit value to a vector, |
205 | /// corresponds to X86::PINSRW. |
206 | PINSRW, |
207 | |
208 | /// Shuffle 16 8-bit values within a vector. |
209 | PSHUFB, |
210 | |
211 | /// Compute Sum of Absolute Differences. |
212 | PSADBW, |
213 | /// Compute Double Block Packed Sum-Absolute-Differences |
214 | DBPSADBW, |
215 | |
216 | /// Bitwise Logical AND NOT of Packed FP values. |
217 | ANDNP, |
218 | |
219 | /// Blend where the selector is an immediate. |
220 | BLENDI, |
221 | |
222 | /// Dynamic (non-constant condition) vector blend where only the sign bits |
223 | /// of the condition elements are used. This is used to enforce that the |
224 | /// condition mask is not valid for generic VSELECT optimizations. This |
225 | /// is also used to implement the intrinsics. |
226 | /// Operands are in VSELECT order: MASK, TRUE, FALSE |
227 | BLENDV, |
228 | |
229 | /// Combined add and sub on an FP vector. |
230 | ADDSUB, |
231 | |
232 | // FP vector ops with rounding mode. |
233 | FADD_RND, |
234 | FADDS, |
235 | FADDS_RND, |
236 | FSUB_RND, |
237 | FSUBS, |
238 | FSUBS_RND, |
239 | FMUL_RND, |
240 | FMULS, |
241 | FMULS_RND, |
242 | FDIV_RND, |
243 | FDIVS, |
244 | FDIVS_RND, |
245 | FMAX_SAE, |
246 | FMAXS_SAE, |
247 | FMIN_SAE, |
248 | FMINS_SAE, |
249 | FSQRT_RND, |
250 | FSQRTS, |
251 | FSQRTS_RND, |
252 | |
253 | // FP vector get exponent. |
254 | FGETEXP, |
255 | FGETEXP_SAE, |
256 | FGETEXPS, |
257 | FGETEXPS_SAE, |
258 | // Extract Normalized Mantissas. |
259 | VGETMANT, |
260 | VGETMANT_SAE, |
261 | VGETMANTS, |
262 | VGETMANTS_SAE, |
263 | // FP Scale. |
264 | SCALEF, |
265 | SCALEF_RND, |
266 | SCALEFS, |
267 | SCALEFS_RND, |
268 | |
269 | /// Integer horizontal add/sub. |
270 | HADD, |
271 | HSUB, |
272 | |
273 | /// Floating point horizontal add/sub. |
274 | FHADD, |
275 | FHSUB, |
276 | |
277 | // Detect Conflicts Within a Vector |
278 | CONFLICT, |
279 | |
280 | /// Floating point max and min. |
281 | FMAX, |
282 | FMIN, |
283 | |
284 | /// Commutative FMIN and FMAX. |
285 | FMAXC, |
286 | FMINC, |
287 | |
288 | /// Scalar intrinsic floating point max and min. |
289 | FMAXS, |
290 | FMINS, |
291 | |
292 | /// Floating point reciprocal-sqrt and reciprocal approximation. |
293 | /// Note that these typically require refinement |
294 | /// in order to obtain suitable precision. |
295 | FRSQRT, |
296 | FRCP, |
297 | |
298 | // AVX-512 reciprocal approximations with a little more precision. |
299 | RSQRT14, |
300 | RSQRT14S, |
301 | RCP14, |
302 | RCP14S, |
303 | |
304 | // Thread Local Storage. |
305 | TLSADDR, |
306 | |
307 | // Thread Local Storage. A call to get the start address |
308 | // of the TLS block for the current module. |
309 | TLSBASEADDR, |
310 | |
311 | // Thread Local Storage. When calling to an OS provided |
312 | // thunk at the address from an earlier relocation. |
313 | TLSCALL, |
314 | |
315 | // Thread Local Storage. A descriptor containing pointer to |
316 | // code and to argument to get the TLS offset for the symbol. |
317 | TLSDESC, |
318 | |
319 | // Exception Handling helpers. |
320 | EH_RETURN, |
321 | |
322 | // SjLj exception handling setjmp. |
323 | EH_SJLJ_SETJMP, |
324 | |
325 | // SjLj exception handling longjmp. |
326 | EH_SJLJ_LONGJMP, |
327 | |
328 | // SjLj exception handling dispatch. |
329 | EH_SJLJ_SETUP_DISPATCH, |
330 | |
331 | /// Tail call return. See X86TargetLowering::LowerCall for |
332 | /// the list of operands. |
333 | TC_RETURN, |
334 | |
335 | // Vector move to low scalar and zero higher vector elements. |
336 | VZEXT_MOVL, |
337 | |
338 | // Vector integer truncate. |
339 | VTRUNC, |
340 | // Vector integer truncate with unsigned/signed saturation. |
341 | VTRUNCUS, |
342 | VTRUNCS, |
343 | |
344 | // Masked version of the above. Used when less than a 128-bit result is |
345 | // produced since the mask only applies to the lower elements and can't |
346 | // be represented by a select. |
347 | // SRC, PASSTHRU, MASK |
348 | VMTRUNC, |
349 | VMTRUNCUS, |
350 | VMTRUNCS, |
351 | |
352 | // Vector FP extend. |
353 | VFPEXT, |
354 | VFPEXT_SAE, |
355 | VFPEXTS, |
356 | VFPEXTS_SAE, |
357 | |
358 | // Vector FP round. |
359 | VFPROUND, |
360 | // Convert TWO packed single data to one packed data |
361 | VFPROUND2, |
362 | VFPROUND2_RND, |
363 | VFPROUND_RND, |
364 | VFPROUNDS, |
365 | VFPROUNDS_RND, |
366 | |
367 | // Masked version of above. Used for v2f64->v4f32. |
368 | // SRC, PASSTHRU, MASK |
369 | VMFPROUND, |
370 | |
371 | // 128-bit vector logical left / right shift |
372 | VSHLDQ, |
373 | VSRLDQ, |
374 | |
375 | // Vector shift elements |
376 | VSHL, |
377 | VSRL, |
378 | VSRA, |
379 | |
380 | // Vector variable shift |
381 | VSHLV, |
382 | VSRLV, |
383 | VSRAV, |
384 | |
385 | // Vector shift elements by immediate |
386 | VSHLI, |
387 | VSRLI, |
388 | VSRAI, |
389 | |
390 | // Shifts of mask registers. |
391 | KSHIFTL, |
392 | KSHIFTR, |
393 | |
394 | // Bit rotate by immediate |
395 | VROTLI, |
396 | VROTRI, |
397 | |
398 | // Vector packed double/float comparison. |
399 | CMPP, |
400 | |
401 | // Vector integer comparisons. |
402 | PCMPEQ, |
403 | PCMPGT, |
404 | |
405 | // v8i16 Horizontal minimum and position. |
406 | PHMINPOS, |
407 | |
408 | MULTISHIFT, |
409 | |
410 | /// Vector comparison generating mask bits for fp and |
411 | /// integer signed and unsigned data types. |
412 | CMPM, |
413 | // Vector mask comparison generating mask bits for FP values. |
414 | CMPMM, |
415 | // Vector mask comparison with SAE for FP values. |
416 | CMPMM_SAE, |
417 | |
418 | // Arithmetic operations with FLAGS results. |
419 | ADD, |
420 | SUB, |
421 | ADC, |
422 | SBB, |
423 | SMUL, |
424 | UMUL, |
425 | OR, |
426 | XOR, |
427 | AND, |
428 | |
429 | // Bit field extract. |
430 | BEXTR, |
431 | BEXTRI, |
432 | |
433 | // Zero High Bits Starting with Specified Bit Position. |
434 | BZHI, |
435 | |
436 | // Parallel extract and deposit. |
437 | PDEP, |
438 | PEXT, |
439 | |
440 | // X86-specific multiply by immediate. |
441 | MUL_IMM, |
442 | |
443 | // Vector sign bit extraction. |
444 | MOVMSK, |
445 | |
446 | // Vector bitwise comparisons. |
447 | PTEST, |
448 | |
449 | // Vector packed fp sign bitwise comparisons. |
450 | TESTP, |
451 | |
452 | // OR/AND test for masks. |
453 | KORTEST, |
454 | KTEST, |
455 | |
456 | // ADD for masks. |
457 | KADD, |
458 | |
459 | // Several flavors of instructions with vector shuffle behaviors. |
460 | // Saturated signed/unnsigned packing. |
461 | PACKSS, |
462 | PACKUS, |
463 | // Intra-lane alignr. |
464 | PALIGNR, |
465 | // AVX512 inter-lane alignr. |
466 | VALIGN, |
467 | PSHUFD, |
468 | PSHUFHW, |
469 | PSHUFLW, |
470 | SHUFP, |
471 | // VBMI2 Concat & Shift. |
472 | VSHLD, |
473 | VSHRD, |
474 | VSHLDV, |
475 | VSHRDV, |
476 | // Shuffle Packed Values at 128-bit granularity. |
477 | SHUF128, |
478 | MOVDDUP, |
479 | MOVSHDUP, |
480 | MOVSLDUP, |
481 | MOVLHPS, |
482 | MOVHLPS, |
483 | MOVSD, |
484 | MOVSS, |
485 | MOVSH, |
486 | UNPCKL, |
487 | UNPCKH, |
488 | VPERMILPV, |
489 | VPERMILPI, |
490 | VPERMI, |
491 | VPERM2X128, |
492 | |
493 | // Variable Permute (VPERM). |
494 | // Res = VPERMV MaskV, V0 |
495 | VPERMV, |
496 | |
497 | // 3-op Variable Permute (VPERMT2). |
498 | // Res = VPERMV3 V0, MaskV, V1 |
499 | VPERMV3, |
500 | |
501 | // Bitwise ternary logic. |
502 | VPTERNLOG, |
503 | // Fix Up Special Packed Float32/64 values. |
504 | VFIXUPIMM, |
505 | VFIXUPIMM_SAE, |
506 | VFIXUPIMMS, |
507 | VFIXUPIMMS_SAE, |
508 | // Range Restriction Calculation For Packed Pairs of Float32/64 values. |
509 | VRANGE, |
510 | VRANGE_SAE, |
511 | VRANGES, |
512 | VRANGES_SAE, |
513 | // Reduce - Perform Reduction Transformation on scalar\packed FP. |
514 | VREDUCE, |
515 | VREDUCE_SAE, |
516 | VREDUCES, |
517 | VREDUCES_SAE, |
518 | // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. |
519 | // Also used by the legacy (V)ROUND intrinsics where we mask out the |
520 | // scaling part of the immediate. |
521 | VRNDSCALE, |
522 | VRNDSCALE_SAE, |
523 | VRNDSCALES, |
524 | VRNDSCALES_SAE, |
525 | // Tests Types Of a FP Values for packed types. |
526 | VFPCLASS, |
527 | // Tests Types Of a FP Values for scalar types. |
528 | VFPCLASSS, |
529 | |
530 | // Broadcast (splat) scalar or element 0 of a vector. If the operand is |
531 | // a vector, this node may change the vector length as part of the splat. |
532 | VBROADCAST, |
533 | // Broadcast mask to vector. |
534 | VBROADCASTM, |
535 | |
536 | /// SSE4A Extraction and Insertion. |
537 | EXTRQI, |
538 | INSERTQI, |
539 | |
540 | // XOP arithmetic/logical shifts. |
541 | VPSHA, |
542 | VPSHL, |
543 | // XOP signed/unsigned integer comparisons. |
544 | VPCOM, |
545 | VPCOMU, |
546 | // XOP packed permute bytes. |
547 | VPPERM, |
548 | // XOP two source permutation. |
549 | VPERMIL2, |
550 | |
551 | // Vector multiply packed unsigned doubleword integers. |
552 | PMULUDQ, |
553 | // Vector multiply packed signed doubleword integers. |
554 | PMULDQ, |
555 | // Vector Multiply Packed UnsignedIntegers with Round and Scale. |
556 | MULHRS, |
557 | |
558 | // Multiply and Add Packed Integers. |
559 | VPMADDUBSW, |
560 | VPMADDWD, |
561 | |
562 | // AVX512IFMA multiply and add. |
563 | // NOTE: These are different than the instruction and perform |
564 | // op0 x op1 + op2. |
565 | VPMADD52L, |
566 | VPMADD52H, |
567 | |
568 | // VNNI |
569 | VPDPBUSD, |
570 | VPDPBUSDS, |
571 | VPDPWSSD, |
572 | VPDPWSSDS, |
573 | |
574 | // FMA nodes. |
575 | // We use the target independent ISD::FMA for the non-inverted case. |
576 | FNMADD, |
577 | FMSUB, |
578 | FNMSUB, |
579 | FMADDSUB, |
580 | FMSUBADD, |
581 | |
582 | // FMA with rounding mode. |
583 | FMADD_RND, |
584 | FNMADD_RND, |
585 | FMSUB_RND, |
586 | FNMSUB_RND, |
587 | FMADDSUB_RND, |
588 | FMSUBADD_RND, |
589 | |
590 | // AVX512-FP16 complex addition and multiplication. |
591 | VFMADDC, |
592 | VFMADDC_RND, |
593 | VFCMADDC, |
594 | VFCMADDC_RND, |
595 | |
596 | VFMULC, |
597 | VFMULC_RND, |
598 | VFCMULC, |
599 | VFCMULC_RND, |
600 | |
601 | VFMADDCSH, |
602 | VFMADDCSH_RND, |
603 | VFCMADDCSH, |
604 | VFCMADDCSH_RND, |
605 | |
606 | VFMULCSH, |
607 | VFMULCSH_RND, |
608 | VFCMULCSH, |
609 | VFCMULCSH_RND, |
610 | |
611 | VPDPBSUD, |
612 | VPDPBSUDS, |
613 | VPDPBUUD, |
614 | VPDPBUUDS, |
615 | VPDPBSSD, |
616 | VPDPBSSDS, |
617 | |
618 | VPDPWSUD, |
619 | VPDPWSUDS, |
620 | VPDPWUSD, |
621 | VPDPWUSDS, |
622 | VPDPWUUD, |
623 | VPDPWUUDS, |
624 | |
625 | VMINMAX, |
626 | VMINMAX_SAE, |
627 | VMINMAXS, |
628 | VMINMAXS_SAE, |
629 | |
630 | CVTP2IBS, |
631 | CVTP2IUBS, |
632 | CVTP2IBS_RND, |
633 | CVTP2IUBS_RND, |
634 | CVTTP2IBS, |
635 | CVTTP2IUBS, |
636 | CVTTP2IBS_SAE, |
637 | CVTTP2IUBS_SAE, |
638 | |
639 | MPSADBW, |
640 | |
641 | VCVT2PH2BF8, |
642 | VCVT2PH2BF8S, |
643 | VCVT2PH2HF8, |
644 | VCVT2PH2HF8S, |
645 | VCVTBIASPH2BF8, |
646 | VCVTBIASPH2BF8S, |
647 | VCVTBIASPH2HF8, |
648 | VCVTBIASPH2HF8S, |
649 | VCVTPH2BF8, |
650 | VCVTPH2BF8S, |
651 | VCVTPH2HF8, |
652 | VCVTPH2HF8S, |
653 | VMCVTBIASPH2BF8, |
654 | VMCVTBIASPH2BF8S, |
655 | VMCVTBIASPH2HF8, |
656 | VMCVTBIASPH2HF8S, |
657 | VMCVTPH2BF8, |
658 | VMCVTPH2BF8S, |
659 | VMCVTPH2HF8, |
660 | VMCVTPH2HF8S, |
661 | VCVTHF82PH, |
662 | |
663 | // Compress and expand. |
664 | COMPRESS, |
665 | EXPAND, |
666 | |
667 | // Bits shuffle |
668 | VPSHUFBITQMB, |
669 | |
670 | // Convert Unsigned/Integer to Floating-Point Value with rounding mode. |
671 | SINT_TO_FP_RND, |
672 | UINT_TO_FP_RND, |
673 | SCALAR_SINT_TO_FP, |
674 | SCALAR_UINT_TO_FP, |
675 | SCALAR_SINT_TO_FP_RND, |
676 | SCALAR_UINT_TO_FP_RND, |
677 | |
678 | // Vector float/double to signed/unsigned integer. |
679 | CVTP2SI, |
680 | CVTP2UI, |
681 | CVTP2SI_RND, |
682 | CVTP2UI_RND, |
683 | // Scalar float/double to signed/unsigned integer. |
684 | CVTS2SI, |
685 | CVTS2UI, |
686 | CVTS2SI_RND, |
687 | CVTS2UI_RND, |
688 | |
689 | // Vector float/double to signed/unsigned integer with truncation. |
690 | CVTTP2SI, |
691 | CVTTP2UI, |
692 | CVTTP2SI_SAE, |
693 | CVTTP2UI_SAE, |
694 | |
695 | // Saturation enabled Vector float/double to signed/unsigned |
696 | // integer with truncation. |
697 | CVTTP2SIS, |
698 | CVTTP2UIS, |
699 | CVTTP2SIS_SAE, |
700 | CVTTP2UIS_SAE, |
701 | // Masked versions of above. Used for v2f64 to v4i32. |
702 | // SRC, PASSTHRU, MASK |
703 | MCVTTP2SIS, |
704 | MCVTTP2UIS, |
705 | |
706 | // Scalar float/double to signed/unsigned integer with truncation. |
707 | CVTTS2SI, |
708 | CVTTS2UI, |
709 | CVTTS2SI_SAE, |
710 | CVTTS2UI_SAE, |
711 | |
712 | // Vector signed/unsigned integer to float/double. |
713 | CVTSI2P, |
714 | CVTUI2P, |
715 | |
716 | // Scalar float/double to signed/unsigned integer with saturation. |
717 | CVTTS2SIS, |
718 | CVTTS2UIS, |
719 | CVTTS2SIS_SAE, |
720 | CVTTS2UIS_SAE, |
721 | |
722 | // Masked versions of above. Used for v2f64->v4f32. |
723 | // SRC, PASSTHRU, MASK |
724 | MCVTP2SI, |
725 | MCVTP2UI, |
726 | MCVTTP2SI, |
727 | MCVTTP2UI, |
728 | MCVTSI2P, |
729 | MCVTUI2P, |
730 | |
731 | // Custom handling for FP_TO_xINT_SAT |
732 | FP_TO_SINT_SAT, |
733 | FP_TO_UINT_SAT, |
734 | |
735 | // Vector float to bfloat16. |
736 | // Convert packed single data to packed BF16 data |
737 | CVTNEPS2BF16, |
738 | // Masked version of above. |
739 | // SRC, PASSTHRU, MASK |
740 | MCVTNEPS2BF16, |
741 | |
742 | // Dot product of BF16/FP16 pairs to accumulated into |
743 | // packed single precision. |
744 | DPBF16PS, |
745 | DPFP16PS, |
746 | |
747 | // A stack checking function call. On Windows it's _chkstk call. |
748 | DYN_ALLOCA, |
749 | |
750 | // For allocating variable amounts of stack space when using |
751 | // segmented stacks. Check if the current stacklet has enough space, and |
752 | // falls back to heap allocation if not. |
753 | SEG_ALLOCA, |
754 | |
755 | // For allocating stack space when using stack clash protector. |
756 | // Allocation is performed by block, and each block is probed. |
757 | PROBED_ALLOCA, |
758 | |
759 | // Memory barriers. |
760 | MFENCE, |
761 | |
762 | // Get a random integer and indicate whether it is valid in CF. |
763 | RDRAND, |
764 | |
765 | // Get a NIST SP800-90B & C compliant random integer and |
766 | // indicate whether it is valid in CF. |
767 | RDSEED, |
768 | |
769 | // Protection keys |
770 | // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. |
771 | // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is |
772 | // value for ECX. |
773 | RDPKRU, |
774 | WRPKRU, |
775 | |
776 | // SSE42 string comparisons. |
777 | // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG |
778 | // will emit one or two instructions based on which results are used. If |
779 | // flags and index/mask this allows us to use a single instruction since |
780 | // we won't have to pick and opcode for flags. Instead we can rely on the |
781 | // DAG to CSE everything and decide at isel. |
782 | PCMPISTR, |
783 | PCMPESTR, |
784 | |
785 | // Test if in transactional execution. |
786 | XTEST, |
787 | |
788 | // Conversions between float and half-float. |
789 | CVTPS2PH, |
790 | CVTPS2PH_SAE, |
791 | CVTPH2PS, |
792 | CVTPH2PS_SAE, |
793 | |
794 | // Masked version of above. |
795 | // SRC, RND, PASSTHRU, MASK |
796 | MCVTPS2PH, |
797 | MCVTPS2PH_SAE, |
798 | |
799 | // Galois Field Arithmetic Instructions |
800 | GF2P8AFFINEINVQB, |
801 | GF2P8AFFINEQB, |
802 | GF2P8MULB, |
803 | |
804 | // LWP insert record. |
805 | LWPINS, |
806 | |
807 | // User level wait |
808 | UMWAIT, |
809 | TPAUSE, |
810 | |
811 | // Enqueue Stores Instructions |
812 | ENQCMD, |
813 | ENQCMDS, |
814 | |
815 | // For avx512-vp2intersect |
816 | VP2INTERSECT, |
817 | |
818 | // User level interrupts - testui |
819 | TESTUI, |
820 | |
821 | // Perform an FP80 add after changing precision control in FPCW. |
822 | FP80_ADD, |
823 | |
824 | // Conditional compare instructions |
825 | CCMP, |
826 | CTEST, |
827 | |
828 | /// X86 strict FP compare instructions. |
829 | FIRST_STRICTFP_OPCODE, |
830 | STRICT_FCMP = FIRST_STRICTFP_OPCODE, |
831 | STRICT_FCMPS, |
832 | |
833 | // Vector packed double/float comparison. |
834 | STRICT_CMPP, |
835 | |
836 | /// Vector comparison generating mask bits for fp and |
837 | /// integer signed and unsigned data types. |
838 | STRICT_CMPM, |
839 | |
840 | // Vector float/double to signed/unsigned integer with truncation. |
841 | STRICT_CVTTP2SI, |
842 | STRICT_CVTTP2UI, |
843 | |
844 | // Vector FP extend. |
845 | STRICT_VFPEXT, |
846 | |
847 | // Vector FP round. |
848 | STRICT_VFPROUND, |
849 | |
850 | // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. |
851 | // Also used by the legacy (V)ROUND intrinsics where we mask out the |
852 | // scaling part of the immediate. |
853 | STRICT_VRNDSCALE, |
854 | |
855 | // Vector signed/unsigned integer to float/double. |
856 | STRICT_CVTSI2P, |
857 | STRICT_CVTUI2P, |
858 | |
859 | // Strict FMA nodes. |
860 | STRICT_FNMADD, |
861 | STRICT_FMSUB, |
862 | STRICT_FNMSUB, |
863 | |
864 | // Conversions between float and half-float. |
865 | STRICT_CVTPS2PH, |
866 | STRICT_CVTPH2PS, |
867 | |
868 | // Perform an FP80 add after changing precision control in FPCW. |
869 | STRICT_FP80_ADD, |
870 | |
871 | /// Floating point max and min. |
872 | STRICT_FMAX, |
873 | STRICT_FMIN, |
874 | LAST_STRICTFP_OPCODE = STRICT_FMIN, |
875 | |
876 | // Compare and swap. |
877 | FIRST_MEMORY_OPCODE, |
878 | LCMPXCHG_DAG = FIRST_MEMORY_OPCODE, |
879 | LCMPXCHG8_DAG, |
880 | LCMPXCHG16_DAG, |
881 | LCMPXCHG16_SAVE_RBX_DAG, |
882 | |
883 | /// LOCK-prefixed arithmetic read-modify-write instructions. |
884 | /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) |
885 | LADD, |
886 | LSUB, |
887 | LOR, |
888 | LXOR, |
889 | LAND, |
890 | LBTS, |
891 | LBTC, |
892 | LBTR, |
893 | LBTS_RM, |
894 | LBTC_RM, |
895 | LBTR_RM, |
896 | |
897 | /// RAO arithmetic instructions. |
898 | /// OUTCHAIN = AADD(INCHAIN, PTR, RHS) |
899 | AADD, |
900 | AOR, |
901 | AXOR, |
902 | AAND, |
903 | |
904 | // Load, scalar_to_vector, and zero extend. |
905 | VZEXT_LOAD, |
906 | |
907 | // extract_vector_elt, store. |
908 | , |
909 | |
910 | // scalar broadcast from memory. |
911 | VBROADCAST_LOAD, |
912 | |
913 | // subvector broadcast from memory. |
914 | SUBV_BROADCAST_LOAD, |
915 | |
916 | // Store FP control word into i16 memory. |
917 | FNSTCW16m, |
918 | |
919 | // Load FP control word from i16 memory. |
920 | FLDCW16m, |
921 | |
922 | // Store x87 FPU environment into memory. |
923 | FNSTENVm, |
924 | |
925 | // Load x87 FPU environment from memory. |
926 | FLDENVm, |
927 | |
928 | /// This instruction implements FP_TO_SINT with the |
929 | /// integer destination in memory and a FP reg source. This corresponds |
930 | /// to the X86::FIST*m instructions and the rounding mode change stuff. It |
931 | /// has two inputs (token chain and address) and two outputs (int value |
932 | /// and token chain). Memory VT specifies the type to store to. |
933 | FP_TO_INT_IN_MEM, |
934 | |
935 | /// This instruction implements SINT_TO_FP with the |
936 | /// integer source in memory and FP reg result. This corresponds to the |
937 | /// X86::FILD*m instructions. It has two inputs (token chain and address) |
938 | /// and two outputs (FP value and token chain). The integer source type is |
939 | /// specified by the memory VT. |
940 | FILD, |
941 | |
942 | /// This instruction implements a fp->int store from FP stack |
943 | /// slots. This corresponds to the fist instruction. It takes a |
944 | /// chain operand, value to store, address, and glue. The memory VT |
945 | /// specifies the type to store as. |
946 | FIST, |
947 | |
948 | /// This instruction implements an extending load to FP stack slots. |
949 | /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain |
950 | /// operand, and ptr to load from. The memory VT specifies the type to |
951 | /// load from. |
952 | FLD, |
953 | |
954 | /// This instruction implements a truncating store from FP stack |
955 | /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a |
956 | /// chain operand, value to store, address, and glue. The memory VT |
957 | /// specifies the type to store as. |
958 | FST, |
959 | |
960 | /// These instructions grab the address of the next argument |
961 | /// from a va_list. (reads and modifies the va_list in memory) |
962 | VAARG_64, |
963 | VAARG_X32, |
964 | |
965 | // Vector truncating store with unsigned/signed saturation |
966 | VTRUNCSTOREUS, |
967 | VTRUNCSTORES, |
968 | // Vector truncating masked store with unsigned/signed saturation |
969 | VMTRUNCSTOREUS, |
970 | VMTRUNCSTORES, |
971 | |
972 | // X86 specific gather and scatter |
973 | MGATHER, |
974 | MSCATTER, |
975 | |
976 | // Key locker nodes that produce flags. |
977 | AESENC128KL, |
978 | AESDEC128KL, |
979 | AESENC256KL, |
980 | AESDEC256KL, |
981 | AESENCWIDE128KL, |
982 | AESDECWIDE128KL, |
983 | AESENCWIDE256KL, |
984 | AESDECWIDE256KL, |
985 | |
986 | /// Compare and Add if Condition is Met. Compare value in operand 2 with |
987 | /// value in memory of operand 1. If condition of operand 4 is met, add |
988 | /// value operand 3 to m32 and write new value in operand 1. Operand 2 is |
989 | /// always updated with the original value from operand 1. |
990 | CMPCCXADD, |
991 | |
992 | // Save xmm argument registers to the stack, according to %al. An operator |
993 | // is needed so that this can be expanded with control flow. |
994 | VASTART_SAVE_XMM_REGS, |
995 | |
996 | // Conditional load/store instructions |
997 | CLOAD, |
998 | CSTORE, |
999 | LAST_MEMORY_OPCODE = CSTORE, |
1000 | }; |
1001 | } // end namespace X86ISD |
1002 | |
1003 | namespace X86 { |
1004 | /// Current rounding mode is represented in bits 11:10 of FPSR. These |
1005 | /// values are same as corresponding constants for rounding mode used |
1006 | /// in glibc. |
1007 | enum RoundingMode { |
1008 | rmToNearest = 0, // FE_TONEAREST |
1009 | rmDownward = 1 << 10, // FE_DOWNWARD |
1010 | rmUpward = 2 << 10, // FE_UPWARD |
1011 | rmTowardZero = 3 << 10, // FE_TOWARDZERO |
1012 | rmMask = 3 << 10 // Bit mask selecting rounding mode |
1013 | }; |
1014 | } |
1015 | |
1016 | /// Define some predicates that are used for node matching. |
1017 | namespace X86 { |
1018 | /// Returns true if Elt is a constant zero or floating point constant +0.0. |
1019 | bool isZeroNode(SDValue Elt); |
1020 | |
1021 | /// Returns true of the given offset can be |
1022 | /// fit into displacement field of the instruction. |
1023 | bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, |
1024 | bool hasSymbolicDisplacement); |
1025 | |
1026 | /// Determines whether the callee is required to pop its |
1027 | /// own arguments. Callee pop is necessary to support tail calls. |
1028 | bool isCalleePop(CallingConv::ID CallingConv, |
1029 | bool is64Bit, bool IsVarArg, bool GuaranteeTCO); |
1030 | |
1031 | /// If Op is a constant whose elements are all the same constant or |
1032 | /// undefined, return true and return the constant value in \p SplatVal. |
1033 | /// If we have undef bits that don't cover an entire element, we treat these |
1034 | /// as zero if AllowPartialUndefs is set, else we fail and return false. |
1035 | bool isConstantSplat(SDValue Op, APInt &SplatVal, |
1036 | bool AllowPartialUndefs = true); |
1037 | |
1038 | /// Check if Op is a load operation that could be folded into some other x86 |
1039 | /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0. |
1040 | bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget, |
1041 | bool AssumeSingleUse = false); |
1042 | |
1043 | /// Check if Op is a load operation that could be folded into a vector splat |
1044 | /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2. |
1045 | bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT, |
1046 | const X86Subtarget &Subtarget, |
1047 | bool AssumeSingleUse = false); |
1048 | |
1049 | /// Check if Op is a value that could be used to fold a store into some |
1050 | /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi). |
1051 | bool mayFoldIntoStore(SDValue Op); |
1052 | |
1053 | /// Check if Op is an operation that could be folded into a zero extend x86 |
1054 | /// instruction. |
1055 | bool mayFoldIntoZeroExtend(SDValue Op); |
1056 | |
1057 | /// True if the target supports the extended frame for async Swift |
1058 | /// functions. |
1059 | bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, |
1060 | const MachineFunction &MF); |
1061 | } // end namespace X86 |
1062 | |
1063 | //===--------------------------------------------------------------------===// |
1064 | // X86 Implementation of the TargetLowering interface |
1065 | class X86TargetLowering final : public TargetLowering { |
1066 | public: |
1067 | explicit X86TargetLowering(const X86TargetMachine &TM, |
1068 | const X86Subtarget &STI); |
1069 | |
1070 | unsigned getJumpTableEncoding() const override; |
1071 | bool useSoftFloat() const override; |
1072 | |
1073 | void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
1074 | ArgListTy &Args) const override; |
1075 | |
1076 | MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override { |
1077 | return MVT::i8; |
1078 | } |
1079 | |
1080 | const MCExpr * |
1081 | LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
1082 | const MachineBasicBlock *MBB, unsigned uid, |
1083 | MCContext &Ctx) const override; |
1084 | |
1085 | /// Returns relocation base for the given PIC jumptable. |
1086 | SDValue getPICJumpTableRelocBase(SDValue Table, |
1087 | SelectionDAG &DAG) const override; |
1088 | const MCExpr * |
1089 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
1090 | unsigned JTI, MCContext &Ctx) const override; |
1091 | |
1092 | /// Return the desired alignment for ByVal aggregate |
1093 | /// function arguments in the caller parameter area. For X86, aggregates |
1094 | /// that contains are placed at 16-byte boundaries while the rest are at |
1095 | /// 4-byte boundaries. |
1096 | Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override; |
1097 | |
1098 | EVT getOptimalMemOpType(const MemOp &Op, |
1099 | const AttributeList &FuncAttributes) const override; |
1100 | |
1101 | /// Returns true if it's safe to use load / store of the |
1102 | /// specified type to expand memcpy / memset inline. This is mostly true |
1103 | /// for all types except for some special cases. For example, on X86 |
1104 | /// targets without SSE2 f64 load / store are done with fldl / fstpl which |
1105 | /// also does type conversion. Note the specified type doesn't have to be |
1106 | /// legal as the hook is used before type legalization. |
1107 | bool isSafeMemOpType(MVT VT) const override; |
1108 | |
1109 | bool isMemoryAccessFast(EVT VT, Align Alignment) const; |
1110 | |
1111 | /// Returns true if the target allows unaligned memory accesses of the |
1112 | /// specified type. Returns whether it is "fast" in the last argument. |
1113 | bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, |
1114 | MachineMemOperand::Flags Flags, |
1115 | unsigned *Fast) const override; |
1116 | |
1117 | /// This function returns true if the memory access is aligned or if the |
1118 | /// target allows this specific unaligned memory access. If the access is |
1119 | /// allowed, the optional final parameter returns a relative speed of the |
1120 | /// access (as defined by the target). |
1121 | bool allowsMemoryAccess( |
1122 | LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, |
1123 | Align Alignment, |
1124 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1125 | unsigned *Fast = nullptr) const override; |
1126 | |
1127 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
1128 | const MachineMemOperand &MMO, |
1129 | unsigned *Fast) const { |
1130 | return allowsMemoryAccess(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), |
1131 | Alignment: MMO.getAlign(), Flags: MMO.getFlags(), Fast); |
1132 | } |
1133 | |
1134 | /// Provide custom lowering hooks for some operations. |
1135 | /// |
1136 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
1137 | |
1138 | /// Replace the results of node with an illegal result |
1139 | /// type with new values built out of custom code. |
1140 | /// |
1141 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, |
1142 | SelectionDAG &DAG) const override; |
1143 | |
1144 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
1145 | |
1146 | bool preferABDSToABSWithNSW(EVT VT) const override; |
1147 | |
1148 | bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, |
1149 | EVT ExtVT) const override; |
1150 | |
1151 | bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, |
1152 | EVT VT) const override; |
1153 | |
1154 | /// Return true if the target has native support for |
1155 | /// the specified value type and it is 'desirable' to use the type for the |
1156 | /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 |
1157 | /// instruction encodings are longer and some i16 instructions are slow. |
1158 | bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; |
1159 | |
1160 | /// Return true if the target has native support for the |
1161 | /// specified value type and it is 'desirable' to use the type. e.g. On x86 |
1162 | /// i16 is legal, but undesirable since i16 instruction encodings are longer |
1163 | /// and some i16 instructions are slow. |
1164 | bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; |
1165 | |
1166 | /// Return prefered fold type, Abs if this is a vector, AddAnd if its an |
1167 | /// integer, None otherwise. |
1168 | TargetLowering::AndOrSETCCFoldKind |
1169 | isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, |
1170 | const SDNode *SETCC0, |
1171 | const SDNode *SETCC1) const override; |
1172 | |
1173 | /// Return the newly negated expression if the cost is not expensive and |
1174 | /// set the cost in \p Cost to indicate that if it is cheaper or neutral to |
1175 | /// do the negation. |
1176 | SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, |
1177 | bool LegalOperations, bool ForCodeSize, |
1178 | NegatibleCost &Cost, |
1179 | unsigned Depth) const override; |
1180 | |
1181 | MachineBasicBlock * |
1182 | EmitInstrWithCustomInserter(MachineInstr &MI, |
1183 | MachineBasicBlock *MBB) const override; |
1184 | |
1185 | /// This method returns the name of a target specific DAG node. |
1186 | const char *getTargetNodeName(unsigned Opcode) const override; |
1187 | |
1188 | /// Do not merge vector stores after legalization because that may conflict |
1189 | /// with x86-specific store splitting optimizations. |
1190 | bool mergeStoresAfterLegalization(EVT MemVT) const override { |
1191 | return !MemVT.isVector(); |
1192 | } |
1193 | |
1194 | bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
1195 | const MachineFunction &MF) const override; |
1196 | |
1197 | bool isCheapToSpeculateCttz(Type *Ty) const override; |
1198 | |
1199 | bool isCheapToSpeculateCtlz(Type *Ty) const override; |
1200 | |
1201 | bool isCtlzFast() const override; |
1202 | |
1203 | bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { |
1204 | // If the pair to store is a mixture of float and int values, we will |
1205 | // save two bitwise instructions and one float-to-int instruction and |
1206 | // increase one store instruction. There is potentially a more |
1207 | // significant benefit because it avoids the float->int domain switch |
1208 | // for input value. So It is more likely a win. |
1209 | if ((LTy.isFloatingPoint() && HTy.isInteger()) || |
1210 | (LTy.isInteger() && HTy.isFloatingPoint())) |
1211 | return true; |
1212 | // If the pair only contains int values, we will save two bitwise |
1213 | // instructions and increase one store instruction (costing one more |
1214 | // store buffer). Since the benefit is more blurred so we leave |
1215 | // such pair out until we get testcase to prove it is a win. |
1216 | return false; |
1217 | } |
1218 | |
1219 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
1220 | |
1221 | bool hasAndNotCompare(SDValue Y) const override; |
1222 | |
1223 | bool hasAndNot(SDValue Y) const override; |
1224 | |
1225 | bool hasBitTest(SDValue X, SDValue Y) const override; |
1226 | |
1227 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
1228 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
1229 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
1230 | SelectionDAG &DAG) const override; |
1231 | |
1232 | unsigned preferedOpcodeForCmpEqPiecesOfOperand( |
1233 | EVT VT, unsigned ShiftOpc, bool MayTransformRotate, |
1234 | const APInt &ShiftOrRotateAmt, |
1235 | const std::optional<APInt> &AndMask) const override; |
1236 | |
1237 | bool preferScalarizeSplat(SDNode *N) const override; |
1238 | |
1239 | CondMergingParams |
1240 | getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, |
1241 | const Value *Rhs) const override; |
1242 | |
1243 | bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
1244 | CombineLevel Level) const override; |
1245 | |
1246 | bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override; |
1247 | |
1248 | bool |
1249 | shouldTransformSignedTruncationCheck(EVT XVT, |
1250 | unsigned KeptBits) const override { |
1251 | // For vectors, we don't have a preference.. |
1252 | if (XVT.isVector()) |
1253 | return false; |
1254 | |
1255 | auto VTIsOk = [](EVT VT) -> bool { |
1256 | return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || |
1257 | VT == MVT::i64; |
1258 | }; |
1259 | |
1260 | // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports. |
1261 | // XVT will be larger than KeptBitsVT. |
1262 | MVT KeptBitsVT = MVT::getIntegerVT(BitWidth: KeptBits); |
1263 | return VTIsOk(XVT) && VTIsOk(KeptBitsVT); |
1264 | } |
1265 | |
1266 | ShiftLegalizationStrategy |
1267 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
1268 | unsigned ExpansionFactor) const override; |
1269 | |
1270 | bool shouldSplatInsEltVarIndex(EVT VT) const override; |
1271 | |
1272 | bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override { |
1273 | // Converting to sat variants holds little benefit on X86 as we will just |
1274 | // need to saturate the value back using fp arithmatic. |
1275 | return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT); |
1276 | } |
1277 | |
1278 | bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { |
1279 | return VT.isScalarInteger(); |
1280 | } |
1281 | |
1282 | /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. |
1283 | MVT hasFastEqualityCompare(unsigned NumBits) const override; |
1284 | |
1285 | /// Return the value type to use for ISD::SETCC. |
1286 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
1287 | EVT VT) const override; |
1288 | |
1289 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
1290 | const APInt &DemandedElts, |
1291 | TargetLoweringOpt &TLO) const override; |
1292 | |
1293 | /// Determine which of the bits specified in Mask are known to be either |
1294 | /// zero or one and return them in the KnownZero/KnownOne bitsets. |
1295 | void computeKnownBitsForTargetNode(const SDValue Op, |
1296 | KnownBits &Known, |
1297 | const APInt &DemandedElts, |
1298 | const SelectionDAG &DAG, |
1299 | unsigned Depth = 0) const override; |
1300 | |
1301 | /// Determine the number of bits in the operation that are sign bits. |
1302 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
1303 | const APInt &DemandedElts, |
1304 | const SelectionDAG &DAG, |
1305 | unsigned Depth) const override; |
1306 | |
1307 | bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, |
1308 | const APInt &DemandedElts, |
1309 | APInt &KnownUndef, |
1310 | APInt &KnownZero, |
1311 | TargetLoweringOpt &TLO, |
1312 | unsigned Depth) const override; |
1313 | |
1314 | bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op, |
1315 | const APInt &DemandedElts, |
1316 | unsigned MaskIndex, |
1317 | TargetLoweringOpt &TLO, |
1318 | unsigned Depth) const; |
1319 | |
1320 | bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
1321 | const APInt &DemandedBits, |
1322 | const APInt &DemandedElts, |
1323 | KnownBits &Known, |
1324 | TargetLoweringOpt &TLO, |
1325 | unsigned Depth) const override; |
1326 | |
1327 | SDValue SimplifyMultipleUseDemandedBitsForTargetNode( |
1328 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
1329 | SelectionDAG &DAG, unsigned Depth) const override; |
1330 | |
1331 | bool isGuaranteedNotToBeUndefOrPoisonForTargetNode( |
1332 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
1333 | bool PoisonOnly, unsigned Depth) const override; |
1334 | |
1335 | bool canCreateUndefOrPoisonForTargetNode( |
1336 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
1337 | bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override; |
1338 | |
1339 | bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, |
1340 | APInt &UndefElts, const SelectionDAG &DAG, |
1341 | unsigned Depth) const override; |
1342 | |
1343 | bool isTargetCanonicalConstantNode(SDValue Op) const override { |
1344 | // Peek through bitcasts/extracts/inserts to see if we have a vector |
1345 | // load/broadcast from memory. |
1346 | while (Op.getOpcode() == ISD::BITCAST || |
1347 | Op.getOpcode() == ISD::EXTRACT_SUBVECTOR || |
1348 | (Op.getOpcode() == ISD::INSERT_SUBVECTOR && |
1349 | Op.getOperand(i: 0).isUndef())) |
1350 | Op = Op.getOperand(i: Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0); |
1351 | |
1352 | return Op.getOpcode() == X86ISD::VBROADCAST_LOAD || |
1353 | Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD || |
1354 | (Op.getOpcode() == ISD::LOAD && |
1355 | getTargetConstantFromLoad(LD: cast<LoadSDNode>(Val&: Op))) || |
1356 | TargetLowering::isTargetCanonicalConstantNode(Op); |
1357 | } |
1358 | |
1359 | const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; |
1360 | |
1361 | SDValue unwrapAddress(SDValue N) const override; |
1362 | |
1363 | SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; |
1364 | |
1365 | bool ExpandInlineAsm(CallInst *CI) const override; |
1366 | |
1367 | ConstraintType getConstraintType(StringRef Constraint) const override; |
1368 | |
1369 | /// Examine constraint string and operand type and determine a weight value. |
1370 | /// The operand object must already have been set up with the operand type. |
1371 | ConstraintWeight |
1372 | getSingleConstraintMatchWeight(AsmOperandInfo &Info, |
1373 | const char *Constraint) const override; |
1374 | |
1375 | const char *LowerXConstraint(EVT ConstraintVT) const override; |
1376 | |
1377 | /// Lower the specified operand into the Ops vector. If it is invalid, don't |
1378 | /// add anything to Ops. If hasMemory is true it means one of the asm |
1379 | /// constraint of the inline asm instruction being processed is 'm'. |
1380 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
1381 | std::vector<SDValue> &Ops, |
1382 | SelectionDAG &DAG) const override; |
1383 | |
1384 | InlineAsm::ConstraintCode |
1385 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override { |
1386 | if (ConstraintCode == "v" ) |
1387 | return InlineAsm::ConstraintCode::v; |
1388 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
1389 | } |
1390 | |
1391 | /// Handle Lowering flag assembly outputs. |
1392 | SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
1393 | const SDLoc &DL, |
1394 | const AsmOperandInfo &Constraint, |
1395 | SelectionDAG &DAG) const override; |
1396 | |
1397 | /// Given a physical register constraint |
1398 | /// (e.g. {edx}), return the register number and the register class for the |
1399 | /// register. This should only be used for C_Register constraints. On |
1400 | /// error, this returns a register number of 0. |
1401 | std::pair<unsigned, const TargetRegisterClass *> |
1402 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
1403 | StringRef Constraint, MVT VT) const override; |
1404 | |
1405 | /// Return true if the addressing mode represented |
1406 | /// by AM is legal for this target, for a load/store of the specified type. |
1407 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, |
1408 | Type *Ty, unsigned AS, |
1409 | Instruction *I = nullptr) const override; |
1410 | |
1411 | bool addressingModeSupportsTLS(const GlobalValue &GV) const override; |
1412 | |
1413 | /// Return true if the specified immediate is legal |
1414 | /// icmp immediate, that is the target has icmp instructions which can |
1415 | /// compare a register against the immediate without having to materialize |
1416 | /// the immediate into a register. |
1417 | bool isLegalICmpImmediate(int64_t Imm) const override; |
1418 | |
1419 | /// Return true if the specified immediate is legal |
1420 | /// add immediate, that is the target has add instructions which can |
1421 | /// add a register and the immediate without having to materialize |
1422 | /// the immediate into a register. |
1423 | bool isLegalAddImmediate(int64_t Imm) const override; |
1424 | |
1425 | bool isLegalStoreImmediate(int64_t Imm) const override; |
1426 | |
1427 | /// Add x86-specific opcodes to the default list. |
1428 | bool isBinOp(unsigned Opcode) const override; |
1429 | |
1430 | /// Returns true if the opcode is a commutative binary operation. |
1431 | bool isCommutativeBinOp(unsigned Opcode) const override; |
1432 | |
1433 | /// Return true if it's free to truncate a value of |
1434 | /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in |
1435 | /// register EAX to i16 by referencing its sub-register AX. |
1436 | bool isTruncateFree(Type *Ty1, Type *Ty2) const override; |
1437 | bool isTruncateFree(EVT VT1, EVT VT2) const override; |
1438 | |
1439 | bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; |
1440 | |
1441 | /// Return true if any actual instruction that defines a |
1442 | /// value of type Ty1 implicit zero-extends the value to Ty2 in the result |
1443 | /// register. This does not necessarily include registers defined in |
1444 | /// unknown ways, such as incoming arguments, or copies from unknown |
1445 | /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this |
1446 | /// does not necessarily apply to truncate instructions. e.g. on x86-64, |
1447 | /// all instructions that define 32-bit values implicit zero-extend the |
1448 | /// result out to 64 bits. |
1449 | bool isZExtFree(Type *Ty1, Type *Ty2) const override; |
1450 | bool isZExtFree(EVT VT1, EVT VT2) const override; |
1451 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
1452 | |
1453 | bool shouldConvertPhiType(Type *From, Type *To) const override; |
1454 | |
1455 | /// Return true if folding a vector load into ExtVal (a sign, zero, or any |
1456 | /// extend node) is profitable. |
1457 | bool isVectorLoadExtDesirable(SDValue) const override; |
1458 | |
1459 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
1460 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this |
1461 | /// method returns true, otherwise fmuladd is expanded to fmul + fadd. |
1462 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
1463 | EVT VT) const override; |
1464 | |
1465 | /// Return true if it's profitable to narrow operations of type SrcVT to |
1466 | /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not |
1467 | /// from i32 to i16. |
1468 | bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override; |
1469 | |
1470 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, |
1471 | unsigned SelectOpcode, SDValue X, |
1472 | SDValue Y) const override; |
1473 | |
1474 | /// Given an intrinsic, checks if on the target the intrinsic will need to map |
1475 | /// to a MemIntrinsicNode (touches memory). If this is the case, it returns |
1476 | /// true and stores the intrinsic information into the IntrinsicInfo that was |
1477 | /// passed to the function. |
1478 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
1479 | MachineFunction &MF, |
1480 | unsigned Intrinsic) const override; |
1481 | |
1482 | /// Returns true if the target can instruction select the |
1483 | /// specified FP immediate natively. If false, the legalizer will |
1484 | /// materialize the FP immediate as a load from a constant pool. |
1485 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
1486 | bool ForCodeSize) const override; |
1487 | |
1488 | /// Targets can use this to indicate that they only support *some* |
1489 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a |
1490 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to |
1491 | /// be legal. |
1492 | bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override; |
1493 | |
1494 | /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there |
1495 | /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a |
1496 | /// constant pool entry. |
1497 | bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override; |
1498 | |
1499 | /// Returns true if lowering to a jump table is allowed. |
1500 | bool areJTsAllowed(const Function *Fn) const override; |
1501 | |
1502 | MVT getPreferredSwitchConditionType(LLVMContext &Context, |
1503 | EVT ConditionVT) const override; |
1504 | |
1505 | /// If true, then instruction selection should |
1506 | /// seek to shrink the FP constant of the specified type to a smaller type |
1507 | /// in order to save space and / or reduce runtime. |
1508 | bool ShouldShrinkFPConstant(EVT VT) const override; |
1509 | |
1510 | /// Return true if we believe it is correct and profitable to reduce the |
1511 | /// load node to a smaller type. |
1512 | bool |
1513 | shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, |
1514 | std::optional<unsigned> ByteOffset) const override; |
1515 | |
1516 | /// Return true if the specified scalar FP type is computed in an SSE |
1517 | /// register, not on the X87 floating point stack. |
1518 | bool isScalarFPTypeInSSEReg(EVT VT) const; |
1519 | |
1520 | /// Returns true if it is beneficial to convert a load of a constant |
1521 | /// to just the constant itself. |
1522 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
1523 | Type *Ty) const override; |
1524 | |
1525 | bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override; |
1526 | |
1527 | bool convertSelectOfConstantsToMath(EVT VT) const override; |
1528 | |
1529 | bool decomposeMulByConstant(LLVMContext &Context, EVT VT, |
1530 | SDValue C) const override; |
1531 | |
1532 | /// Return true if EXTRACT_SUBVECTOR is cheap for this result type |
1533 | /// with this index. |
1534 | bool (EVT ResVT, EVT SrcVT, |
1535 | unsigned Index) const override; |
1536 | |
1537 | /// Scalar ops always have equal or better analysis/performance/power than |
1538 | /// the vector equivalent, so this always makes sense if the scalar op is |
1539 | /// supported. |
1540 | bool shouldScalarizeBinop(SDValue) const override; |
1541 | |
1542 | /// Extract of a scalar FP value from index 0 of a vector is free. |
1543 | bool (EVT VT, unsigned Index) const override { |
1544 | EVT EltVT = VT.getScalarType(); |
1545 | return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0; |
1546 | } |
1547 | |
1548 | /// Overflow nodes should get combined/lowered to optimal instructions |
1549 | /// (they should allow eliminating explicit compares by getting flags from |
1550 | /// math ops). |
1551 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
1552 | bool MathUsed) const override; |
1553 | |
1554 | bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, |
1555 | unsigned AddrSpace) const override { |
1556 | // If we can replace more than 2 scalar stores, there will be a reduction |
1557 | // in instructions even after we add a vector constant load. |
1558 | return IsZero || NumElem > 2; |
1559 | } |
1560 | |
1561 | bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
1562 | const SelectionDAG &DAG, |
1563 | const MachineMemOperand &MMO) const override; |
1564 | |
1565 | Register getRegisterByName(const char* RegName, LLT VT, |
1566 | const MachineFunction &MF) const override; |
1567 | |
1568 | /// If a physical register, this returns the register that receives the |
1569 | /// exception address on entry to an EH pad. |
1570 | Register |
1571 | getExceptionPointerRegister(const Constant *PersonalityFn) const override; |
1572 | |
1573 | /// If a physical register, this returns the register that receives the |
1574 | /// exception typeid on entry to a landing pad. |
1575 | Register |
1576 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override; |
1577 | |
1578 | bool needsFixedCatchObjects() const override; |
1579 | |
1580 | /// This method returns a target specific FastISel object, |
1581 | /// or null if the target does not support "fast" ISel. |
1582 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
1583 | const TargetLibraryInfo *libInfo) const override; |
1584 | |
1585 | /// If the target has a standard location for the stack protector cookie, |
1586 | /// returns the address of that location. Otherwise, returns nullptr. |
1587 | Value *getIRStackGuard(IRBuilderBase &IRB) const override; |
1588 | |
1589 | bool useLoadStackGuardNode(const Module &M) const override; |
1590 | bool useStackGuardXorFP() const override; |
1591 | void insertSSPDeclarations(Module &M) const override; |
1592 | Value *getSDagStackGuard(const Module &M) const override; |
1593 | Function *getSSPStackGuardCheck(const Module &M) const override; |
1594 | SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
1595 | const SDLoc &DL) const override; |
1596 | |
1597 | |
1598 | /// Return true if the target stores SafeStack pointer at a fixed offset in |
1599 | /// some non-standard address space, and populates the address space and |
1600 | /// offset as appropriate. |
1601 | Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; |
1602 | |
1603 | std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, |
1604 | SDValue Chain, SDValue Pointer, |
1605 | MachinePointerInfo PtrInfo, |
1606 | Align Alignment, |
1607 | SelectionDAG &DAG) const; |
1608 | |
1609 | /// Customize the preferred legalization strategy for certain types. |
1610 | LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; |
1611 | |
1612 | bool softPromoteHalfType() const override { return true; } |
1613 | |
1614 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
1615 | EVT VT) const override; |
1616 | |
1617 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
1618 | CallingConv::ID CC, |
1619 | EVT VT) const override; |
1620 | |
1621 | unsigned getVectorTypeBreakdownForCallingConv( |
1622 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
1623 | unsigned &NumIntermediates, MVT &RegisterVT) const override; |
1624 | |
1625 | bool functionArgumentNeedsConsecutiveRegisters( |
1626 | Type *Ty, CallingConv::ID CallConv, bool isVarArg, |
1627 | const DataLayout &DL) const override; |
1628 | |
1629 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
1630 | |
1631 | bool supportSwiftError() const override; |
1632 | |
1633 | bool supportKCFIBundles() const override { return true; } |
1634 | |
1635 | MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
1636 | MachineBasicBlock::instr_iterator &MBBI, |
1637 | const TargetInstrInfo *TII) const override; |
1638 | |
1639 | bool hasStackProbeSymbol(const MachineFunction &MF) const override; |
1640 | bool hasInlineStackProbe(const MachineFunction &MF) const override; |
1641 | StringRef getStackProbeSymbolName(const MachineFunction &MF) const override; |
1642 | |
1643 | unsigned getStackProbeSize(const MachineFunction &MF) const; |
1644 | |
1645 | bool hasVectorBlend() const override { return true; } |
1646 | |
1647 | unsigned getMaxSupportedInterleaveFactor() const override { return 4; } |
1648 | |
1649 | bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, |
1650 | unsigned OpNo) const override; |
1651 | |
1652 | SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, |
1653 | MachineMemOperand *MMO, SDValue &NewLoad, |
1654 | SDValue Ptr, SDValue PassThru, |
1655 | SDValue Mask) const override; |
1656 | SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, |
1657 | MachineMemOperand *MMO, SDValue Ptr, SDValue Val, |
1658 | SDValue Mask) const override; |
1659 | |
1660 | /// Lower interleaved load(s) into target specific |
1661 | /// instructions/intrinsics. |
1662 | bool lowerInterleavedLoad(LoadInst *LI, |
1663 | ArrayRef<ShuffleVectorInst *> Shuffles, |
1664 | ArrayRef<unsigned> Indices, |
1665 | unsigned Factor) const override; |
1666 | |
1667 | /// Lower interleaved store(s) into target specific |
1668 | /// instructions/intrinsics. |
1669 | bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
1670 | unsigned Factor) const override; |
1671 | |
1672 | SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, |
1673 | int JTI, SelectionDAG &DAG) const override; |
1674 | |
1675 | Align getPrefLoopAlignment(MachineLoop *ML) const override; |
1676 | |
1677 | EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override { |
1678 | if (VT == MVT::f80) |
1679 | return EVT::getIntegerVT(Context, BitWidth: 96); |
1680 | return TargetLoweringBase::getTypeToTransformTo(Context, VT); |
1681 | } |
1682 | |
1683 | protected: |
1684 | std::pair<const TargetRegisterClass *, uint8_t> |
1685 | findRepresentativeClass(const TargetRegisterInfo *TRI, |
1686 | MVT VT) const override; |
1687 | |
1688 | private: |
1689 | /// Keep a reference to the X86Subtarget around so that we can |
1690 | /// make the right decision when generating code for different targets. |
1691 | const X86Subtarget &Subtarget; |
1692 | |
1693 | /// A list of legal FP immediates. |
1694 | std::vector<APFloat> LegalFPImmediates; |
1695 | |
1696 | /// Indicate that this x86 target can instruction |
1697 | /// select the specified FP immediate natively. |
1698 | void addLegalFPImmediate(const APFloat& Imm) { |
1699 | LegalFPImmediates.push_back(x: Imm); |
1700 | } |
1701 | |
1702 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
1703 | CallingConv::ID CallConv, bool isVarArg, |
1704 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1705 | const SDLoc &dl, SelectionDAG &DAG, |
1706 | SmallVectorImpl<SDValue> &InVals, |
1707 | uint32_t *RegMask) const; |
1708 | SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, |
1709 | const SmallVectorImpl<ISD::InputArg> &ArgInfo, |
1710 | const SDLoc &dl, SelectionDAG &DAG, |
1711 | const CCValAssign &VA, MachineFrameInfo &MFI, |
1712 | unsigned i) const; |
1713 | SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, |
1714 | const SDLoc &dl, SelectionDAG &DAG, |
1715 | const CCValAssign &VA, |
1716 | ISD::ArgFlagsTy Flags, bool isByval) const; |
1717 | |
1718 | // Call lowering helpers. |
1719 | |
1720 | /// Check whether the call is eligible for tail call optimization. Targets |
1721 | /// that want to do tail call optimization should implement this function. |
1722 | bool IsEligibleForTailCallOptimization( |
1723 | TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo, |
1724 | SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const; |
1725 | SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, |
1726 | SDValue Chain, bool IsTailCall, |
1727 | bool Is64Bit, int FPDiff, |
1728 | const SDLoc &dl) const; |
1729 | |
1730 | unsigned GetAlignedArgumentStackSize(unsigned StackSize, |
1731 | SelectionDAG &DAG) const; |
1732 | |
1733 | unsigned getAddressSpace() const; |
1734 | |
1735 | SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, |
1736 | SDValue &Chain) const; |
1737 | SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const; |
1738 | |
1739 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1740 | SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; |
1741 | SDValue (SDValue Op, SelectionDAG &DAG) const; |
1742 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
1743 | |
1744 | unsigned getGlobalWrapperKind(const GlobalValue *GV, |
1745 | const unsigned char OpFlags) const; |
1746 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
1747 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
1748 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
1749 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1750 | SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; |
1751 | |
1752 | /// Creates target global address or external symbol nodes for calls or |
1753 | /// other uses. |
1754 | SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall, |
1755 | bool *IsImpCall) const; |
1756 | |
1757 | SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1758 | SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1759 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; |
1760 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1761 | SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1762 | SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const; |
1763 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
1764 | SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; |
1765 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
1766 | SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; |
1767 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
1768 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1769 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
1770 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
1771 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1772 | SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1773 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
1774 | SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; |
1775 | SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; |
1776 | SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; |
1777 | SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; |
1778 | SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; |
1779 | SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
1780 | SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1781 | SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1782 | SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const; |
1783 | SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const; |
1784 | SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const; |
1785 | SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; |
1786 | SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG, |
1787 | SDValue &Chain) const; |
1788 | SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1789 | SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const; |
1790 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1791 | SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const; |
1792 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
1793 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
1794 | SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const; |
1795 | |
1796 | SDValue |
1797 | LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1798 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1799 | const SDLoc &dl, SelectionDAG &DAG, |
1800 | SmallVectorImpl<SDValue> &InVals) const override; |
1801 | SDValue LowerCall(CallLoweringInfo &CLI, |
1802 | SmallVectorImpl<SDValue> &InVals) const override; |
1803 | |
1804 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1805 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1806 | const SmallVectorImpl<SDValue> &OutVals, |
1807 | const SDLoc &dl, SelectionDAG &DAG) const override; |
1808 | |
1809 | bool supportSplitCSR(MachineFunction *MF) const override { |
1810 | return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
1811 | MF->getFunction().hasFnAttribute(Kind: Attribute::NoUnwind); |
1812 | } |
1813 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
1814 | void insertCopiesSplitCSR( |
1815 | MachineBasicBlock *Entry, |
1816 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
1817 | |
1818 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
1819 | |
1820 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
1821 | |
1822 | EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, |
1823 | ISD::NodeType ExtendKind) const override; |
1824 | |
1825 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
1826 | bool isVarArg, |
1827 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1828 | LLVMContext &Context, |
1829 | const Type *RetTy) const override; |
1830 | |
1831 | const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; |
1832 | ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; |
1833 | |
1834 | TargetLoweringBase::AtomicExpansionKind |
1835 | shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
1836 | TargetLoweringBase::AtomicExpansionKind |
1837 | shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
1838 | TargetLoweringBase::AtomicExpansionKind |
1839 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
1840 | TargetLoweringBase::AtomicExpansionKind |
1841 | shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; |
1842 | void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; |
1843 | void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; |
1844 | |
1845 | LoadInst * |
1846 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; |
1847 | |
1848 | bool needsCmpXchgNb(Type *MemType) const; |
1849 | |
1850 | void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, |
1851 | MachineBasicBlock *DispatchBB, int FI) const; |
1852 | |
1853 | // Utility function to emit the low-level va_arg code for X86-64. |
1854 | MachineBasicBlock * |
1855 | EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; |
1856 | |
1857 | /// Utility function to emit the xmm reg save portion of va_start. |
1858 | MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1, |
1859 | MachineInstr &MI2, |
1860 | MachineBasicBlock *BB) const; |
1861 | |
1862 | MachineBasicBlock *EmitLoweredSelect(MachineInstr &I, |
1863 | MachineBasicBlock *BB) const; |
1864 | |
1865 | MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, |
1866 | MachineBasicBlock *BB) const; |
1867 | |
1868 | MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI, |
1869 | MachineBasicBlock *BB) const; |
1870 | |
1871 | MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI, |
1872 | MachineBasicBlock *BB) const; |
1873 | |
1874 | MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, |
1875 | MachineBasicBlock *BB) const; |
1876 | |
1877 | MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, |
1878 | MachineBasicBlock *BB) const; |
1879 | |
1880 | MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, |
1881 | MachineBasicBlock *MBB) const; |
1882 | |
1883 | void emitSetJmpShadowStackFix(MachineInstr &MI, |
1884 | MachineBasicBlock *MBB) const; |
1885 | |
1886 | MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, |
1887 | MachineBasicBlock *MBB) const; |
1888 | |
1889 | MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI, |
1890 | MachineBasicBlock *MBB) const; |
1891 | |
1892 | MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI, |
1893 | MachineBasicBlock *MBB) const; |
1894 | |
1895 | MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI, |
1896 | MachineBasicBlock *MBB) const; |
1897 | |
1898 | /// Emit flags for the given setcc condition and operands. Also returns the |
1899 | /// corresponding X86 condition code constant in X86CC. |
1900 | SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC, |
1901 | const SDLoc &dl, SelectionDAG &DAG, |
1902 | SDValue &X86CC) const; |
1903 | |
1904 | bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, |
1905 | SDValue IntPow2) const override; |
1906 | |
1907 | /// Check if replacement of SQRT with RSQRT should be disabled. |
1908 | bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override; |
1909 | |
1910 | /// Use rsqrt* to speed up sqrt calculations. |
1911 | SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, |
1912 | int &RefinementSteps, bool &UseOneConstNR, |
1913 | bool Reciprocal) const override; |
1914 | |
1915 | /// Use rcp* to speed up fdiv calculations. |
1916 | SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, |
1917 | int &RefinementSteps) const override; |
1918 | |
1919 | /// Reassociate floating point divisions into multiply by reciprocal. |
1920 | unsigned combineRepeatedFPDivisors() const override; |
1921 | |
1922 | SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1923 | SmallVectorImpl<SDNode *> &Created) const override; |
1924 | |
1925 | SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, |
1926 | SDValue V2) const; |
1927 | }; |
1928 | |
1929 | namespace X86 { |
1930 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
1931 | const TargetLibraryInfo *libInfo); |
1932 | } // end namespace X86 |
1933 | |
1934 | // X86 specific Gather/Scatter nodes. |
1935 | // The class has the same order of operands as MaskedGatherScatterSDNode for |
1936 | // convenience. |
1937 | class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode { |
1938 | public: |
1939 | // This is a intended as a utility and should never be directly created. |
1940 | X86MaskedGatherScatterSDNode() = delete; |
1941 | ~X86MaskedGatherScatterSDNode() = delete; |
1942 | |
1943 | const SDValue &getBasePtr() const { return getOperand(Num: 3); } |
1944 | const SDValue &getIndex() const { return getOperand(Num: 4); } |
1945 | const SDValue &getMask() const { return getOperand(Num: 2); } |
1946 | const SDValue &getScale() const { return getOperand(Num: 5); } |
1947 | |
1948 | static bool classof(const SDNode *N) { |
1949 | return N->getOpcode() == X86ISD::MGATHER || |
1950 | N->getOpcode() == X86ISD::MSCATTER; |
1951 | } |
1952 | }; |
1953 | |
1954 | class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode { |
1955 | public: |
1956 | const SDValue &getPassThru() const { return getOperand(Num: 1); } |
1957 | |
1958 | static bool classof(const SDNode *N) { |
1959 | return N->getOpcode() == X86ISD::MGATHER; |
1960 | } |
1961 | }; |
1962 | |
1963 | class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode { |
1964 | public: |
1965 | const SDValue &getValue() const { return getOperand(Num: 1); } |
1966 | |
1967 | static bool classof(const SDNode *N) { |
1968 | return N->getOpcode() == X86ISD::MSCATTER; |
1969 | } |
1970 | }; |
1971 | |
1972 | /// Generate unpacklo/unpackhi shuffle mask. |
1973 | void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo, |
1974 | bool Unary); |
1975 | |
1976 | /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation |
1977 | /// imposed by AVX and specific to the unary pattern. Example: |
1978 | /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3> |
1979 | /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7> |
1980 | void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo); |
1981 | |
1982 | } // end namespace llvm |
1983 | |
1984 | #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
1985 | |