1//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16
17#include "llvm/CodeGen/MachineFunction.h"
18#include "llvm/CodeGen/TargetLowering.h"
19
20namespace llvm {
21 class X86Subtarget;
22 class X86TargetMachine;
23
namespace X86ISD {
// X86 Specific DAG Nodes
enum NodeType : unsigned {
  // Start the numbering where the builtin ops leave off.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  /// Bit scan forward.
  BSF,
  /// Bit scan reverse.
  BSR,

  /// X86 funnel/double shift i16 instructions. These correspond to
  /// X86::SHLDW and X86::SHRDW instructions which have different amt
  /// modulo rules to generic funnel shifts.
  /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
  FSHL,
  FSHR,

  /// Bitwise logical AND of floating point values. This corresponds
  /// to X86::ANDPS or X86::ANDPD.
  FAND,

  /// Bitwise logical OR of floating point values. This corresponds
  /// to X86::ORPS or X86::ORPD.
  FOR,

  /// Bitwise logical XOR of floating point values. This corresponds
  /// to X86::XORPS or X86::XORPD.
  FXOR,

  /// Bitwise logical ANDNOT of floating point values. This
  /// corresponds to X86::ANDNPS or X86::ANDNPD.
  FANDN,

  /// These operations represent an abstract X86 call
  /// instruction, which includes a bunch of information. In particular the
  /// operands of these nodes are:
  ///
  /// #0 - The incoming token chain
  /// #1 - The callee
  /// #2 - The number of arg bytes the caller pushes on the stack.
  /// #3 - The number of arg bytes the callee pops off the stack.
  /// #4 - The value to pass in AL/AX/EAX (optional)
  /// #5 - The value to pass in DL/DX/EDX (optional)
  ///
  /// The result values of these nodes are:
  ///
  /// #0 - The outgoing token chain
  /// #1 - The first register result value (optional)
  /// #2 - The second register result value (optional)
  ///
  CALL,

  /// Same as call except it adds the NoTrack prefix.
  NT_CALL,

  // Pseudo for an ObjC call that gets emitted together with a special
  // marker instruction.
  CALL_RVMARKER,

  // Pseudo for a call to a global address that must be called via a memory
  // address (i.e., not loaded into a register then called).
  CALL_GLOBALADDR,

  /// The same as ISD::CopyFromReg except that this node makes it explicit
  /// that it may lower to an x87 FPU stack pop. Optimizations should be more
  /// cautious when handling this node than a normal CopyFromReg to avoid
  /// removing a required FPU stack pop. A key requirement is optimizations
  /// should not optimize any users of a chain that contains a
  /// POP_FROM_X87_REG to use a chain from a point earlier than the
  /// POP_FROM_X87_REG (which may remove a required FPU stack pop).
  POP_FROM_X87_REG,

  // Pseudo for a call to an imported function to ensure the correct machine
  // instruction is emitted for Import Call Optimization.
  IMP_CALL,

  /// X86 compare and logical compare instructions.
  CMP,
  FCMP,
  COMI,
  UCOMI,

  // X86 compare with Intrinsics similar to COMI.
  COMX,
  UCOMX,

  /// X86 bit-test instructions.
  BT,

  /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  /// operand, usually produced by a CMP instruction.
  SETCC,

  /// X86 Select
  SELECTS,

  // Same as SETCC except it's materialized with a sbb and the value is all
  // ones or all zeros.
  SETCC_CARRY, // R = carry_bit ? ~0 : 0

  /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  /// Operands are two FP values to compare; result is a mask of
  /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
  FSETCC,

  /// X86 FP SETCC, similar to above, but with output as an i1 mask and
  /// a version with SAE.
  FSETCCM,
  FSETCCM_SAE,

  /// X86 conditional moves. Operand 0 and operand 1 are the two values
  /// to select from. Operand 2 is the condition code, and operand 3 is the
  /// flag operand produced by a CMP or TEST instruction.
  CMOV,

  /// X86 conditional branches. Operand 0 is the chain operand, operand 1
  /// is the block to branch if condition is true, operand 2 is the
  /// condition code, and operand 3 is the flag operand produced by a CMP
  /// or TEST instruction.
  BRCOND,

  /// X86 conditional branch to self, used for implementing efficient
  /// conditional traps. Operand 0 is the chain operand, operand 1 is the
  /// condition code, and operand 2 is the flag operand.
  BRCOND_SELF,

  /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
  /// operand 1 is the target address.
  NT_BRIND,

  /// Return with a glue operand. Operand 0 is the chain operand, operand
  /// 1 is the number of bytes of stack to pop.
  RET_GLUE,

  /// Return from interrupt. Operand 0 is the number of bytes to pop.
  IRET,

  /// Repeat fill, corresponds to X86::REP_STOSx.
  REP_STOS,

  /// Repeat move, corresponds to X86::REP_MOVSx.
  REP_MOVS,

  /// On Darwin, this node represents the result of the popl
  /// at function entry, used for PIC code.
  GlobalBaseReg,

  /// A wrapper node for TargetConstantPool, TargetJumpTable,
  /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
  /// MCSymbol and TargetBlockAddress.
  Wrapper,

  /// Special wrapper used under X86-64 PIC mode for RIP
  /// relative displacements.
  WrapperRIP,

  /// Copies a 64-bit value from an MMX vector to the low word
  /// of an XMM vector, with the high word zero filled.
  MOVQ2DQ,

  /// Copies a 64-bit value from the low word of an XMM vector
  /// to an MMX vector.
  MOVDQ2Q,

  /// Copies a 32-bit value from the low word of a MMX
  /// vector to a GPR.
  MMX_MOVD2W,

  /// Copies a GPR into the low 32-bit word of a MMX vector
  /// and zero out the high word.
  MMX_MOVW2D,

  /// Extract an 8-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRB.
  PEXTRB,

  /// Extract a 16-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRW.
  PEXTRW,

  /// Insert any element of a 4 x float vector into any element
  /// of a destination 4 x float vector.
  INSERTPS,

  /// Insert the lower 8-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRB.
  PINSRB,

  /// Insert the lower 16-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRW.
  PINSRW,

  /// Shuffle 16 8-bit values within a vector.
  PSHUFB,

  /// Compute Sum of Absolute Differences.
  PSADBW,
  /// Compute Double Block Packed Sum-Absolute-Differences
  DBPSADBW,

  /// Bitwise Logical AND NOT of Packed FP values.
  ANDNP,

  /// Blend where the selector is an immediate.
  BLENDI,

  /// Dynamic (non-constant condition) vector blend where only the sign bits
  /// of the condition elements are used. This is used to enforce that the
  /// condition mask is not valid for generic VSELECT optimizations. This
  /// is also used to implement the intrinsics.
  /// Operands are in VSELECT order: MASK, TRUE, FALSE
  BLENDV,

  /// Combined add and sub on an FP vector.
  ADDSUB,

  // FP vector ops with rounding mode.
  FADD_RND,
  FADDS,
  FADDS_RND,
  FSUB_RND,
  FSUBS,
  FSUBS_RND,
  FMUL_RND,
  FMULS,
  FMULS_RND,
  FDIV_RND,
  FDIVS,
  FDIVS_RND,
  FMAX_SAE,
  FMAXS_SAE,
  FMIN_SAE,
  FMINS_SAE,
  FSQRT_RND,
  FSQRTS,
  FSQRTS_RND,

  // FP vector get exponent.
  FGETEXP,
  FGETEXP_SAE,
  FGETEXPS,
  FGETEXPS_SAE,
  // Extract Normalized Mantissas.
  VGETMANT,
  VGETMANT_SAE,
  VGETMANTS,
  VGETMANTS_SAE,
  // FP Scale.
  SCALEF,
  SCALEF_RND,
  SCALEFS,
  SCALEFS_RND,

  /// Integer horizontal add/sub.
  HADD,
  HSUB,

  /// Integer horizontal saturating add/sub.
  HADDS,
  HSUBS,

  /// Floating point horizontal add/sub.
  FHADD,
  FHSUB,

  // Detect Conflicts Within a Vector
  CONFLICT,

  /// Floating point max and min.
  FMAX,
  FMIN,

  /// Commutative FMIN and FMAX.
  FMAXC,
  FMINC,

  /// Scalar intrinsic floating point max and min.
  FMAXS,
  FMINS,

  /// Floating point reciprocal-sqrt and reciprocal approximation.
  /// Note that these typically require refinement
  /// in order to obtain suitable precision.
  FRSQRT,
  FRCP,

  // AVX-512 reciprocal approximations with a little more precision.
  RSQRT14,
  RSQRT14S,
  RCP14,
  RCP14S,

  // Thread Local Storage.
  TLSADDR,

  // Thread Local Storage. A call to get the start address
  // of the TLS block for the current module.
  TLSBASEADDR,

  // Thread Local Storage. When calling to an OS provided
  // thunk at the address from an earlier relocation.
  TLSCALL,

  // Thread Local Storage. A descriptor containing pointer to
  // code and to argument to get the TLS offset for the symbol.
  TLSDESC,

  // Exception Handling helpers.
  EH_RETURN,

  // SjLj exception handling setjmp.
  EH_SJLJ_SETJMP,

  // SjLj exception handling longjmp.
  EH_SJLJ_LONGJMP,

  // SjLj exception handling dispatch.
  EH_SJLJ_SETUP_DISPATCH,

  /// Tail call return. See X86TargetLowering::LowerCall for
  /// the list of operands.
  TC_RETURN,

  // Pseudo for a tail call return to a global address that must be called via
  // a memory address (i.e., not loaded into a register then called).
  TC_RETURN_GLOBALADDR,

  // Vector move to low scalar and zero higher vector elements.
  VZEXT_MOVL,

  // Vector integer truncate.
  VTRUNC,
  // Vector integer truncate with unsigned/signed saturation.
  VTRUNCUS,
  VTRUNCS,

  // Masked version of the above. Used when less than a 128-bit result is
  // produced since the mask only applies to the lower elements and can't
  // be represented by a select.
  // SRC, PASSTHRU, MASK
  VMTRUNC,
  VMTRUNCUS,
  VMTRUNCS,

  // Vector FP extend.
  VFPEXT,
  VFPEXT_SAE,
  VFPEXTS,
  VFPEXTS_SAE,

  // Vector FP round.
  VFPROUND,
  // Convert TWO packed single data to one packed data
  VFPROUND2,
  VFPROUND2_RND,
  VFPROUND_RND,
  VFPROUNDS,
  VFPROUNDS_RND,

  // Masked version of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  VMFPROUND,

  // 128-bit vector logical left / right shift
  VSHLDQ,
  VSRLDQ,

  // Vector shift elements
  VSHL,
  VSRL,
  VSRA,

  // Vector variable shift
  VSHLV,
  VSRLV,
  VSRAV,

  // Vector shift elements by immediate
  VSHLI,
  VSRLI,
  VSRAI,

  // Shifts of mask registers.
  KSHIFTL,
  KSHIFTR,

  // Bit rotate by immediate
  VROTLI,
  VROTRI,

  // Vector packed double/float comparison.
  CMPP,

  // Vector integer comparisons.
  PCMPEQ,
  PCMPGT,

  // v8i16 Horizontal minimum and position.
  PHMINPOS,

  MULTISHIFT,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  CMPM,
  // Vector mask comparison generating mask bits for FP values.
  CMPMM,
  // Vector mask comparison with SAE for FP values.
  CMPMM_SAE,

  // Arithmetic operations with FLAGS results.
  ADD,
  SUB,
  ADC,
  SBB,
  SMUL,
  UMUL,
  OR,
  XOR,
  AND,

  // Bit field extract.
  BEXTR,
  BEXTRI,

  // Zero High Bits Starting with Specified Bit Position.
  BZHI,

  // Parallel extract and deposit.
  PDEP,
  PEXT,

  // X86-specific multiply by immediate.
  MUL_IMM,

  // Vector sign bit extraction.
  MOVMSK,

  // Vector bitwise comparisons.
  PTEST,

  // Vector packed fp sign bitwise comparisons.
  TESTP,

  // OR/AND test for masks.
  KORTEST,
  KTEST,

  // ADD for masks.
  KADD,

  // Several flavors of instructions with vector shuffle behaviors.
  // Saturated signed/unsigned packing.
  PACKSS,
  PACKUS,
  // Intra-lane alignr.
  PALIGNR,
  // AVX512 inter-lane alignr.
  VALIGN,
  PSHUFD,
  PSHUFHW,
  PSHUFLW,
  SHUFP,
  // VBMI2 Concat & Shift.
  VSHLD,
  VSHRD,

  // Shuffle Packed Values at 128-bit granularity.
  SHUF128,
  MOVDDUP,
  MOVSHDUP,
  MOVSLDUP,
  MOVLHPS,
  MOVHLPS,
  MOVSD,
  MOVSS,
  MOVSH,
  UNPCKL,
  UNPCKH,
  VPERMILPV,
  VPERMILPI,
  VPERMI,
  VPERM2X128,

  // Variable Permute (VPERM).
  // Res = VPERMV MaskV, V0
  VPERMV,

  // 3-op Variable Permute (VPERMT2).
  // Res = VPERMV3 V0, MaskV, V1
  VPERMV3,

  // Bitwise ternary logic.
  VPTERNLOG,
  // Fix Up Special Packed Float32/64 values.
  VFIXUPIMM,
  VFIXUPIMM_SAE,
  VFIXUPIMMS,
  VFIXUPIMMS_SAE,
  // Range Restriction Calculation For Packed Pairs of Float32/64 values.
  VRANGE,
  VRANGE_SAE,
  VRANGES,
  VRANGES_SAE,
  // Reduce - Perform Reduction Transformation on scalar/packed FP.
  VREDUCE,
  VREDUCE_SAE,
  VREDUCES,
  VREDUCES_SAE,
  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  VRNDSCALE,
  VRNDSCALE_SAE,
  VRNDSCALES,
  VRNDSCALES_SAE,
  // Tests Types Of a FP Values for packed types.
  VFPCLASS,
  // Tests Types Of a FP Values for scalar types.
  VFPCLASSS,

  // Broadcast (splat) scalar or element 0 of a vector. If the operand is
  // a vector, this node may change the vector length as part of the splat.
  VBROADCAST,
  // Broadcast mask to vector.
  VBROADCASTM,

  /// SSE4A Extraction and Insertion.
  EXTRQI,
  INSERTQI,

  // XOP arithmetic/logical shifts.
  VPSHA,
  VPSHL,
  // XOP signed/unsigned integer comparisons.
  VPCOM,
  VPCOMU,
  // XOP packed permute bytes.
  VPPERM,
  // XOP two source permutation.
  VPERMIL2,

  // Vector multiply packed unsigned doubleword integers.
  PMULUDQ,
  // Vector multiply packed signed doubleword integers.
  PMULDQ,
  // Vector Multiply Packed Unsigned Integers with Round and Scale.
  MULHRS,

  // Multiply and Add Packed Integers.
  VPMADDUBSW,
  VPMADDWD,

  // AVX512IFMA multiply and add.
  // NOTE: These are different than the instruction and perform
  // op0 x op1 + op2.
  VPMADD52L,
  VPMADD52H,

  // VNNI
  VPDPBUSD,
  VPDPBUSDS,
  VPDPWSSD,
  VPDPWSSDS,

  // FMA nodes.
  // We use the target independent ISD::FMA for the non-inverted case.
  FNMADD,
  FMSUB,
  FNMSUB,
  FMADDSUB,
  FMSUBADD,

  // FMA with rounding mode.
  FMADD_RND,
  FNMADD_RND,
  FMSUB_RND,
  FNMSUB_RND,
  FMADDSUB_RND,
  FMSUBADD_RND,

  // AVX512-FP16 complex addition and multiplication.
  VFMADDC,
  VFMADDC_RND,
  VFCMADDC,
  VFCMADDC_RND,

  VFMULC,
  VFMULC_RND,
  VFCMULC,
  VFCMULC_RND,

  VFMADDCSH,
  VFMADDCSH_RND,
  VFCMADDCSH,
  VFCMADDCSH_RND,

  VFMULCSH,
  VFMULCSH_RND,
  VFCMULCSH,
  VFCMULCSH_RND,

  VPDPBSUD,
  VPDPBSUDS,
  VPDPBUUD,
  VPDPBUUDS,
  VPDPBSSD,
  VPDPBSSDS,

  VPDPWSUD,
  VPDPWSUDS,
  VPDPWUSD,
  VPDPWUSDS,
  VPDPWUUD,
  VPDPWUUDS,

  VMINMAX,
  VMINMAX_SAE,
  VMINMAXS,
  VMINMAXS_SAE,

  CVTP2IBS,
  CVTP2IUBS,
  CVTP2IBS_RND,
  CVTP2IUBS_RND,
  CVTTP2IBS,
  CVTTP2IUBS,
  CVTTP2IBS_SAE,
  CVTTP2IUBS_SAE,

  MPSADBW,

  VCVT2PH2BF8,
  VCVT2PH2BF8S,
  VCVT2PH2HF8,
  VCVT2PH2HF8S,
  VCVTBIASPH2BF8,
  VCVTBIASPH2BF8S,
  VCVTBIASPH2HF8,
  VCVTBIASPH2HF8S,
  VCVTPH2BF8,
  VCVTPH2BF8S,
  VCVTPH2HF8,
  VCVTPH2HF8S,
  VMCVTBIASPH2BF8,
  VMCVTBIASPH2BF8S,
  VMCVTBIASPH2HF8,
  VMCVTBIASPH2HF8S,
  VMCVTPH2BF8,
  VMCVTPH2BF8S,
  VMCVTPH2HF8,
  VMCVTPH2HF8S,
  VCVTHF82PH,

  // Compress and expand.
  COMPRESS,
  EXPAND,

  // Bits shuffle
  VPSHUFBITQMB,

  // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
  SINT_TO_FP_RND,
  UINT_TO_FP_RND,
  SCALAR_SINT_TO_FP,
  SCALAR_UINT_TO_FP,
  SCALAR_SINT_TO_FP_RND,
  SCALAR_UINT_TO_FP_RND,

  // Vector float/double to signed/unsigned integer.
  CVTP2SI,
  CVTP2UI,
  CVTP2SI_RND,
  CVTP2UI_RND,
  // Scalar float/double to signed/unsigned integer.
  CVTS2SI,
  CVTS2UI,
  CVTS2SI_RND,
  CVTS2UI_RND,

  // Vector float/double to signed/unsigned integer with truncation.
  CVTTP2SI,
  CVTTP2UI,
  CVTTP2SI_SAE,
  CVTTP2UI_SAE,

  // Saturation enabled Vector float/double to signed/unsigned
  // integer with truncation.
  CVTTP2SIS,
  CVTTP2UIS,
  CVTTP2SIS_SAE,
  CVTTP2UIS_SAE,
  // Masked versions of above. Used for v2f64 to v4i32.
  // SRC, PASSTHRU, MASK
  MCVTTP2SIS,
  MCVTTP2UIS,

  // Scalar float/double to signed/unsigned integer with truncation.
  CVTTS2SI,
  CVTTS2UI,
  CVTTS2SI_SAE,
  CVTTS2UI_SAE,

  // Vector signed/unsigned integer to float/double.
  CVTSI2P,
  CVTUI2P,

  // Scalar float/double to signed/unsigned integer with saturation.
  CVTTS2SIS,
  CVTTS2UIS,
  CVTTS2SIS_SAE,
  CVTTS2UIS_SAE,

  // Masked versions of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  MCVTP2SI,
  MCVTP2UI,
  MCVTTP2SI,
  MCVTTP2UI,
  MCVTSI2P,
  MCVTUI2P,

  // Custom handling for FP_TO_xINT_SAT
  FP_TO_SINT_SAT,
  FP_TO_UINT_SAT,

  // Vector float to bfloat16.
  // Convert packed single data to packed BF16 data
  CVTNEPS2BF16,
  // Masked version of above.
  // SRC, PASSTHRU, MASK
  MCVTNEPS2BF16,

  // Dot product of BF16/FP16 pairs to accumulated into
  // packed single precision.
  DPBF16PS,
  DPFP16PS,

  // A stack checking function call. On Windows it's _chkstk call.
  DYN_ALLOCA,

  // For allocating variable amounts of stack space when using
  // segmented stacks. Check if the current stacklet has enough space, and
  // falls back to heap allocation if not.
  SEG_ALLOCA,

  // For allocating stack space when using stack clash protector.
  // Allocation is performed by block, and each block is probed.
  PROBED_ALLOCA,

  // Memory barriers.
  MFENCE,

  // Get a random integer and indicate whether it is valid in CF.
  RDRAND,

  // Get a NIST SP800-90B & C compliant random integer and
  // indicate whether it is valid in CF.
  RDSEED,

  // Protection keys
  // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
  // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
  // value for ECX.
  RDPKRU,
  WRPKRU,

  // SSE42 string comparisons.
  // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
  // will emit one or two instructions based on which results are used. If
  // flags and index/mask are both used, this allows us to use a single
  // instruction since we won't have to pick an opcode for flags. Instead we
  // can rely on the DAG to CSE everything and decide at isel.
  PCMPISTR,
  PCMPESTR,

  // Test if in transactional execution.
  XTEST,

  // Conversions between float and half-float.
  CVTPS2PH,
  CVTPS2PH_SAE,
  CVTPH2PS,
  CVTPH2PS_SAE,

  // Masked version of above.
  // SRC, RND, PASSTHRU, MASK
  MCVTPS2PH,
  MCVTPS2PH_SAE,

  // Galois Field Arithmetic Instructions
  GF2P8AFFINEINVQB,
  GF2P8AFFINEQB,
  GF2P8MULB,

  // Carry-less multiplication
  PCLMULQDQ,

  // LWP insert record.
  LWPINS,

  // User level wait
  UMWAIT,
  TPAUSE,

  // Enqueue Stores Instructions
  ENQCMD,
  ENQCMDS,

  // For avx512-vp2intersect
  VP2INTERSECT,

  // User level interrupts - testui
  TESTUI,

  // Perform an FP80 add after changing precision control in FPCW.
  FP80_ADD,

  // Conditional compare instructions
  CCMP,
  CTEST,

  /// X86 strict FP compare instructions.
  FIRST_STRICTFP_OPCODE,
  STRICT_FCMP = FIRST_STRICTFP_OPCODE,
  STRICT_FCMPS,

  // Vector packed double/float comparison.
  STRICT_CMPP,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  STRICT_CMPM,

  // Vector float/double to signed/unsigned integer with truncation.
  STRICT_CVTTP2SI,
  STRICT_CVTTP2UI,

  // Vector FP extend.
  STRICT_VFPEXT,

  // Vector FP round.
  STRICT_VFPROUND,

  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  STRICT_VRNDSCALE,

  // Vector signed/unsigned integer to float/double.
  STRICT_CVTSI2P,
  STRICT_CVTUI2P,

  // Strict FMA nodes.
  STRICT_FNMADD,
  STRICT_FMSUB,
  STRICT_FNMSUB,

  // Conversions between float and half-float.
  STRICT_CVTPS2PH,
  STRICT_CVTPH2PS,

  // Perform an FP80 add after changing precision control in FPCW.
  STRICT_FP80_ADD,

  /// Floating point max and min.
  STRICT_FMAX,
  STRICT_FMIN,
  LAST_STRICTFP_OPCODE = STRICT_FMIN,

  // Compare and swap.
  FIRST_MEMORY_OPCODE,
  LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
  LCMPXCHG8_DAG,
  LCMPXCHG16_DAG,
  LCMPXCHG16_SAVE_RBX_DAG,

  /// LOCK-prefixed arithmetic read-modify-write instructions.
  /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
  LADD,
  LSUB,
  LOR,
  LXOR,
  LAND,
  LBTS,
  LBTC,
  LBTR,
  LBTS_RM,
  LBTC_RM,
  LBTR_RM,

  /// RAO arithmetic instructions.
  /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
  AADD,
  AOR,
  AXOR,
  AAND,

  // Load, scalar_to_vector, and zero extend.
  VZEXT_LOAD,

  // extract_vector_elt, store.
  VEXTRACT_STORE,

  // scalar broadcast from memory.
  VBROADCAST_LOAD,

  // subvector broadcast from memory.
  SUBV_BROADCAST_LOAD,

  // Store FP control word into i16 memory.
  FNSTCW16m,

  // Load FP control word from i16 memory.
  FLDCW16m,

  // Store x87 FPU environment into memory.
  FNSTENVm,

  // Load x87 FPU environment from memory.
  FLDENVm,

  /// This instruction implements FP_TO_SINT with the
  /// integer destination in memory and a FP reg source. This corresponds
  /// to the X86::FIST*m instructions and the rounding mode change stuff. It
  /// has two inputs (token chain and address) and two outputs (int value
  /// and token chain). Memory VT specifies the type to store to.
  FP_TO_INT_IN_MEM,

  /// This instruction implements SINT_TO_FP with the
  /// integer source in memory and FP reg result. This corresponds to the
  /// X86::FILD*m instructions. It has two inputs (token chain and address)
  /// and two outputs (FP value and token chain). The integer source type is
  /// specified by the memory VT.
  FILD,

  /// This instruction implements a fp->int store from FP stack
  /// slots. This corresponds to the fist instruction. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FIST,

  /// This instruction implements an extending load to FP stack slots.
  /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
  /// operand, and ptr to load from. The memory VT specifies the type to
  /// load from.
  FLD,

  /// This instruction implements a truncating store from FP stack
  /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FST,

  /// These instructions grab the address of the next argument
  /// from a va_list. (reads and modifies the va_list in memory)
  VAARG_64,
  VAARG_X32,

  // Vector truncating store with unsigned/signed saturation
  VTRUNCSTOREUS,
  VTRUNCSTORES,
  // Vector truncating masked store with unsigned/signed saturation
  VMTRUNCSTOREUS,
  VMTRUNCSTORES,

  // X86 specific gather and scatter
  MGATHER,
  MSCATTER,

  // Key locker nodes that produce flags.
  AESENC128KL,
  AESDEC128KL,
  AESENC256KL,
  AESDEC256KL,
  AESENCWIDE128KL,
  AESDECWIDE128KL,
  AESENCWIDE256KL,
  AESDECWIDE256KL,

  /// Compare and Add if Condition is Met. Compare value in operand 2 with
  /// value in memory of operand 1. If condition of operand 4 is met, add
  /// value operand 3 to m32 and write new value in operand 1. Operand 2 is
  /// always updated with the original value from operand 1.
  CMPCCXADD,

  // Save xmm argument registers to the stack, according to %al. An operator
  // is needed so that this can be expanded with control flow.
  VASTART_SAVE_XMM_REGS,

  // Conditional load/store instructions
  CLOAD,
  CSTORE,
  LAST_MEMORY_OPCODE = CSTORE,
};
} // end namespace X86ISD
1021
namespace X86 {
  /// Current rounding mode is represented in bits 11:10 of FPSR. These
  /// values are same as corresponding constants for rounding mode used
  /// in glibc.
  /// NOTE(review): x86 rounding control actually lives in bits 11:10 of the
  /// x87 FPU Control Word (FPCW), not the status register — "FPSR" above
  /// looks like a misnomer; confirm before relying on it.
  enum RoundingMode {
    rmInvalid = -1,       // Sentinel for an invalid/unknown rounding mode
    rmToNearest = 0,      // FE_TONEAREST
    rmDownward = 1 << 10, // FE_DOWNWARD
    rmUpward = 2 << 10,   // FE_UPWARD
    rmTowardZero = 3 << 10, // FE_TOWARDZERO
    rmMask = 3 << 10      // Bit mask selecting rounding mode
  };
} // end namespace X86
1035
/// Define some predicates that are used for node matching.
namespace X86 {
  /// Returns true if Elt is a constant zero or floating point constant +0.0.
  bool isZeroNode(SDValue Elt);

  /// Returns true if the given offset can be
  /// fit into displacement field of the instruction.
  bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                    bool hasSymbolicDisplacement);

  /// Determines whether the callee is required to pop its
  /// own arguments. Callee pop is necessary to support tail calls.
  bool isCalleePop(CallingConv::ID CallingConv,
                   bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

  /// If Op is a constant whose elements are all the same constant or
  /// undefined, return true and return the constant value in \p SplatVal.
  /// If we have undef bits that don't cover an entire element, we treat these
  /// as zero if AllowPartialUndefs is set, else we fail and return false.
  bool isConstantSplat(SDValue Op, APInt &SplatVal,
                       bool AllowPartialUndefs = true);

  /// Check if Op is a load operation that could be folded into some other x86
  /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
  bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                   bool AssumeSingleUse = false,
                   bool IgnoreAlignment = false);

  /// Check if Op is a load operation that could be folded into a vector splat
  /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
  bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                       const X86Subtarget &Subtarget,
                                       bool AssumeSingleUse = false);

  /// Check if Op is a value that could be used to fold a store into some
  /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
  bool mayFoldIntoStore(SDValue Op);

  /// Check if Op is an operation that could be folded into a zero extend x86
  /// instruction.
  bool mayFoldIntoZeroExtend(SDValue Op);

  /// True if the target supports the extended frame for async Swift
  /// functions.
  bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
                                          const MachineFunction &MF);

  /// Convert LLVM rounding mode to X86 rounding mode.
  int getRoundingModeX86(unsigned RM);

} // end namespace X86
1087
1088 //===--------------------------------------------------------------------===//
1089 // X86 Implementation of the TargetLowering interface
1090 class X86TargetLowering final : public TargetLowering {
1091 // Copying needed for an outgoing byval argument.
1092 enum ByValCopyKind {
1093 // Argument is already in the correct location, no copy needed.
1094 NoCopy,
1095 // Argument value is currently in the local stack frame, needs copying to
1096 // outgoing arguemnt area.
1097 CopyOnce,
1098 // Argument value is currently in the outgoing argument area, but not at
1099 // the correct offset, so needs copying via a temporary in local stack
1100 // space.
1101 CopyViaTemp,
1102 };
1103
1104 public:
1105 explicit X86TargetLowering(const X86TargetMachine &TM,
1106 const X86Subtarget &STI);
1107
1108 unsigned getJumpTableEncoding() const override;
1109 bool useSoftFloat() const override;
1110
1111 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
1112 ArgListTy &Args) const override;
1113
1114 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
1115 return MVT::i8;
1116 }
1117
1118 const MCExpr *
1119 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1120 const MachineBasicBlock *MBB, unsigned uid,
1121 MCContext &Ctx) const override;
1122
1123 /// Returns relocation base for the given PIC jumptable.
1124 SDValue getPICJumpTableRelocBase(SDValue Table,
1125 SelectionDAG &DAG) const override;
1126 const MCExpr *
1127 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
1128 unsigned JTI, MCContext &Ctx) const override;
1129
1130 /// Return the desired alignment for ByVal aggregate
1131 /// function arguments in the caller parameter area. For X86, aggregates
1132 /// that contains are placed at 16-byte boundaries while the rest are at
1133 /// 4-byte boundaries.
1134 Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;
1135
1136 EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
1137 const AttributeList &FuncAttributes) const override;
1138
1139 /// Returns true if it's safe to use load / store of the
1140 /// specified type to expand memcpy / memset inline. This is mostly true
1141 /// for all types except for some special cases. For example, on X86
1142 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
1143 /// also does type conversion. Note the specified type doesn't have to be
1144 /// legal as the hook is used before type legalization.
1145 bool isSafeMemOpType(MVT VT) const override;
1146
    /// X86-specific helper (not a TargetLowering override) answering whether a
    /// memory access of \p VT with \p Alignment is fast on this subtarget.
    bool isMemoryAccessFast(EVT VT, Align Alignment) const;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        unsigned *Fast) const override;

    /// This function returns true if the memory access is aligned or if the
    /// target allows this specific unaligned memory access. If the access is
    /// allowed, the optional final parameter returns a relative speed of the
    /// access (as defined by the target).
    bool allowsMemoryAccess(
        LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
        Align Alignment,
        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
        unsigned *Fast = nullptr) const override;
1164
1165 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1166 const MachineMemOperand &MMO,
1167 unsigned *Fast) const {
1168 return allowsMemoryAccess(Context, DL, VT, AddrSpace: MMO.getAddrSpace(),
1169 Alignment: MMO.getAlign(), Flags: MMO.getFlags(), Fast);
1170 }
1171
    /// Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                            SelectionDAG &DAG) const override;

    /// Hook called by the generic DAGCombiner to perform X86-specific
    /// combines on the given node.
    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    bool preferABDSToABSWithNSW(EVT VT) const override;

    bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
                                   EVT ExtVT) const override;

    bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
                                           EVT VT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    /// Return preferred fold type, Abs if this is a vector, AddAnd if it's an
    /// integer, None otherwise.
    TargetLowering::AndOrSETCCFoldKind
    isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
                                       const SDNode *SETCC0,
                                       const SDNode *SETCC1) const override;

    /// Return the newly negated expression if the cost is not expensive and
    /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
    /// do the negation.
    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                 bool LegalOperations, bool ForCodeSize,
                                 NegatibleCost &Cost,
                                 unsigned Depth) const override;

    /// Expand instructions marked with usesCustomInserter into machine basic
    /// block(s).
    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;
1225
1226 /// Do not merge vector stores after legalization because that may conflict
1227 /// with x86-specific store splitting optimizations.
1228 bool mergeStoresAfterLegalization(EVT MemVT) const override {
1229 return !MemVT.isVector();
1230 }
1231
    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const MachineFunction &MF) const override;

    /// Whether cttz / ctlz on \p Ty is cheap enough to execute speculatively.
    bool isCheapToSpeculateCttz(Type *Ty) const override;

    bool isCheapToSpeculateCtlz(Type *Ty) const override;

    bool isCtlzFast() const override;
1240
1241 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1242 // If the pair to store is a mixture of float and int values, we will
1243 // save two bitwise instructions and one float-to-int instruction and
1244 // increase one store instruction. There is potentially a more
1245 // significant benefit because it avoids the float->int domain switch
1246 // for input value. So It is more likely a win.
1247 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1248 (LTy.isInteger() && HTy.isFloatingPoint()))
1249 return true;
1250 // If the pair only contains int values, we will save two bitwise
1251 // instructions and increase one store instruction (costing one more
1252 // store buffer). Since the benefit is more blurred so we leave
1253 // such pair out until we get testcase to prove it is a win.
1254 return false;
1255 }
1256
    // DAG-combine preference hooks (overrides of generic TargetLowering
    // queries).
    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool hasBitTest(SDValue X, SDValue Y) const override;

    bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const override;

    unsigned preferedOpcodeForCmpEqPiecesOfOperand(
        EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
        const APInt &ShiftOrRotateAmt,
        const std::optional<APInt> &AndMask) const override;

    bool preferScalarizeSplat(SDNode *N) const override;

    CondMergingParams
    getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
                                  const Value *Rhs) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override;

    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1284
1285 bool
1286 shouldTransformSignedTruncationCheck(EVT XVT,
1287 unsigned KeptBits) const override {
1288 // For vectors, we don't have a preference..
1289 if (XVT.isVector())
1290 return false;
1291
1292 auto VTIsOk = [](EVT VT) -> bool {
1293 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1294 VT == MVT::i64;
1295 };
1296
1297 // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1298 // XVT will be larger than KeptBitsVT.
1299 MVT KeptBitsVT = MVT::getIntegerVT(BitWidth: KeptBits);
1300 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1301 }
1302
    /// Choose how wide shifts should be legalized (e.g. expanded in parts vs.
    /// libcall), given the expected \p ExpansionFactor.
    ShiftLegalizationStrategy
    preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                       unsigned ExpansionFactor) const override;

    bool shouldSplatInsEltVarIndex(EVT VT) const override;
1308
1309 bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1310 // Converting to sat variants holds little benefit on X86 as we will just
1311 // need to saturate the value back using fp arithmatic.
1312 return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1313 }
1314
1315 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1316 return VT.isScalarInteger();
1317 }
1318
    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    /// X86-specific helper (not an override) that simplifies the demanded
    /// elements of the shuffle mask operand at \p MaskIndex.
    bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                    const APInt &DemandedElts,
                                                    unsigned MaskIndex,
                                                    TargetLoweringOpt &TLO,
                                                    unsigned Depth) const;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
        SelectionDAG &DAG, unsigned Depth) const override;

    bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, unsigned Depth) const override;

    bool canCreateUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;

    bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                   APInt &UndefElts, const SelectionDAG &DAG,
                                   unsigned Depth) const override;
1379
1380 bool isTargetCanonicalConstantNode(SDValue Op) const override {
1381 // Peek through bitcasts/extracts/inserts to see if we have a vector
1382 // load/broadcast from memory.
1383 while (Op.getOpcode() == ISD::BITCAST ||
1384 Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1385 (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1386 Op.getOperand(i: 0).isUndef()))
1387 Op = Op.getOperand(i: Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1388
1389 return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1390 Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
1391 (Op.getOpcode() == ISD::LOAD &&
1392 getTargetConstantFromLoad(LD: cast<LoadSDNode>(Val&: Op))) ||
1393 TargetLowering::isTargetCanonicalConstantNode(Op);
1394 }
1395
    bool isTargetCanonicalSelect(SDNode *N) const override;

    /// Return the constant the given load refers to, if it can be determined,
    /// otherwise null.
    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    SDValue unwrapAddress(SDValue N) const override;

    /// X86-specific helper (not an override) returning the frame index used
    /// to access the return address.
    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
    getSingleConstraintMatchWeight(AsmOperandInfo &Info,
                                   const char *Constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true it means one of the asm
    /// constraint of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;
1420
1421 InlineAsm::ConstraintCode
1422 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1423 if (ConstraintCode == "v")
1424 return InlineAsm::ConstraintCode::v;
1425 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1426 }
1427
    /// Handle Lowering flag assembly outputs.
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                        const SDLoc &DL,
                                        const AsmOperandInfo &Constraint,
                                        SelectionDAG &DAG) const override;

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register. This should only be used for C_Register constraints. On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    bool addressingModeSupportsTLS(const GlobalValue &GV) const override;

    /// Return true if the specified immediate is legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal store immediate.
    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// Add x86-specific opcodes to the default list.
    bool isBinOp(unsigned Opcode) const override;

    /// Returns true if the opcode is a commutative binary operation.
    bool isCommutativeBinOp(unsigned Opcode) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicit zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool shouldConvertPhiType(Type *From, Type *To) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    /// Return true if it's profitable to narrow operations of type SrcVT to
    /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
    /// from i32 to i16.
    bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override;

    bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
                                              unsigned SelectOpcode, SDValue X,
                                              SDValue Y) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need to
    /// map to a MemIntrinsicNode (touches memory). If this is the case, it
    /// returns true and stores the intrinsic information into the IntrinsicInfo
    /// that was passed to the function.
    void getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &Infos,
                            const CallBase &I, MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    MVT getPreferredSwitchConditionType(LLVMContext &Context,
                                        EVT ConditionVT) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override;

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool
    shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
                          std::optional<unsigned> ByteOffset) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                SDValue C) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    /// Scalar ops always have equal or better analysis/performance/power than
    /// the vector equivalent, so this always makes sense if the scalar op is
    /// supported.
    bool shouldScalarizeBinop(SDValue) const override;
1578
1579 /// Extract of a scalar FP value from index 0 of a vector is free.
1580 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1581 EVT EltVT = VT.getScalarType();
1582 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1583 }
1584
    /// Overflow nodes should get combined/lowered to optimal instructions
    /// (they should allow eliminating explicit compares by getting flags from
    /// math ops). \p MathUsed indicates whether the arithmetic result of the
    /// overflow op is itself used.
    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                              bool MathUsed) const override;
1590
1591 bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1592 unsigned AddrSpace) const override {
1593 // If we can replace more than 2 scalar stores, there will be a reduction
1594 // in instructions even after we add a vector constant load.
1595 return IsZero || NumElem > 2;
1596 }
1597
    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                 const SelectionDAG &DAG,
                                 const MachineMemOperand &MMO) const override;

    /// Resolve a named register (e.g. from llvm.read_register) to a physical
    /// register.
    Register getRegisterByName(const char* RegName, LLT VT,
                               const MachineFunction &MF) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    bool needsFixedCatchObjects() const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *
    createFastISel(FunctionLoweringInfo &funcInfo,
                   const TargetLibraryInfo *libInfo,
                   const LibcallLoweringInfo *libcallLowering) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilderBase &IRB,
                           const LibcallLoweringInfo &Libcalls) const override;

    bool useLoadStackGuardNode(const Module &M) const override;
    bool useStackGuardXorFP() const override;
    void
    insertSSPDeclarations(Module &M,
                          const LibcallLoweringInfo &Libcalls) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;


    /// Return true if the target stores SafeStack pointer at a fixed offset in
    /// some non-standard address space, and populates the address space and
    /// offset as appropriate.
    Value *getSafeStackPointerLocation(
        IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override;

    /// Build an x87 FILD (integer-to-FP load) node; returns the result value
    /// and the output chain.
    std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
                                          SDValue Chain, SDValue Pointer,
                                          MachinePointerInfo PtrInfo,
                                          Align Alignment,
                                          SelectionDAG &DAG) const;

    /// Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

    MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           CallingConv::ID CC,
                                           EVT VT) const override;

    unsigned getVectorTypeBreakdownForCallingConv(
        LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
        unsigned &NumIntermediates, MVT &RegisterVT) const override;

    bool functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
        const DataLayout &DL) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    /// KCFI check bundles are supported on X86.
    bool supportKCFIBundles() const override { return true; }

    MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                                MachineBasicBlock::instr_iterator &MBBI,
                                const TargetInstrInfo *TII) const override;

    bool hasStackProbeSymbol(const MachineFunction &MF) const override;
    bool hasInlineStackProbe(const MachineFunction &MF) const override;
    StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;

    unsigned getStackProbeSize(const MachineFunction &MF) const;

    /// The target supports vector blend operations.
    bool hasVectorBlend() const override { return true; }

    /// Interleaved memory accesses are supported up to a factor of 4.
    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
                                 unsigned OpNo) const override;

    SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                            MachineMemOperand *MMO, SDValue &NewLoad,
                            SDValue Ptr, SDValue PassThru,
                            SDValue Mask) const override;
    SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                             MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
                             SDValue Mask) const override;

    /// Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(Instruction *Load, Value *Mask,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices, unsigned Factor,
                              const APInt &GapMask) const override;

    /// Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(Instruction *Store, Value *Mask,
                               ShuffleVectorInst *SVI, unsigned Factor,
                               const APInt &GapMask) const override;

    SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
                                   int JTI, SelectionDAG &DAG) const override;

    Align getPrefLoopAlignment(MachineLoop *ML) const override;
1716
1717 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1718 if (VT == MVT::f80)
1719 return EVT::getIntegerVT(Context, BitWidth: 96);
1720 return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1721 }
1722
  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// A list of legal FP immediates, populated via addLegalFPImmediate().
    std::vector<APFloat> LegalFPImmediates;
1735
1736 /// Indicate that this x86 target can instruction
1737 /// select the specified FP immediate natively.
1738 void addLegalFPImmediate(const APFloat& Imm) {
1739 LegalFPImmediates.push_back(x: Imm);
1740 }
1741
    SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags, bool isByval) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for sibling call optimization.
    bool
    isEligibleForSiblingCallOpt(TargetLowering::CallLoweringInfo &CLI,
                                CCState &CCInfo,
                                SmallVectorImpl<CCValAssign> &ArgLocs) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace() const;

    /// Helper for FP_TO_SINT/FP_TO_UINT lowering; also returns the output
    /// chain via \p Chain.
    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                            SDValue &Chain) const;
    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1778
    // Per-operation lowering routines.
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV,
                                  const unsigned char OpFlags) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall,
                                  bool *IsImpCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConditionalBranch(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    ByValCopyKind ByValNeedsCopyForTailCall(SelectionDAG &DAG, SDValue Src,
                                            SDValue Dst,
                                            ISD::ArgFlagsTy Flags) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
1838
    // Calling-convention lowering overrides.
    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;
1851
1852 bool supportSplitCSR(MachineFunction *MF) const override {
1853 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1854 MF->getFunction().hasFnAttribute(Kind: Attribute::NoUnwind);
1855 }
1856 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1857 void insertCopiesSplitCSR(
1858 MachineBasicBlock *Entry,
1859 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1860
1861 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1862
1863 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1864
1865 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1866 ISD::NodeType ExtendKind) const override;
1867
1868 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1869 bool isVarArg,
1870 const SmallVectorImpl<ISD::OutputArg> &Outs,
1871 LLVMContext &Context,
1872 const Type *RetTy) const override;
1873
1874 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1875 ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
1876
1877 TargetLoweringBase::AtomicExpansionKind
1878 shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1879 TargetLoweringBase::AtomicExpansionKind
1880 shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1881 TargetLoweringBase::AtomicExpansionKind
1882 shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override;
1883 TargetLoweringBase::AtomicExpansionKind
1884 shouldExpandLogicAtomicRMWInIR(const AtomicRMWInst *AI) const;
1885 void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1886 void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1887
1888 LoadInst *
1889 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1890
1891 bool needsCmpXchgNb(Type *MemType) const;
1892
1893 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1894 MachineBasicBlock *DispatchBB, int FI) const;
1895
1896 // Utility function to emit the low-level va_arg code for X86-64.
1897 MachineBasicBlock *
1898 EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1899
    /// Emit the control-flow blocks for lowering a pair of dependent
    /// (cascaded) select instructions. (Previous comment described the
    /// va_start xmm-reg-save inserter, which this declaration is not.)
1901 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1902 MachineInstr &MI2,
1903 MachineBasicBlock *BB) const;
1904
1905 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1906 MachineBasicBlock *BB) const;
1907
1908 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1909 MachineBasicBlock *BB) const;
1910
1911 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1912 MachineBasicBlock *BB) const;
1913
1914 MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1915 MachineBasicBlock *BB) const;
1916
1917 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1918 MachineBasicBlock *BB) const;
1919
1920 MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1921 MachineBasicBlock *BB) const;
1922
1923 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1924 MachineBasicBlock *MBB) const;
1925
1926 void emitSetJmpShadowStackFix(MachineInstr &MI,
1927 MachineBasicBlock *MBB) const;
1928
1929 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1930 MachineBasicBlock *MBB) const;
1931
1932 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1933 MachineBasicBlock *MBB) const;
1934
1935 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1936 MachineBasicBlock *MBB) const;
1937
1938 MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
1939 MachineBasicBlock *MBB) const;
1940
1941 /// Emit flags for the given setcc condition and operands. Also returns the
1942 /// corresponding X86 condition code constant in X86CC.
1943 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1944 const SDLoc &dl, SelectionDAG &DAG,
1945 SDValue &X86CC) const;
1946
1947 bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
1948 SDValue IntPow2) const override;
1949
1950 /// Check if replacement of SQRT with RSQRT should be disabled.
1951 bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1952
1953 /// Use rsqrt* to speed up sqrt calculations.
1954 SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1955 int &RefinementSteps, bool &UseOneConstNR,
1956 bool Reciprocal) const override;
1957
1958 /// Use rcp* to speed up fdiv calculations.
1959 SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1960 int &RefinementSteps) const override;
1961
1962 /// Reassociate floating point divisions into multiply by reciprocal.
1963 unsigned combineRepeatedFPDivisors() const override;
1964
1965 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1966 SmallVectorImpl<SDNode *> &Created) const override;
1967
1968 SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
1969 SDValue V2) const;
1970 };
1971
1972 namespace X86 {
1973 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1974 const TargetLibraryInfo *libInfo,
1975 const LibcallLoweringInfo *libcallLowering);
1976 } // end namespace X86
1977
1978 // X86 specific Gather/Scatter nodes.
1979 // The class has the same order of operands as MaskedGatherScatterSDNode for
1980 // convenience.
1981 class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1982 public:
1983 // This is a intended as a utility and should never be directly created.
1984 X86MaskedGatherScatterSDNode() = delete;
1985 ~X86MaskedGatherScatterSDNode() = delete;
1986
1987 const SDValue &getBasePtr() const { return getOperand(Num: 3); }
1988 const SDValue &getIndex() const { return getOperand(Num: 4); }
1989 const SDValue &getMask() const { return getOperand(Num: 2); }
1990 const SDValue &getScale() const { return getOperand(Num: 5); }
1991
1992 static bool classof(const SDNode *N) {
1993 return N->getOpcode() == X86ISD::MGATHER ||
1994 N->getOpcode() == X86ISD::MSCATTER;
1995 }
1996 };
1997
1998 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1999 public:
2000 const SDValue &getPassThru() const { return getOperand(Num: 1); }
2001
2002 static bool classof(const SDNode *N) {
2003 return N->getOpcode() == X86ISD::MGATHER;
2004 }
2005 };
2006
2007 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
2008 public:
2009 const SDValue &getValue() const { return getOperand(Num: 1); }
2010
2011 static bool classof(const SDNode *N) {
2012 return N->getOpcode() == X86ISD::MSCATTER;
2013 }
2014 };
2015
2016 /// Generate unpacklo/unpackhi shuffle mask.
2017 void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
2018 bool Unary);
2019
2020 /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
2021 /// imposed by AVX and specific to the unary pattern. Example:
2022 /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
2023 /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
2024 void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
2025
2026} // end namespace llvm
2027
2028#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
2029