1//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16
17#include "llvm/CodeGen/MachineFunction.h"
18#include "llvm/CodeGen/TargetLowering.h"
19
20namespace llvm {
21 class X86Subtarget;
22 class X86TargetMachine;
23
24 namespace X86ISD {
25 // X86 Specific DAG Nodes
26 enum NodeType : unsigned {
27 // Start the numbering where the builtin ops leave off.
28 FIRST_NUMBER = ISD::BUILTIN_OP_END,
29
30 /// Bit scan forward.
31 BSF,
32 /// Bit scan reverse.
33 BSR,
34
35 /// X86 funnel/double shift i16 instructions. These correspond to
36 /// X86::SHLDW and X86::SHRDW instructions which have different amount
37 /// modulo rules from generic funnel shifts.
38 /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
39 FSHL,
40 FSHR,
41
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
45
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
49
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
53
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
57
58 /// These operations represent an abstract X86 call
59 /// instruction, which includes a bunch of information. In particular the
60 /// operands of these node are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
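/// A purely illustrative sketch (hypothetical values and names, not code
/// taken from X86TargetLowering::LowerCall) of how the operand list above
/// lines up:
///   SDValue Ops[] = {Chain, Callee,
///                    DAG.getTargetConstant(/*caller pushes*/ 16, dl, MVT::i32),
///                    DAG.getTargetConstant(/*callee pops*/ 0, dl, MVT::i32),
///                    ALValue /* optional value passed in AL/AX/EAX */};
///   Chain = DAG.getNode(X86ISD::CALL, dl, DAG.getVTList(MVT::Other), Ops);
///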
75 CALL,
76
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
79
80 // Pseudo for an ObjC call that gets emitted together with a special
81 // marker instruction.
82 CALL_RVMARKER,
83
84 // Pseudo for a call to a global address that must be called via a memory
85 // address (i.e., not loaded into a register then called).
86 CALL_GLOBALADDR,
87
88 /// The same as ISD::CopyFromReg except that this node makes it explicit
89 /// that it may lower to an x87 FPU stack pop. Optimizations should be more
90 /// cautious when handling this node than a normal CopyFromReg to avoid
91 /// removing a required FPU stack pop. A key requirement is optimizations
92 /// should not optimize any users of a chain that contains a
93 /// POP_FROM_X87_REG to use a chain from a point earlier than the
94 /// POP_FROM_X87_REG (which may remove a required FPU stack pop).
95 POP_FROM_X87_REG,
96
97 // Pseudo for a call to an imported function to ensure the correct machine
98 // instruction is emitted for Import Call Optimization.
99 IMP_CALL,
100
101 /// X86 compare and logical compare instructions.
102 CMP,
103 FCMP,
104 COMI,
105 UCOMI,
106
107 // X86 compare intrinsics, similar to COMI.
108 COMX,
109 UCOMX,
110
111 /// X86 bit-test instructions.
112 BT,
113
114 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
115 /// operand, usually produced by a CMP instruction.
116 SETCC,
117
118 /// X86 Select
119 SELECTS,
120
121 // Same as SETCC except it's materialized with an SBB and the value is all
122 // ones or all zeros.
123 SETCC_CARRY, // R = carry_bit ? ~0 : 0
124
125 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
126 /// Operands are two FP values to compare; result is a mask of
127 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
128 FSETCC,
129
130 /// X86 FP SETCC, similar to above, but with output as an i1 mask and
131 /// a version with SAE.
132 FSETCCM,
133 FSETCCM_SAE,
134
135 /// X86 conditional moves. Operand 0 and operand 1 are the two values
136 /// to select from. Operand 2 is the condition code, and operand 3 is the
137 /// flag operand produced by a CMP or TEST instruction.
138 CMOV,
139
140 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
141 /// is the block to branch if condition is true, operand 2 is the
142 /// condition code, and operand 3 is the flag operand produced by a CMP
143 /// or TEST instruction.
144 BRCOND,
145
146 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
147 /// operand 1 is the target address.
148 NT_BRIND,
149
150 /// Return with a glue operand. Operand 0 is the chain operand, operand
151 /// 1 is the number of bytes of stack to pop.
152 RET_GLUE,
153
154 /// Return from interrupt. Operand 0 is the number of bytes to pop.
155 IRET,
156
157 /// Repeat fill, corresponds to X86::REP_STOSx.
158 REP_STOS,
159
160 /// Repeat move, corresponds to X86::REP_MOVSx.
161 REP_MOVS,
162
163 /// On Darwin, this node represents the result of the popl
164 /// at function entry, used for PIC code.
165 GlobalBaseReg,
166
167 /// A wrapper node for TargetConstantPool, TargetJumpTable,
168 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
169 /// MCSymbol and TargetBlockAddress.
170 Wrapper,
171
172 /// Special wrapper used under X86-64 PIC mode for RIP
173 /// relative displacements.
174 WrapperRIP,
175
176 /// Copies a 64-bit value from an MMX vector to the low word
177 /// of an XMM vector, with the high word zero filled.
178 MOVQ2DQ,
179
180 /// Copies a 64-bit value from the low word of an XMM vector
181 /// to an MMX vector.
182 MOVDQ2Q,
183
184 /// Copies a 32-bit value from the low word of an MMX
185 /// vector to a GPR.
186 MMX_MOVD2W,
187
188 /// Copies a GPR into the low 32-bit word of an MMX vector
189 /// and zeroes out the high word.
190 MMX_MOVW2D,
191
192 /// Extract an 8-bit value from a vector and zero extend it to
193 /// i32, corresponds to X86::PEXTRB.
194 PEXTRB,
195
196 /// Extract a 16-bit value from a vector and zero extend it to
197 /// i32, corresponds to X86::PEXTRW.
198 PEXTRW,
199
200 /// Insert any element of a 4 x float vector into any element
201 /// of a destination 4 x float vector.
202 INSERTPS,
203
204 /// Insert the lower 8 bits of a 32-bit value into a vector,
205 /// corresponds to X86::PINSRB.
206 PINSRB,
207
208 /// Insert the lower 16 bits of a 32-bit value into a vector,
209 /// corresponds to X86::PINSRW.
210 PINSRW,
211
212 /// Shuffle 16 8-bit values within a vector.
213 PSHUFB,
214
215 /// Compute Sum of Absolute Differences.
216 PSADBW,
217 /// Compute Double Block Packed Sum-Absolute-Differences
218 DBPSADBW,
219
220 /// Bitwise Logical AND NOT of Packed FP values.
221 ANDNP,
222
223 /// Blend where the selector is an immediate.
224 BLENDI,
225
226 /// Dynamic (non-constant condition) vector blend where only the sign bits
227 /// of the condition elements are used. This is used to enforce that the
228 /// condition mask is not valid for generic VSELECT optimizations. This
229 /// is also used to implement the intrinsics.
230 /// Operands are in VSELECT order: MASK, TRUE, FALSE
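/// Purely illustrative (hypothetical values M, A, B): selecting per-element
/// between A and B based on the sign bits of M would be built roughly as
///   SDValue Blend = DAG.getNode(X86ISD::BLENDV, dl, VT, M, A, B);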
231 BLENDV,
232
233 /// Combined add and sub on an FP vector.
234 ADDSUB,
235
236 // FP vector ops with rounding mode.
237 FADD_RND,
238 FADDS,
239 FADDS_RND,
240 FSUB_RND,
241 FSUBS,
242 FSUBS_RND,
243 FMUL_RND,
244 FMULS,
245 FMULS_RND,
246 FDIV_RND,
247 FDIVS,
248 FDIVS_RND,
249 FMAX_SAE,
250 FMAXS_SAE,
251 FMIN_SAE,
252 FMINS_SAE,
253 FSQRT_RND,
254 FSQRTS,
255 FSQRTS_RND,
256
257 // FP vector get exponent.
258 FGETEXP,
259 FGETEXP_SAE,
260 FGETEXPS,
261 FGETEXPS_SAE,
262 // Extract Normalized Mantissas.
263 VGETMANT,
264 VGETMANT_SAE,
265 VGETMANTS,
266 VGETMANTS_SAE,
267 // FP Scale.
268 SCALEF,
269 SCALEF_RND,
270 SCALEFS,
271 SCALEFS_RND,
272
273 /// Integer horizontal add/sub.
274 HADD,
275 HSUB,
276
277 /// Integer horizontal saturating add/sub.
278 HADDS,
279 HSUBS,
280
281 /// Floating point horizontal add/sub.
282 FHADD,
283 FHSUB,
284
285 // Detect Conflicts Within a Vector
286 CONFLICT,
287
288 /// Floating point max and min.
289 FMAX,
290 FMIN,
291
292 /// Commutative FMIN and FMAX.
293 FMAXC,
294 FMINC,
295
296 /// Scalar intrinsic floating point max and min.
297 FMAXS,
298 FMINS,
299
300 /// Floating point reciprocal-sqrt and reciprocal approximation.
301 /// Note that these typically require refinement
302 /// in order to obtain suitable precision.
303 FRSQRT,
304 FRCP,
305
306 // AVX-512 reciprocal approximations with a little more precision.
307 RSQRT14,
308 RSQRT14S,
309 RCP14,
310 RCP14S,
311
312 // Thread Local Storage.
313 TLSADDR,
314
315 // Thread Local Storage. A call to get the start address
316 // of the TLS block for the current module.
317 TLSBASEADDR,
318
319 // Thread Local Storage. A call to an OS-provided
320 // thunk at the address from an earlier relocation.
321 TLSCALL,
322
323 // Thread Local Storage. A descriptor containing a pointer to the
324 // code and a pointer to the argument used to get the TLS offset for the symbol.
325 TLSDESC,
326
327 // Exception Handling helpers.
328 EH_RETURN,
329
330 // SjLj exception handling setjmp.
331 EH_SJLJ_SETJMP,
332
333 // SjLj exception handling longjmp.
334 EH_SJLJ_LONGJMP,
335
336 // SjLj exception handling dispatch.
337 EH_SJLJ_SETUP_DISPATCH,
338
339 /// Tail call return. See X86TargetLowering::LowerCall for
340 /// the list of operands.
341 TC_RETURN,
342
343 // Pseudo for a tail call return to a global address that must be called via
344 // a memory address (i.e., not loaded into a register then called).
345 TC_RETURN_GLOBALADDR,
346
347 // Vector move to low scalar and zero higher vector elements.
348 VZEXT_MOVL,
349
350 // Vector integer truncate.
351 VTRUNC,
352 // Vector integer truncate with unsigned/signed saturation.
353 VTRUNCUS,
354 VTRUNCS,
355
356 // Masked version of the above. Used when less than a 128-bit result is
357 // produced since the mask only applies to the lower elements and can't
358 // be represented by a select.
359 // SRC, PASSTHRU, MASK
360 VMTRUNC,
361 VMTRUNCUS,
362 VMTRUNCS,
363
364 // Vector FP extend.
365 VFPEXT,
366 VFPEXT_SAE,
367 VFPEXTS,
368 VFPEXTS_SAE,
369
370 // Vector FP round.
371 VFPROUND,
372 // Convert two packed single-precision inputs to one packed result
373 VFPROUND2,
374 VFPROUND2_RND,
375 VFPROUND_RND,
376 VFPROUNDS,
377 VFPROUNDS_RND,
378
379 // Masked version of above. Used for v2f64->v4f32.
380 // SRC, PASSTHRU, MASK
381 VMFPROUND,
382
383 // 128-bit vector logical left / right shift
384 VSHLDQ,
385 VSRLDQ,
386
387 // Vector shift elements
388 VSHL,
389 VSRL,
390 VSRA,
391
392 // Vector variable shift
393 VSHLV,
394 VSRLV,
395 VSRAV,
396
397 // Vector shift elements by immediate
398 VSHLI,
399 VSRLI,
400 VSRAI,
401
402 // Shifts of mask registers.
403 KSHIFTL,
404 KSHIFTR,
405
406 // Bit rotate by immediate
407 VROTLI,
408 VROTRI,
409
410 // Vector packed double/float comparison.
411 CMPP,
412
413 // Vector integer comparisons.
414 PCMPEQ,
415 PCMPGT,
416
417 // v8i16 Horizontal minimum and position.
418 PHMINPOS,
419
420 MULTISHIFT,
421
422 /// Vector comparison generating mask bits for fp and
423 /// integer signed and unsigned data types.
424 CMPM,
425 // Vector mask comparison generating mask bits for FP values.
426 CMPMM,
427 // Vector mask comparison with SAE for FP values.
428 CMPMM_SAE,
429
430 // Arithmetic operations with FLAGS results.
431 ADD,
432 SUB,
433 ADC,
434 SBB,
435 SMUL,
436 UMUL,
437 OR,
438 XOR,
439 AND,
440
441 // Bit field extract.
442 BEXTR,
443 BEXTRI,
444
445 // Zero High Bits Starting with Specified Bit Position.
446 BZHI,
447
448 // Parallel extract and deposit.
449 PDEP,
450 PEXT,
451
452 // X86-specific multiply by immediate.
453 MUL_IMM,
454
455 // Vector sign bit extraction.
456 MOVMSK,
457
458 // Vector bitwise comparisons.
459 PTEST,
460
461 // Vector packed fp sign bitwise comparisons.
462 TESTP,
463
464 // OR/AND test for masks.
465 KORTEST,
466 KTEST,
467
468 // ADD for masks.
469 KADD,
470
471 // Several flavors of instructions with vector shuffle behaviors.
472 // Saturated signed/unsigned packing.
473 PACKSS,
474 PACKUS,
475 // Intra-lane alignr.
476 PALIGNR,
477 // AVX512 inter-lane alignr.
478 VALIGN,
479 PSHUFD,
480 PSHUFHW,
481 PSHUFLW,
482 SHUFP,
483 // VBMI2 Concat & Shift.
484 VSHLD,
485 VSHRD,
486
487 // Shuffle Packed Values at 128-bit granularity.
488 SHUF128,
489 MOVDDUP,
490 MOVSHDUP,
491 MOVSLDUP,
492 MOVLHPS,
493 MOVHLPS,
494 MOVSD,
495 MOVSS,
496 MOVSH,
497 UNPCKL,
498 UNPCKH,
499 VPERMILPV,
500 VPERMILPI,
501 VPERMI,
502 VPERM2X128,
503
504 // Variable Permute (VPERM).
505 // Res = VPERMV MaskV, V0
506 VPERMV,
507
508 // 3-op Variable Permute (VPERMT2).
509 // Res = VPERMV3 V0, MaskV, V1
510 VPERMV3,
511
512 // Bitwise ternary logic.
513 VPTERNLOG,
514 // Fix Up Special Packed Float32/64 values.
515 VFIXUPIMM,
516 VFIXUPIMM_SAE,
517 VFIXUPIMMS,
518 VFIXUPIMMS_SAE,
519 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
520 VRANGE,
521 VRANGE_SAE,
522 VRANGES,
523 VRANGES_SAE,
524 // Reduce - Perform Reduction Transformation on scalar/packed FP.
525 VREDUCE,
526 VREDUCE_SAE,
527 VREDUCES,
528 VREDUCES_SAE,
529 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
530 // Also used by the legacy (V)ROUND intrinsics where we mask out the
531 // scaling part of the immediate.
532 VRNDSCALE,
533 VRNDSCALE_SAE,
534 VRNDSCALES,
535 VRNDSCALES_SAE,
536 // Tests types of packed FP values.
537 VFPCLASS,
538 // Tests types of scalar FP values.
539 VFPCLASSS,
540
541 // Broadcast (splat) scalar or element 0 of a vector. If the operand is
542 // a vector, this node may change the vector length as part of the splat.
543 VBROADCAST,
544 // Broadcast mask to vector.
545 VBROADCASTM,
546
547 /// SSE4A Extraction and Insertion.
548 EXTRQI,
549 INSERTQI,
550
551 // XOP arithmetic/logical shifts.
552 VPSHA,
553 VPSHL,
554 // XOP signed/unsigned integer comparisons.
555 VPCOM,
556 VPCOMU,
557 // XOP packed permute bytes.
558 VPPERM,
559 // XOP two source permutation.
560 VPERMIL2,
561
562 // Vector multiply packed unsigned doubleword integers.
563 PMULUDQ,
564 // Vector multiply packed signed doubleword integers.
565 PMULDQ,
566 // Vector Multiply Packed Unsigned Integers with Round and Scale.
567 MULHRS,
568
569 // Multiply and Add Packed Integers.
570 VPMADDUBSW,
571 VPMADDWD,
572
573 // AVX512IFMA multiply and add.
574 // NOTE: These are different from the instructions and perform
575 // op0 * op1 + op2.
576 VPMADD52L,
577 VPMADD52H,
578
579 // VNNI
580 VPDPBUSD,
581 VPDPBUSDS,
582 VPDPWSSD,
583 VPDPWSSDS,
584
585 // FMA nodes.
586 // We use the target independent ISD::FMA for the non-inverted case.
587 FNMADD,
588 FMSUB,
589 FNMSUB,
590 FMADDSUB,
591 FMSUBADD,
592
593 // FMA with rounding mode.
594 FMADD_RND,
595 FNMADD_RND,
596 FMSUB_RND,
597 FNMSUB_RND,
598 FMADDSUB_RND,
599 FMSUBADD_RND,
600
601 // AVX512-FP16 complex addition and multiplication.
602 VFMADDC,
603 VFMADDC_RND,
604 VFCMADDC,
605 VFCMADDC_RND,
606
607 VFMULC,
608 VFMULC_RND,
609 VFCMULC,
610 VFCMULC_RND,
611
612 VFMADDCSH,
613 VFMADDCSH_RND,
614 VFCMADDCSH,
615 VFCMADDCSH_RND,
616
617 VFMULCSH,
618 VFMULCSH_RND,
619 VFCMULCSH,
620 VFCMULCSH_RND,
621
622 VPDPBSUD,
623 VPDPBSUDS,
624 VPDPBUUD,
625 VPDPBUUDS,
626 VPDPBSSD,
627 VPDPBSSDS,
628
629 VPDPWSUD,
630 VPDPWSUDS,
631 VPDPWUSD,
632 VPDPWUSDS,
633 VPDPWUUD,
634 VPDPWUUDS,
635
636 VMINMAX,
637 VMINMAX_SAE,
638 VMINMAXS,
639 VMINMAXS_SAE,
640
641 CVTP2IBS,
642 CVTP2IUBS,
643 CVTP2IBS_RND,
644 CVTP2IUBS_RND,
645 CVTTP2IBS,
646 CVTTP2IUBS,
647 CVTTP2IBS_SAE,
648 CVTTP2IUBS_SAE,
649
650 MPSADBW,
651
652 VCVT2PH2BF8,
653 VCVT2PH2BF8S,
654 VCVT2PH2HF8,
655 VCVT2PH2HF8S,
656 VCVTBIASPH2BF8,
657 VCVTBIASPH2BF8S,
658 VCVTBIASPH2HF8,
659 VCVTBIASPH2HF8S,
660 VCVTPH2BF8,
661 VCVTPH2BF8S,
662 VCVTPH2HF8,
663 VCVTPH2HF8S,
664 VMCVTBIASPH2BF8,
665 VMCVTBIASPH2BF8S,
666 VMCVTBIASPH2HF8,
667 VMCVTBIASPH2HF8S,
668 VMCVTPH2BF8,
669 VMCVTPH2BF8S,
670 VMCVTPH2HF8,
671 VMCVTPH2HF8S,
672 VCVTHF82PH,
673
674 // Compress and expand.
675 COMPRESS,
676 EXPAND,
677
678 // Bits shuffle
679 VPSHUFBITQMB,
680
681 // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
682 SINT_TO_FP_RND,
683 UINT_TO_FP_RND,
684 SCALAR_SINT_TO_FP,
685 SCALAR_UINT_TO_FP,
686 SCALAR_SINT_TO_FP_RND,
687 SCALAR_UINT_TO_FP_RND,
688
689 // Vector float/double to signed/unsigned integer.
690 CVTP2SI,
691 CVTP2UI,
692 CVTP2SI_RND,
693 CVTP2UI_RND,
694 // Scalar float/double to signed/unsigned integer.
695 CVTS2SI,
696 CVTS2UI,
697 CVTS2SI_RND,
698 CVTS2UI_RND,
699
700 // Vector float/double to signed/unsigned integer with truncation.
701 CVTTP2SI,
702 CVTTP2UI,
703 CVTTP2SI_SAE,
704 CVTTP2UI_SAE,
705
706 // Saturation enabled Vector float/double to signed/unsigned
707 // integer with truncation.
708 CVTTP2SIS,
709 CVTTP2UIS,
710 CVTTP2SIS_SAE,
711 CVTTP2UIS_SAE,
712 // Masked versions of above. Used for v2f64 to v4i32.
713 // SRC, PASSTHRU, MASK
714 MCVTTP2SIS,
715 MCVTTP2UIS,
716
717 // Scalar float/double to signed/unsigned integer with truncation.
718 CVTTS2SI,
719 CVTTS2UI,
720 CVTTS2SI_SAE,
721 CVTTS2UI_SAE,
722
723 // Vector signed/unsigned integer to float/double.
724 CVTSI2P,
725 CVTUI2P,
726
727 // Scalar float/double to signed/unsigned integer with saturation.
728 CVTTS2SIS,
729 CVTTS2UIS,
730 CVTTS2SIS_SAE,
731 CVTTS2UIS_SAE,
732
733 // Masked versions of above. Used for v2f64->v4f32.
734 // SRC, PASSTHRU, MASK
735 MCVTP2SI,
736 MCVTP2UI,
737 MCVTTP2SI,
738 MCVTTP2UI,
739 MCVTSI2P,
740 MCVTUI2P,
741
742 // Custom handling for FP_TO_xINT_SAT
743 FP_TO_SINT_SAT,
744 FP_TO_UINT_SAT,
745
746 // Vector float to bfloat16.
747 // Convert packed single data to packed BF16 data
748 CVTNEPS2BF16,
749 // Masked version of above.
750 // SRC, PASSTHRU, MASK
751 MCVTNEPS2BF16,
752
753 // Dot product of BF16/FP16 pairs, accumulated into
754 // packed single precision.
755 DPBF16PS,
756 DPFP16PS,
757
758 // A stack checking function call. On Windows it's a _chkstk call.
759 DYN_ALLOCA,
760
761 // For allocating variable amounts of stack space when using
762 // segmented stacks. Checks if the current stacklet has enough space, and
763 // falls back to heap allocation if not.
764 SEG_ALLOCA,
765
766 // For allocating stack space when using stack clash protector.
767 // Allocation is performed by block, and each block is probed.
768 PROBED_ALLOCA,
769
770 // Memory barriers.
771 MFENCE,
772
773 // Get a random integer and indicate whether it is valid in CF.
774 RDRAND,
775
776 // Get a NIST SP800-90B & C compliant random integer and
777 // indicate whether it is valid in CF.
778 RDSEED,
779
780 // Protection keys
781 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
782 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
783 // value for ECX.
784 RDPKRU,
785 WRPKRU,
786
787 // SSE42 string comparisons.
788 // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
789 // will emit one or two instructions based on which results are used. If
790 // both the flags and the index/mask are used, this allows us to use a single
791 // instruction since we won't have to pick an opcode for the flags. Instead
792 // we can rely on the DAG to CSE everything and decide at isel.
793 PCMPISTR,
794 PCMPESTR,
795
796 // Test if in transactional execution.
797 XTEST,
798
799 // Conversions between float and half-float.
800 CVTPS2PH,
801 CVTPS2PH_SAE,
802 CVTPH2PS,
803 CVTPH2PS_SAE,
804
805 // Masked version of above.
806 // SRC, RND, PASSTHRU, MASK
807 MCVTPS2PH,
808 MCVTPS2PH_SAE,
809
810 // Galois Field Arithmetic Instructions
811 GF2P8AFFINEINVQB,
812 GF2P8AFFINEQB,
813 GF2P8MULB,
814
815 // Carry-less multiplication
816 PCLMULQDQ,
817
818 // LWP insert record.
819 LWPINS,
820
821 // User level wait
822 UMWAIT,
823 TPAUSE,
824
825 // Enqueue Stores Instructions
826 ENQCMD,
827 ENQCMDS,
828
829 // For avx512-vp2intersect
830 VP2INTERSECT,
831
832 // User level interrupts - testui
833 TESTUI,
834
835 // Perform an FP80 add after changing precision control in FPCW.
836 FP80_ADD,
837
838 // Conditional compare instructions
839 CCMP,
840 CTEST,
841
842 /// X86 strict FP compare instructions.
843 FIRST_STRICTFP_OPCODE,
844 STRICT_FCMP = FIRST_STRICTFP_OPCODE,
845 STRICT_FCMPS,
846
847 // Vector packed double/float comparison.
848 STRICT_CMPP,
849
850 /// Vector comparison generating mask bits for fp and
851 /// integer signed and unsigned data types.
852 STRICT_CMPM,
853
854 // Vector float/double to signed/unsigned integer with truncation.
855 STRICT_CVTTP2SI,
856 STRICT_CVTTP2UI,
857
858 // Vector FP extend.
859 STRICT_VFPEXT,
860
861 // Vector FP round.
862 STRICT_VFPROUND,
863
864 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
865 // Also used by the legacy (V)ROUND intrinsics where we mask out the
866 // scaling part of the immediate.
867 STRICT_VRNDSCALE,
868
869 // Vector signed/unsigned integer to float/double.
870 STRICT_CVTSI2P,
871 STRICT_CVTUI2P,
872
873 // Strict FMA nodes.
874 STRICT_FNMADD,
875 STRICT_FMSUB,
876 STRICT_FNMSUB,
877
878 // Conversions between float and half-float.
879 STRICT_CVTPS2PH,
880 STRICT_CVTPH2PS,
881
882 // Perform an FP80 add after changing precision control in FPCW.
883 STRICT_FP80_ADD,
884
885 /// Floating point max and min.
886 STRICT_FMAX,
887 STRICT_FMIN,
888 LAST_STRICTFP_OPCODE = STRICT_FMIN,
889
890 // Compare and swap.
891 FIRST_MEMORY_OPCODE,
892 LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
893 LCMPXCHG8_DAG,
894 LCMPXCHG16_DAG,
895 LCMPXCHG16_SAVE_RBX_DAG,
896
897 /// LOCK-prefixed arithmetic read-modify-write instructions.
898 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
899 LADD,
900 LSUB,
901 LOR,
902 LXOR,
903 LAND,
904 LBTS,
905 LBTC,
906 LBTR,
907 LBTS_RM,
908 LBTC_RM,
909 LBTR_RM,
910
911 /// RAO arithmetic instructions.
912 /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
913 AADD,
914 AOR,
915 AXOR,
916 AAND,
917
918 // Load, scalar_to_vector, and zero extend.
919 VZEXT_LOAD,
920
921 // extract_vector_elt, store.
922 VEXTRACT_STORE,
923
924 // scalar broadcast from memory.
925 VBROADCAST_LOAD,
926
927 // subvector broadcast from memory.
928 SUBV_BROADCAST_LOAD,
929
930 // Store FP control word into i16 memory.
931 FNSTCW16m,
932
933 // Load FP control word from i16 memory.
934 FLDCW16m,
935
936 // Store x87 FPU environment into memory.
937 FNSTENVm,
938
939 // Load x87 FPU environment from memory.
940 FLDENVm,
941
942 /// This instruction implements FP_TO_SINT with the
943 /// integer destination in memory and a FP reg source. This corresponds
944 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
945 /// has two inputs (token chain and address) and two outputs (int value
946 /// and token chain). Memory VT specifies the type to store to.
947 FP_TO_INT_IN_MEM,
948
949 /// This instruction implements SINT_TO_FP with the
950 /// integer source in memory and FP reg result. This corresponds to the
951 /// X86::FILD*m instructions. It has two inputs (token chain and address)
952 /// and two outputs (FP value and token chain). The integer source type is
953 /// specified by the memory VT.
954 FILD,
955
956 /// This instruction implements a fp->int store from FP stack
957 /// slots. This corresponds to the fist instruction. It takes a
958 /// chain operand, value to store, address, and glue. The memory VT
959 /// specifies the type to store as.
960 FIST,
961
962 /// This instruction implements an extending load to FP stack slots.
963 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
964 /// operand, and ptr to load from. The memory VT specifies the type to
965 /// load from.
966 FLD,
967
968 /// This instruction implements a truncating store from FP stack
969 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
970 /// chain operand, value to store, address, and glue. The memory VT
971 /// specifies the type to store as.
972 FST,
973
974 /// These instructions grab the address of the next argument
975 /// from a va_list. (reads and modifies the va_list in memory)
976 VAARG_64,
977 VAARG_X32,
978
979 // Vector truncating store with unsigned/signed saturation
980 VTRUNCSTOREUS,
981 VTRUNCSTORES,
982 // Vector truncating masked store with unsigned/signed saturation
983 VMTRUNCSTOREUS,
984 VMTRUNCSTORES,
985
986 // X86 specific gather and scatter
987 MGATHER,
988 MSCATTER,
989
990 // Key locker nodes that produce flags.
991 AESENC128KL,
992 AESDEC128KL,
993 AESENC256KL,
994 AESDEC256KL,
995 AESENCWIDE128KL,
996 AESDECWIDE128KL,
997 AESENCWIDE256KL,
998 AESDECWIDE256KL,
999
1000 /// Compare and Add if Condition is Met. Compares the value in operand 2 with
1001 /// the value in memory at operand 1. If the condition of operand 4 is met,
1002 /// adds the value of operand 3 to the memory operand and writes the new value
1003 /// to operand 1. Operand 2 is always updated with the original value from operand 1.
1004 CMPCCXADD,
1005
1006 // Save xmm argument registers to the stack, according to %al. An operator
1007 // is needed so that this can be expanded with control flow.
1008 VASTART_SAVE_XMM_REGS,
1009
1010 // Conditional load/store instructions
1011 CLOAD,
1012 CSTORE,
1013 LAST_MEMORY_OPCODE = CSTORE,
1014 };
1015 } // end namespace X86ISD
1016
1017 namespace X86 {
1018 /// The current rounding mode is represented in bits 11:10 of the FPCW. These
1019 /// values are the same as the corresponding constants for rounding modes
1020 /// used in glibc.
1021 enum RoundingMode {
1022 rmInvalid = -1, // Used for an invalid rounding mode
1023 rmToNearest = 0, // FE_TONEAREST
1024 rmDownward = 1 << 10, // FE_DOWNWARD
1025 rmUpward = 2 << 10, // FE_UPWARD
1026 rmTowardZero = 3 << 10, // FE_TOWARDZERO
1027 rmMask = 3 << 10 // Bit mask selecting rounding mode
1028 };
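// A minimal illustration (not part of the API) of how these encodings relate
// to the 2-bit rounding-control field, assuming CW holds the FPU control word:
//   unsigned RC = CW & X86::rmMask;   // isolate bits 11:10
//   bool RoundsTowardZero = (RC == X86::rmTowardZero);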
1029 }
1030
1031 /// Define some predicates that are used for node matching.
1032 namespace X86 {
1033 /// Returns true if Elt is a constant zero or floating point constant +0.0.
1034 bool isZeroNode(SDValue Elt);
1035
1036 /// Returns true if the given offset can
1037 /// fit into the displacement field of the instruction.
1038 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
1039 bool hasSymbolicDisplacement);
1040
1041 /// Determines whether the callee is required to pop its
1042 /// own arguments. Callee pop is necessary to support tail calls.
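/// Illustrative use only (hypothetical call site): stdcall-style callees on
/// 32-bit targets pop their own arguments:
///   bool CalleePops = X86::isCalleePop(CallingConv::X86_StdCall,
///                                      /*is64Bit=*/false, /*IsVarArg=*/false,
///                                      /*GuaranteeTCO=*/false);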
1043 bool isCalleePop(CallingConv::ID CallingConv,
1044 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
1045
1046 /// If Op is a constant whose elements are all the same constant or
1047 /// undefined, return true and return the constant value in \p SplatVal.
1048 /// If we have undef bits that don't cover an entire element, we treat these
1049 /// as zero if AllowPartialUndefs is set, else we fail and return false.
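/// Illustrative use only (hypothetical call site):
///   APInt SplatVal;
///   if (X86::isConstantSplat(Op, SplatVal, /*AllowPartialUndefs=*/false) &&
///       SplatVal.isAllOnes())
///     ... // every element of Op is known to be all-ones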
1050 bool isConstantSplat(SDValue Op, APInt &SplatVal,
1051 bool AllowPartialUndefs = true);
1052
1053 /// Check if Op is a load operation that could be folded into some other x86
1054 /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
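/// Illustrative use only (hypothetical combine): fold the operand only if it
/// can become a memory operand of the new instruction:
///   if (X86::mayFoldLoad(N->getOperand(1), Subtarget))
///     ... // safe to select a reg-mem form such as vpaddd (%rdi), %xmm0, %xmm0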
1055 bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
1056 bool AssumeSingleUse = false,
1057 bool IgnoreAlignment = false);
1058
1059 /// Check if Op is a load operation that could be folded into a vector splat
1060 /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
1061 bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
1062 const X86Subtarget &Subtarget,
1063 bool AssumeSingleUse = false);
1064
1065 /// Check if Op is a value that could be used to fold a store into some
1066 /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
1067 bool mayFoldIntoStore(SDValue Op);
1068
1069 /// Check if Op is an operation that could be folded into a zero extend x86
1070 /// instruction.
1071 bool mayFoldIntoZeroExtend(SDValue Op);
1072
1073 /// True if the target supports the extended frame for async Swift
1074 /// functions.
1075 bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
1076 const MachineFunction &MF);
1077
1078 /// Convert LLVM rounding mode to X86 rounding mode.
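/// Illustrative use only (hypothetical): mapping a constrained-FP rounding
/// mode to the X86 encoding before writing the control bits:
///   int X86RM = X86::getRoundingModeX86((unsigned)RoundingMode::TowardZero);
///   if (X86RM != X86::rmInvalid)
///     ... // X86RM holds the bits-11:10 encoding (here X86::rmTowardZero)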
1079 int getRoundingModeX86(unsigned RM);
1080
1081 } // end namespace X86
1082
1083 //===--------------------------------------------------------------------===//
1084 // X86 Implementation of the TargetLowering interface
1085 class X86TargetLowering final : public TargetLowering {
1086 // Copying needed for an outgoing byval argument.
1087 enum ByValCopyKind {
1088 // Argument is already in the correct location, no copy needed.
1089 NoCopy,
1090 // Argument value is currently in the local stack frame, needs copying to
1091 // outgoing argument area.
1092 CopyOnce,
1093 // Argument value is currently in the outgoing argument area, but not at
1094 // the correct offset, so needs copying via a temporary in local stack
1095 // space.
1096 CopyViaTemp,
1097 };
1098
1099 public:
1100 explicit X86TargetLowering(const X86TargetMachine &TM,
1101 const X86Subtarget &STI);
1102
1103 unsigned getJumpTableEncoding() const override;
1104 bool useSoftFloat() const override;
1105
1106 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
1107 ArgListTy &Args) const override;
1108
1109 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
1110 return MVT::i8;
1111 }
1112
1113 const MCExpr *
1114 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1115 const MachineBasicBlock *MBB, unsigned uid,
1116 MCContext &Ctx) const override;
1117
1118 /// Returns relocation base for the given PIC jumptable.
1119 SDValue getPICJumpTableRelocBase(SDValue Table,
1120 SelectionDAG &DAG) const override;
1121 const MCExpr *
1122 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
1123 unsigned JTI, MCContext &Ctx) const override;
1124
1125 /// Return the desired alignment for ByVal aggregate
1126 /// function arguments in the caller parameter area. For X86, aggregates
1127 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
1128 /// are at 4-byte boundaries.
1129 Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;
1130
1131 EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
1132 const AttributeList &FuncAttributes) const override;
1133
1134 /// Returns true if it's safe to use load / store of the
1135 /// specified type to expand memcpy / memset inline. This is mostly true
1136 /// for all types except for some special cases. For example, on X86
1137 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
1138 /// also does type conversion. Note the specified type doesn't have to be
1139 /// legal as the hook is used before type legalization.
1140 bool isSafeMemOpType(MVT VT) const override;
1141
1142 bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1143
1144 /// Returns true if the target allows unaligned memory accesses of the
1145 /// specified type. Returns whether it is "fast" in the last argument.
1146 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1147 MachineMemOperand::Flags Flags,
1148 unsigned *Fast) const override;
1149
1150 /// This function returns true if the memory access is aligned or if the
1151 /// target allows this specific unaligned memory access. If the access is
1152 /// allowed, the optional final parameter returns a relative speed of the
1153 /// access (as defined by the target).
1154 bool allowsMemoryAccess(
1155 LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1156 Align Alignment,
1157 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1158 unsigned *Fast = nullptr) const override;
1159
1160 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1161 const MachineMemOperand &MMO,
1162 unsigned *Fast) const {
1163 return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
1164 MMO.getAlign(), MMO.getFlags(), Fast);
1165 }
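// Illustrative use only (hypothetical caller): query a load's memory operand
// directly, without unpacking its alignment and flags by hand:
//   unsigned Fast = 0;
//   if (allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
//                          Ld->getMemoryVT(), *Ld->getMemOperand(), &Fast) &&
//       Fast)
//     ... // a single access at this alignment is both legal and fast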
1166
1167 /// Provide custom lowering hooks for some operations.
1168 ///
1169 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1170
1171 /// Replace the results of node with an illegal result
1172 /// type with new values built out of custom code.
1173 ///
1174 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1175 SelectionDAG &DAG) const override;
1176
1177 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1178
1179 bool preferABDSToABSWithNSW(EVT VT) const override;
1180
1181 bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
1182 EVT ExtVT) const override;
1183
1184 bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
1185 EVT VT) const override;
1186
1187 /// Return true if the target has native support for
1188 /// the specified value type and it is 'desirable' to use the type for the
1189 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1190 /// instruction encodings are longer and some i16 instructions are slow.
1191 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1192
1193 /// Return true if the target has native support for the
1194 /// specified value type and it is 'desirable' to use the type. e.g. On x86
1195 /// i16 is legal, but undesirable since i16 instruction encodings are longer
1196 /// and some i16 instructions are slow.
1197 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1198
1199 /// Return the preferred fold type: ABS if this is a vector, AddAnd if it's
1200 /// an integer, None otherwise.
1201 TargetLowering::AndOrSETCCFoldKind
1202 isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
1203 const SDNode *SETCC0,
1204 const SDNode *SETCC1) const override;
1205
1206 /// Return the newly negated expression if the cost is not expensive and
1207 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
1208 /// do the negation.
1209 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1210 bool LegalOperations, bool ForCodeSize,
1211 NegatibleCost &Cost,
1212 unsigned Depth) const override;
1213
1214 MachineBasicBlock *
1215 EmitInstrWithCustomInserter(MachineInstr &MI,
1216 MachineBasicBlock *MBB) const override;
1217
1218 /// This method returns the name of a target specific DAG node.
1219 const char *getTargetNodeName(unsigned Opcode) const override;
1220
1221 /// Do not merge vector stores after legalization because that may conflict
1222 /// with x86-specific store splitting optimizations.
1223 bool mergeStoresAfterLegalization(EVT MemVT) const override {
1224 return !MemVT.isVector();
1225 }
1226
1227 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1228 const MachineFunction &MF) const override;
1229
1230 bool isCheapToSpeculateCttz(Type *Ty) const override;
1231
1232 bool isCheapToSpeculateCtlz(Type *Ty) const override;
1233
1234 bool isCtlzFast() const override;
1235
1236 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1237 // If the pair to store is a mixture of float and int values, we will
1238 // save two bitwise instructions and one float-to-int instruction and
1239 // increase one store instruction. There is potentially a more
1240 // significant benefit because it avoids the float->int domain switch
1241 // for the input value, so it is more likely a win.
1242 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1243 (LTy.isInteger() && HTy.isFloatingPoint()))
1244 return true;
1245 // If the pair only contains int values, we will save two bitwise
1246 // instructions and increase one store instruction (costing one more
1247 // store buffer). Since the benefit is less clear, we leave
1248 // such pairs out until we get a test case proving it is a win.
1249 return false;
1250 }
1251
1252 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1253
1254 bool hasAndNotCompare(SDValue Y) const override;
1255
1256 bool hasAndNot(SDValue Y) const override;
1257
1258 bool hasBitTest(SDValue X, SDValue Y) const override;
1259
1260 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1261 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1262 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1263 SelectionDAG &DAG) const override;
1264
1265 unsigned preferedOpcodeForCmpEqPiecesOfOperand(
1266 EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
1267 const APInt &ShiftOrRotateAmt,
1268 const std::optional<APInt> &AndMask) const override;
1269
1270 bool preferScalarizeSplat(SDNode *N) const override;
1271
1272 CondMergingParams
1273 getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1274 const Value *Rhs) const override;
1275
1276 bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override;
1277
1278 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1279
1280 bool
1281 shouldTransformSignedTruncationCheck(EVT XVT,
1282 unsigned KeptBits) const override {
1283 // For vectors, we don't have a preference.
1284 if (XVT.isVector())
1285 return false;
1286
1287 auto VTIsOk = [](EVT VT) -> bool {
1288 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1289 VT == MVT::i64;
1290 };
1291
1292 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX supports.
1293 // XVT will be larger than KeptBitsVT.
1294 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1295 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1296 }
1297
1298 ShiftLegalizationStrategy
1299 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1300 unsigned ExpansionFactor) const override;
1301
1302 bool shouldSplatInsEltVarIndex(EVT VT) const override;
1303
1304 bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1305 // Converting to sat variants holds little benefit on X86 as we will just
1306 // need to saturate the value back using fp arithmetic.
1307 return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1308 }
1309
1310 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1311 return VT.isScalarInteger();
1312 }
1313
1314 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1315 MVT hasFastEqualityCompare(unsigned NumBits) const override;
1316
1317 /// Return the value type to use for ISD::SETCC.
1318 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1319 EVT VT) const override;
1320
1321 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1322 const APInt &DemandedElts,
1323 TargetLoweringOpt &TLO) const override;
1324
1325 /// Determine which of the bits specified in Mask are known to be either
1326 /// zero or one and return them in the KnownZero/KnownOne bitsets.
1327 void computeKnownBitsForTargetNode(const SDValue Op,
1328 KnownBits &Known,
1329 const APInt &DemandedElts,
1330 const SelectionDAG &DAG,
1331 unsigned Depth = 0) const override;
1332
1333 /// Determine the number of bits in the operation that are sign bits.
1334 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1335 const APInt &DemandedElts,
1336 const SelectionDAG &DAG,
1337 unsigned Depth) const override;
1338
1339 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1340 const APInt &DemandedElts,
1341 APInt &KnownUndef,
1342 APInt &KnownZero,
1343 TargetLoweringOpt &TLO,
1344 unsigned Depth) const override;
1345
1346 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1347 const APInt &DemandedElts,
1348 unsigned MaskIndex,
1349 TargetLoweringOpt &TLO,
1350 unsigned Depth) const;
1351
1352 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1353 const APInt &DemandedBits,
1354 const APInt &DemandedElts,
1355 KnownBits &Known,
1356 TargetLoweringOpt &TLO,
1357 unsigned Depth) const override;
1358
1359 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1360 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1361 SelectionDAG &DAG, unsigned Depth) const override;
1362
1363 bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1364 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1365 bool PoisonOnly, unsigned Depth) const override;
1366
1367 bool canCreateUndefOrPoisonForTargetNode(
1368 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1369 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1370
1371 bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1372 APInt &UndefElts, const SelectionDAG &DAG,
1373 unsigned Depth) const override;
1374
1375 bool isTargetCanonicalConstantNode(SDValue Op) const override {
1376 // Peek through bitcasts/extracts/inserts to see if we have a vector
1377 // load/broadcast from memory.
1378 while (Op.getOpcode() == ISD::BITCAST ||
1379 Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1380 (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1381 Op.getOperand(0).isUndef()))
1382 Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1383
1384 return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1385 Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
1386 (Op.getOpcode() == ISD::LOAD &&
1387 getTargetConstantFromLoad(cast<LoadSDNode>(Op))) ||
1388 TargetLowering::isTargetCanonicalConstantNode(Op);
1389 }
1390
1391 bool isTargetCanonicalSelect(SDNode *N) const override;
1392
1393 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1394
1395 SDValue unwrapAddress(SDValue N) const override;
1396
1397 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1398
1399 ConstraintType getConstraintType(StringRef Constraint) const override;
1400
1401 /// Examine constraint string and operand type and determine a weight value.
1402 /// The operand object must already have been set up with the operand type.
1403 ConstraintWeight
1404 getSingleConstraintMatchWeight(AsmOperandInfo &Info,
1405 const char *Constraint) const override;
1406
1407 const char *LowerXConstraint(EVT ConstraintVT) const override;
1408
1409 /// Lower the specified operand into the Ops vector. If it is invalid, don't
1410 /// add anything to Ops. If hasMemory is true it means one of the asm
1411 /// constraints of the inline asm instruction being processed is 'm'.
1412 void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1413 std::vector<SDValue> &Ops,
1414 SelectionDAG &DAG) const override;
1415
1416 InlineAsm::ConstraintCode
1417 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1418 if (ConstraintCode == "v")
1419 return InlineAsm::ConstraintCode::v;
1420 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1421 }
1422
1423 /// Handle Lowering flag assembly outputs.
1424 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1425 const SDLoc &DL,
1426 const AsmOperandInfo &Constraint,
1427 SelectionDAG &DAG) const override;
1428
1429 /// Given a physical register constraint
1430 /// (e.g. {edx}), return the register number and the register class for the
1431 /// register. This should only be used for C_Register constraints. On
1432 /// error, this returns a register number of 0.
1433 std::pair<unsigned, const TargetRegisterClass *>
1434 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1435 StringRef Constraint, MVT VT) const override;
1436
1437 /// Return true if the addressing mode represented
1438 /// by AM is legal for this target, for a load/store of the specified type.
1439 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1440 Type *Ty, unsigned AS,
1441 Instruction *I = nullptr) const override;
1442
1443 bool addressingModeSupportsTLS(const GlobalValue &GV) const override;
1444
1445 /// Return true if the specified immediate is legal
1446 /// icmp immediate, that is the target has icmp instructions which can
1447 /// compare a register against the immediate without having to materialize
1448 /// the immediate into a register.
1449 bool isLegalICmpImmediate(int64_t Imm) const override;
1450
1451 /// Return true if the specified immediate is legal
1452 /// add immediate, that is the target has add instructions which can
1453 /// add a register and the immediate without having to materialize
1454 /// the immediate into a register.
1455 bool isLegalAddImmediate(int64_t Imm) const override;
1456
1457 bool isLegalStoreImmediate(int64_t Imm) const override;
1458
1459 /// Add x86-specific opcodes to the default list.
1460 bool isBinOp(unsigned Opcode) const override;
1461
1462 /// Returns true if the opcode is a commutative binary operation.
1463 bool isCommutativeBinOp(unsigned Opcode) const override;
1464
1465 /// Return true if it's free to truncate a value of
1466 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1467 /// register EAX to i16 by referencing its sub-register AX.
1468 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1469 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1470
1471 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1472
1473 /// Return true if any actual instruction that defines a
1474 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1475 /// register. This does not necessarily include registers defined in
1476 /// unknown ways, such as incoming arguments, or copies from unknown
1477 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1478 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1479 /// all instructions that define 32-bit values implicit zero-extend the
1480 /// result out to 64 bits.
1481 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1482 bool isZExtFree(EVT VT1, EVT VT2) const override;
1483 bool isZExtFree(SDValue Val, EVT VT2) const override;
1484
1485 bool shouldConvertPhiType(Type *From, Type *To) const override;
1486
1487 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1488 /// extend node) is profitable.
1489 bool isVectorLoadExtDesirable(SDValue) const override;
1490
1491 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1492 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1493 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1494 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1495 EVT VT) const override;
1496
1497 /// Return true if it's profitable to narrow operations of type SrcVT to
1498 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1499 /// from i32 to i16.
1500 bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override;
1501
1502 bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
1503 unsigned SelectOpcode, SDValue X,
1504 SDValue Y) const override;
1505
1506 /// Given an intrinsic, checks if on the target the intrinsic will need to
1507 /// map to a MemIntrinsicNode (touches memory). If this is the case, it
1508 /// returns true and stores the intrinsic information into the IntrinsicInfo
1509 /// that was passed to the function.
1510 void getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &Infos,
1511 const CallBase &I, MachineFunction &MF,
1512 unsigned Intrinsic) const override;
1513
1514 /// Returns true if the target can instruction select the
1515 /// specified FP immediate natively. If false, the legalizer will
1516 /// materialize the FP immediate as a load from a constant pool.
1517 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1518 bool ForCodeSize) const override;
1519
1520 /// Targets can use this to indicate that they only support *some*
1521 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1522 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1523 /// be legal.
1524 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1525
1526 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1527 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1528 /// constant pool entry.
1529 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1530
1531 /// Returns true if lowering to a jump table is allowed.
1532 bool areJTsAllowed(const Function *Fn) const override;
1533
1534 MVT getPreferredSwitchConditionType(LLVMContext &Context,
1535 EVT ConditionVT) const override;
1536
1537 /// If true, then instruction selection should
1538 /// seek to shrink the FP constant of the specified type to a smaller type
1539 /// in order to save space and / or reduce runtime.
1540 bool ShouldShrinkFPConstant(EVT VT) const override;
1541
1542 /// Return true if we believe it is correct and profitable to reduce the
1543 /// load node to a smaller type.
1544 bool
1545 shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
1546 std::optional<unsigned> ByteOffset) const override;
1547
1548 /// Return true if the specified scalar FP type is computed in an SSE
1549 /// register, not on the X87 floating point stack.
1550 bool isScalarFPTypeInSSEReg(EVT VT) const;
1551
1552 /// Returns true if it is beneficial to convert a load of a constant
1553 /// to just the constant itself.
1554 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1555 Type *Ty) const override;
1556
1557 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1558
1559 bool convertSelectOfConstantsToMath(EVT VT) const override;
1560
1561 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1562 SDValue C) const override;
1563
1564 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1565 /// with this index.
1566 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1567 unsigned Index) const override;
1568
1569 /// Scalar ops always have equal or better analysis/performance/power than
1570 /// the vector equivalent, so this always makes sense if the scalar op is
1571 /// supported.
1572 bool shouldScalarizeBinop(SDValue) const override;
1573
1574 /// Extract of a scalar FP value from index 0 of a vector is free.
1575 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1576 EVT EltVT = VT.getScalarType();
1577 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1578 }
1579
1580 /// Overflow nodes should get combined/lowered to optimal instructions
1581 /// (they should allow eliminating explicit compares by getting flags from
1582 /// math ops).
1583 bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1584 bool MathUsed) const override;
1585
1586 bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1587 unsigned AddrSpace) const override {
1588 // If we can replace more than 2 scalar stores, there will be a reduction
1589 // in instructions even after we add a vector constant load.
1590 return IsZero || NumElem > 2;
1591 }
1592
1593 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1594 const SelectionDAG &DAG,
1595 const MachineMemOperand &MMO) const override;
1596
1597 Register getRegisterByName(const char* RegName, LLT VT,
1598 const MachineFunction &MF) const override;
1599
1600 /// If a physical register, this returns the register that receives the
1601 /// exception address on entry to an EH pad.
1602 Register
1603 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1604
1605 /// If a physical register, this returns the register that receives the
1606 /// exception typeid on entry to a landing pad.
1607 Register
1608 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1609
1610 bool needsFixedCatchObjects() const override;
1611
1612 /// This method returns a target specific FastISel object,
1613 /// or null if the target does not support "fast" ISel.
1614 FastISel *
1615 createFastISel(FunctionLoweringInfo &funcInfo,
1616 const TargetLibraryInfo *libInfo,
1617 const LibcallLoweringInfo *libcallLowering) const override;
1618
1619 /// If the target has a standard location for the stack protector cookie,
1620 /// returns the address of that location. Otherwise, returns nullptr.
1621 Value *getIRStackGuard(IRBuilderBase &IRB,
1622 const LibcallLoweringInfo &Libcalls) const override;
1623
1624 bool useLoadStackGuardNode(const Module &M) const override;
1625 bool useStackGuardXorFP() const override;
1626 void
1627 insertSSPDeclarations(Module &M,
1628 const LibcallLoweringInfo &Libcalls) const override;
1629 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1630 const SDLoc &DL) const override;
1631
1632
1633 /// If the target stores the SafeStack pointer at a fixed offset in some
1634 /// non-standard address space, returns that location, populating the address
1635 /// space and offset as appropriate.
1636 Value *getSafeStackPointerLocation(
1637 IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override;
1638
1639 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1640 SDValue Chain, SDValue Pointer,
1641 MachinePointerInfo PtrInfo,
1642 Align Alignment,
1643 SelectionDAG &DAG) const;
1644
1645 /// Customize the preferred legalization strategy for certain types.
1646 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1647
1648 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1649 EVT VT) const override;
1650
1651 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1652 CallingConv::ID CC,
1653 EVT VT) const override;
1654
1655 unsigned getVectorTypeBreakdownForCallingConv(
1656 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1657 unsigned &NumIntermediates, MVT &RegisterVT) const override;
1658
1659 bool functionArgumentNeedsConsecutiveRegisters(
1660 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
1661 const DataLayout &DL) const override;
1662
1663 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1664
1665 bool supportSwiftError() const override;
1666
1667 bool supportKCFIBundles() const override { return true; }
1668
1669 MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1670 MachineBasicBlock::instr_iterator &MBBI,
1671 const TargetInstrInfo *TII) const override;
1672
1673 bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1674 bool hasInlineStackProbe(const MachineFunction &MF) const override;
1675 StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1676
1677 unsigned getStackProbeSize(const MachineFunction &MF) const;
1678
1679 bool hasVectorBlend() const override { return true; }
1680
1681 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1682
1683 bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1684 unsigned OpNo) const override;
1685
1686 SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1687 MachineMemOperand *MMO, SDValue &NewLoad,
1688 SDValue Ptr, SDValue PassThru,
1689 SDValue Mask) const override;
1690 SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1691 MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
1692 SDValue Mask) const override;
1693
1694 /// Lower interleaved load(s) into target specific
1695 /// instructions/intrinsics.
1696 bool lowerInterleavedLoad(Instruction *Load, Value *Mask,
1697 ArrayRef<ShuffleVectorInst *> Shuffles,
1698 ArrayRef<unsigned> Indices, unsigned Factor,
1699 const APInt &GapMask) const override;
1700
1701 /// Lower interleaved store(s) into target specific
1702 /// instructions/intrinsics.
1703 bool lowerInterleavedStore(Instruction *Store, Value *Mask,
1704 ShuffleVectorInst *SVI, unsigned Factor,
1705 const APInt &GapMask) const override;
1706
1707 SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1708 int JTI, SelectionDAG &DAG) const override;
1709
1710 Align getPrefLoopAlignment(MachineLoop *ML) const override;
1711
1712 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1713 if (VT == MVT::f80)
1714 return EVT::getIntegerVT(Context, 96);
1715 return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1716 }
1717
1718 protected:
1719 std::pair<const TargetRegisterClass *, uint8_t>
1720 findRepresentativeClass(const TargetRegisterInfo *TRI,
1721 MVT VT) const override;
1722
1723 private:
1724 /// Keep a reference to the X86Subtarget around so that we can
1725 /// make the right decision when generating code for different targets.
1726 const X86Subtarget &Subtarget;
1727
1728 /// A list of legal FP immediates.
1729 std::vector<APFloat> LegalFPImmediates;
1730
1731 /// Indicate that this x86 target can instruction
1732 /// select the specified FP immediate natively.
1733 void addLegalFPImmediate(const APFloat& Imm) {
1734 LegalFPImmediates.push_back(Imm);
1735 }

    SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags, bool isByval) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for sibling call optimization.
    bool
    isEligibleForSiblingCallOpt(TargetLowering::CallLoweringInfo &CLI,
                                CCState &CCInfo,
                                SmallVectorImpl<CCValAssign> &ArgLocs) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace() const;

    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                            SDValue &Chain) const;
    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV,
                                  const unsigned char OpFlags) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall,
                                  bool *IsImpCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    ByValCopyKind ByValNeedsCopyForTailCall(SelectionDAG &DAG, SDValue Src,
                                            SDValue Dst,
                                            ISD::ArgFlagsTy Flags) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

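    // Split CSR saving/restoring is only supported for the CXX_FAST_TLS
    // calling convention, and only when the function is known not to unwind.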
    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context,
                        const Type *RetTy) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
    ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(const AtomicRMWInst *AI) const;
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

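    /// An idempotent atomicrmw (one that leaves memory unchanged, e.g.
    /// `atomicrmw or ptr %p, i32 0`) can sometimes be lowered to a fence
    /// followed by an atomic load instead of a locked read-modify-write.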
    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to emit a cascaded pair of select (CMOV) pseudo
    /// instructions as a single lowered branch sequence.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
                                              MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                             SDValue IntPow2) const override;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
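    /// Each requested refinement step is one Newton-Raphson iteration on the
    /// estimate E of 1/sqrt(a), roughly E' = E * (1.5 - 0.5 * a * E * E).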
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
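    /// Each requested refinement step is one Newton-Raphson iteration on the
    /// estimate E of 1/a, roughly E' = E * (2.0 - a * E).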
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
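    /// Returns the minimum number of divisions by the same divisor that
    /// justifies computing the reciprocal once, e.g. turning a/d + b/d into
    /// r = 1.0/d; a*r + b*r under the appropriate fast-math flags.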
    unsigned combineRepeatedFPDivisors() const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                    SDValue V2) const;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo,
                             const LibcallLoweringInfo *libcallLowering);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
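  // Operand layout (mirroring MaskedGatherScatterSDNode):
  //   #0 chain, #1 passthru (gather) / stored value (scatter), #2 mask,
  //   #3 base pointer, #4 index, #5 scale.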
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
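  /// For example, for v8i16 this produces:
  ///   Lo --> <0, 8, 1, 9, 2, 10, 3, 11>   (Unary: <0, 0, 1, 1, 2, 2, 3, 3>)
  ///   Hi --> <4, 12, 5, 13, 6, 14, 7, 15> (Unary: <4, 4, 5, 5, 6, 6, 7, 7>)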
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H