1//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16
17#include "llvm/CodeGen/MachineFunction.h"
18#include "llvm/CodeGen/TargetLowering.h"
19
20namespace llvm {
21 class X86Subtarget;
22 class X86TargetMachine;
23
24 namespace X86ISD {
25 // X86 Specific DAG Nodes
26 enum NodeType : unsigned {
27 // Start the numbering where the builtin ops leave off.
28 FIRST_NUMBER = ISD::BUILTIN_OP_END,
29
30 /// Bit scan forward.
31 BSF,
32 /// Bit scan reverse.
33 BSR,
34
/// X86 funnel/double shift i16 instructions. These correspond to
/// X86::SHLDW and X86::SHRDW instructions, which have different amount
/// modulo rules than generic funnel shifts.
/// NOTE: The operand order matches ISD::FSHL/FSHR, not SHLD/SHRD.
39 FSHL,
40 FSHR,
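// Illustration (a sketch, not code taken from this backend): because the
// operand order matches the generic nodes, an i16 ISD::FSHL can be
// re-expressed with the X86-specific node without reordering operands, e.g.
//   SDValue Fsh = DAG.getNode(X86ISD::FSHL, DL, MVT::i16, Hi, Lo, Amt);
// where DAG, DL, Hi, Lo and Amt are assumed to be an in-scope SelectionDAG,
// debug location, the two inputs and the shift amount.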
41
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
45
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
49
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
53
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
57
/// These operations represent an abstract X86 call
/// instruction, which carries several pieces of information. In particular,
/// the operands of these nodes are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
75 CALL,
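// For illustration only (a sketch of the operand numbering documented above,
// not the actual LowerCall implementation; Chain, Callee, BytesPushed and
// BytesPopped are assumed SDValues):
//   SmallVector<SDValue, 8> Ops;
//   Ops.push_back(Chain);        // #0 incoming token chain
//   Ops.push_back(Callee);       // #1 callee
//   Ops.push_back(BytesPushed);  // #2 arg bytes the caller pushes
//   Ops.push_back(BytesPopped);  // #3 arg bytes the callee pops
//   SDValue Call = DAG.getNode(X86ISD::CALL, DL,
//                              DAG.getVTList(MVT::Other, MVT::Glue), Ops);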
76
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
79
// Pseudo for an Objective-C call that gets emitted together with a special
// marker instruction.
82 CALL_RVMARKER,
83
/// The same as ISD::CopyFromReg except that this node makes it explicit
/// that it may lower to an x87 FPU stack pop. Optimizations should be more
/// cautious when handling this node than a normal CopyFromReg to avoid
/// removing a required FPU stack pop. In particular, optimizations must not
/// rewrite any user of a chain that contains a POP_FROM_X87_REG to use a
/// chain from a point earlier than the POP_FROM_X87_REG, as that may remove
/// a required FPU stack pop.
91 POP_FROM_X87_REG,
92
93 // Pseudo for a call to an imported function to ensure the correct machine
94 // instruction is emitted for Import Call Optimization.
95 IMP_CALL,
96
97 /// X86 compare and logical compare instructions.
98 CMP,
99 FCMP,
100 COMI,
101 UCOMI,
102
// X86 compare intrinsics, similar to COMI.
104 COMX,
105 UCOMX,
106
107 /// X86 bit-test instructions.
108 BT,
109
110 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
111 /// operand, usually produced by a CMP instruction.
112 SETCC,
113
114 /// X86 Select
115 SELECTS,
116
// Same as SETCC except it's materialized with an SBB and the value is all
// ones or all zeros.
119 SETCC_CARRY, // R = carry_bit ? ~0 : 0
120
121 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
122 /// Operands are two FP values to compare; result is a mask of
123 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
124 FSETCC,
125
/// X86 FP SETCC, similar to above, but with output as an i1 mask, and
/// a version with SAE.
128 FSETCCM,
129 FSETCCM_SAE,
130
131 /// X86 conditional moves. Operand 0 and operand 1 are the two values
132 /// to select from. Operand 2 is the condition code, and operand 3 is the
133 /// flag operand produced by a CMP or TEST instruction.
134 CMOV,
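// Illustration (hypothetical values, not code from this file): selecting
// between A and B on the carry flag using the operand order documented above,
// where A, B and EFLAGS are assumed SDValues and EFLAGS comes from a
// flag-producing node such as X86ISD::CMP.
//   SDValue CC = DAG.getTargetConstant(X86::COND_B, DL, MVT::i8);
//   SDValue Sel = DAG.getNode(X86ISD::CMOV, DL, VT, A, B, CC, EFLAGS);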
135
136 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
137 /// is the block to branch if condition is true, operand 2 is the
138 /// condition code, and operand 3 is the flag operand produced by a CMP
139 /// or TEST instruction.
140 BRCOND,
141
142 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
143 /// operand 1 is the target address.
144 NT_BRIND,
145
146 /// Return with a glue operand. Operand 0 is the chain operand, operand
147 /// 1 is the number of bytes of stack to pop.
148 RET_GLUE,
149
150 /// Return from interrupt. Operand 0 is the number of bytes to pop.
151 IRET,
152
153 /// Repeat fill, corresponds to X86::REP_STOSx.
154 REP_STOS,
155
156 /// Repeat move, corresponds to X86::REP_MOVSx.
157 REP_MOVS,
158
159 /// On Darwin, this node represents the result of the popl
160 /// at function entry, used for PIC code.
161 GlobalBaseReg,
162
163 /// A wrapper node for TargetConstantPool, TargetJumpTable,
164 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
165 /// MCSymbol and TargetBlockAddress.
166 Wrapper,
167
168 /// Special wrapper used under X86-64 PIC mode for RIP
169 /// relative displacements.
170 WrapperRIP,
171
172 /// Copies a 64-bit value from an MMX vector to the low word
173 /// of an XMM vector, with the high word zero filled.
174 MOVQ2DQ,
175
176 /// Copies a 64-bit value from the low word of an XMM vector
177 /// to an MMX vector.
178 MOVDQ2Q,
179
/// Copies a 32-bit value from the low word of an MMX
/// vector to a GPR.
MMX_MOVD2W,

/// Copies a GPR into the low 32-bit word of an MMX vector
/// and zeroes out the high word.
186 MMX_MOVW2D,
187
188 /// Extract an 8-bit value from a vector and zero extend it to
189 /// i32, corresponds to X86::PEXTRB.
190 PEXTRB,
191
192 /// Extract a 16-bit value from a vector and zero extend it to
193 /// i32, corresponds to X86::PEXTRW.
194 PEXTRW,
195
/// Insert any element of a 4 x float vector into any element
/// of a destination 4 x float vector.
198 INSERTPS,
199
/// Insert the lower 8 bits of a 32-bit value into a vector;
/// corresponds to X86::PINSRB.
PINSRB,

/// Insert the lower 16 bits of a 32-bit value into a vector;
/// corresponds to X86::PINSRW.
206 PINSRW,
207
208 /// Shuffle 16 8-bit values within a vector.
209 PSHUFB,
210
211 /// Compute Sum of Absolute Differences.
212 PSADBW,
213 /// Compute Double Block Packed Sum-Absolute-Differences
214 DBPSADBW,
215
216 /// Bitwise Logical AND NOT of Packed FP values.
217 ANDNP,
218
219 /// Blend where the selector is an immediate.
220 BLENDI,
221
222 /// Dynamic (non-constant condition) vector blend where only the sign bits
223 /// of the condition elements are used. This is used to enforce that the
224 /// condition mask is not valid for generic VSELECT optimizations. This
225 /// is also used to implement the intrinsics.
226 /// Operands are in VSELECT order: MASK, TRUE, FALSE
227 BLENDV,
228
229 /// Combined add and sub on an FP vector.
230 ADDSUB,
231
232 // FP vector ops with rounding mode.
233 FADD_RND,
234 FADDS,
235 FADDS_RND,
236 FSUB_RND,
237 FSUBS,
238 FSUBS_RND,
239 FMUL_RND,
240 FMULS,
241 FMULS_RND,
242 FDIV_RND,
243 FDIVS,
244 FDIVS_RND,
245 FMAX_SAE,
246 FMAXS_SAE,
247 FMIN_SAE,
248 FMINS_SAE,
249 FSQRT_RND,
250 FSQRTS,
251 FSQRTS_RND,
252
253 // FP vector get exponent.
254 FGETEXP,
255 FGETEXP_SAE,
256 FGETEXPS,
257 FGETEXPS_SAE,
258 // Extract Normalized Mantissas.
259 VGETMANT,
260 VGETMANT_SAE,
261 VGETMANTS,
262 VGETMANTS_SAE,
263 // FP Scale.
264 SCALEF,
265 SCALEF_RND,
266 SCALEFS,
267 SCALEFS_RND,
268
269 /// Integer horizontal add/sub.
270 HADD,
271 HSUB,
272
273 /// Floating point horizontal add/sub.
274 FHADD,
275 FHSUB,
276
277 // Detect Conflicts Within a Vector
278 CONFLICT,
279
280 /// Floating point max and min.
281 FMAX,
282 FMIN,
283
284 /// Commutative FMIN and FMAX.
285 FMAXC,
286 FMINC,
287
288 /// Scalar intrinsic floating point max and min.
289 FMAXS,
290 FMINS,
291
292 /// Floating point reciprocal-sqrt and reciprocal approximation.
293 /// Note that these typically require refinement
294 /// in order to obtain suitable precision.
295 FRSQRT,
296 FRCP,
297
298 // AVX-512 reciprocal approximations with a little more precision.
299 RSQRT14,
300 RSQRT14S,
301 RCP14,
302 RCP14S,
303
304 // Thread Local Storage.
305 TLSADDR,
306
307 // Thread Local Storage. A call to get the start address
308 // of the TLS block for the current module.
309 TLSBASEADDR,
310
// Thread Local Storage. A call to an OS-provided thunk at the
// address from an earlier relocation.
TLSCALL,

// Thread Local Storage. A descriptor containing a pointer to code
// and to an argument, used to get the TLS offset for the symbol.
317 TLSDESC,
318
319 // Exception Handling helpers.
320 EH_RETURN,
321
322 // SjLj exception handling setjmp.
323 EH_SJLJ_SETJMP,
324
325 // SjLj exception handling longjmp.
326 EH_SJLJ_LONGJMP,
327
328 // SjLj exception handling dispatch.
329 EH_SJLJ_SETUP_DISPATCH,
330
331 /// Tail call return. See X86TargetLowering::LowerCall for
332 /// the list of operands.
333 TC_RETURN,
334
335 // Vector move to low scalar and zero higher vector elements.
336 VZEXT_MOVL,
337
338 // Vector integer truncate.
339 VTRUNC,
340 // Vector integer truncate with unsigned/signed saturation.
341 VTRUNCUS,
342 VTRUNCS,
343
344 // Masked version of the above. Used when less than a 128-bit result is
345 // produced since the mask only applies to the lower elements and can't
346 // be represented by a select.
347 // SRC, PASSTHRU, MASK
348 VMTRUNC,
349 VMTRUNCUS,
350 VMTRUNCS,
351
352 // Vector FP extend.
353 VFPEXT,
354 VFPEXT_SAE,
355 VFPEXTS,
356 VFPEXTS_SAE,
357
358 // Vector FP round.
359 VFPROUND,
// Convert two packed single-precision vectors to one packed result.
361 VFPROUND2,
362 VFPROUND2_RND,
363 VFPROUND_RND,
364 VFPROUNDS,
365 VFPROUNDS_RND,
366
367 // Masked version of above. Used for v2f64->v4f32.
368 // SRC, PASSTHRU, MASK
369 VMFPROUND,
370
371 // 128-bit vector logical left / right shift
372 VSHLDQ,
373 VSRLDQ,
374
375 // Vector shift elements
376 VSHL,
377 VSRL,
378 VSRA,
379
380 // Vector variable shift
381 VSHLV,
382 VSRLV,
383 VSRAV,
384
385 // Vector shift elements by immediate
386 VSHLI,
387 VSRLI,
388 VSRAI,
389
390 // Shifts of mask registers.
391 KSHIFTL,
392 KSHIFTR,
393
394 // Bit rotate by immediate
395 VROTLI,
396 VROTRI,
397
398 // Vector packed double/float comparison.
399 CMPP,
400
401 // Vector integer comparisons.
402 PCMPEQ,
403 PCMPGT,
404
405 // v8i16 Horizontal minimum and position.
406 PHMINPOS,
407
408 MULTISHIFT,
409
410 /// Vector comparison generating mask bits for fp and
411 /// integer signed and unsigned data types.
412 CMPM,
413 // Vector mask comparison generating mask bits for FP values.
414 CMPMM,
415 // Vector mask comparison with SAE for FP values.
416 CMPMM_SAE,
417
418 // Arithmetic operations with FLAGS results.
419 ADD,
420 SUB,
421 ADC,
422 SBB,
423 SMUL,
424 UMUL,
425 OR,
426 XOR,
427 AND,
428
429 // Bit field extract.
430 BEXTR,
431 BEXTRI,
432
433 // Zero High Bits Starting with Specified Bit Position.
434 BZHI,
435
436 // Parallel extract and deposit.
437 PDEP,
438 PEXT,
439
440 // X86-specific multiply by immediate.
441 MUL_IMM,
442
443 // Vector sign bit extraction.
444 MOVMSK,
445
446 // Vector bitwise comparisons.
447 PTEST,
448
449 // Vector packed fp sign bitwise comparisons.
450 TESTP,
451
452 // OR/AND test for masks.
453 KORTEST,
454 KTEST,
455
456 // ADD for masks.
457 KADD,
458
459 // Several flavors of instructions with vector shuffle behaviors.
// Saturated signed/unsigned packing.
461 PACKSS,
462 PACKUS,
463 // Intra-lane alignr.
464 PALIGNR,
465 // AVX512 inter-lane alignr.
466 VALIGN,
467 PSHUFD,
468 PSHUFHW,
469 PSHUFLW,
470 SHUFP,
471 // VBMI2 Concat & Shift.
472 VSHLD,
473 VSHRD,
474 VSHLDV,
475 VSHRDV,
476 // Shuffle Packed Values at 128-bit granularity.
477 SHUF128,
478 MOVDDUP,
479 MOVSHDUP,
480 MOVSLDUP,
481 MOVLHPS,
482 MOVHLPS,
483 MOVSD,
484 MOVSS,
485 MOVSH,
486 UNPCKL,
487 UNPCKH,
488 VPERMILPV,
489 VPERMILPI,
490 VPERMI,
491 VPERM2X128,
492
493 // Variable Permute (VPERM).
494 // Res = VPERMV MaskV, V0
495 VPERMV,
496
497 // 3-op Variable Permute (VPERMT2).
498 // Res = VPERMV3 V0, MaskV, V1
499 VPERMV3,
500
501 // Bitwise ternary logic.
502 VPTERNLOG,
503 // Fix Up Special Packed Float32/64 values.
504 VFIXUPIMM,
505 VFIXUPIMM_SAE,
506 VFIXUPIMMS,
507 VFIXUPIMMS_SAE,
508 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
509 VRANGE,
510 VRANGE_SAE,
511 VRANGES,
512 VRANGES_SAE,
// Reduce - Perform Reduction Transformation on scalar/packed FP.
514 VREDUCE,
515 VREDUCE_SAE,
516 VREDUCES,
517 VREDUCES_SAE,
518 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
519 // Also used by the legacy (V)ROUND intrinsics where we mask out the
520 // scaling part of the immediate.
521 VRNDSCALE,
522 VRNDSCALE_SAE,
523 VRNDSCALES,
524 VRNDSCALES_SAE,
// Test the types of packed FP values.
VFPCLASS,
// Test the types of scalar FP values.
VFPCLASSS,
529
530 // Broadcast (splat) scalar or element 0 of a vector. If the operand is
531 // a vector, this node may change the vector length as part of the splat.
532 VBROADCAST,
533 // Broadcast mask to vector.
534 VBROADCASTM,
535
536 /// SSE4A Extraction and Insertion.
537 EXTRQI,
538 INSERTQI,
539
540 // XOP arithmetic/logical shifts.
541 VPSHA,
542 VPSHL,
543 // XOP signed/unsigned integer comparisons.
544 VPCOM,
545 VPCOMU,
546 // XOP packed permute bytes.
547 VPPERM,
548 // XOP two source permutation.
549 VPERMIL2,
550
551 // Vector multiply packed unsigned doubleword integers.
552 PMULUDQ,
553 // Vector multiply packed signed doubleword integers.
554 PMULDQ,
// Vector Multiply Packed Signed Integers with Round and Scale.
556 MULHRS,
557
558 // Multiply and Add Packed Integers.
559 VPMADDUBSW,
560 VPMADDWD,
561
562 // AVX512IFMA multiply and add.
// NOTE: These are different from the instructions and perform
// op0 * op1 + op2.
565 VPMADD52L,
566 VPMADD52H,
567
568 // VNNI
569 VPDPBUSD,
570 VPDPBUSDS,
571 VPDPWSSD,
572 VPDPWSSDS,
573
574 // FMA nodes.
575 // We use the target independent ISD::FMA for the non-inverted case.
576 FNMADD,
577 FMSUB,
578 FNMSUB,
579 FMADDSUB,
580 FMSUBADD,
581
582 // FMA with rounding mode.
583 FMADD_RND,
584 FNMADD_RND,
585 FMSUB_RND,
586 FNMSUB_RND,
587 FMADDSUB_RND,
588 FMSUBADD_RND,
589
590 // AVX512-FP16 complex addition and multiplication.
591 VFMADDC,
592 VFMADDC_RND,
593 VFCMADDC,
594 VFCMADDC_RND,
595
596 VFMULC,
597 VFMULC_RND,
598 VFCMULC,
599 VFCMULC_RND,
600
601 VFMADDCSH,
602 VFMADDCSH_RND,
603 VFCMADDCSH,
604 VFCMADDCSH_RND,
605
606 VFMULCSH,
607 VFMULCSH_RND,
608 VFCMULCSH,
609 VFCMULCSH_RND,
610
611 VPDPBSUD,
612 VPDPBSUDS,
613 VPDPBUUD,
614 VPDPBUUDS,
615 VPDPBSSD,
616 VPDPBSSDS,
617
618 VPDPWSUD,
619 VPDPWSUDS,
620 VPDPWUSD,
621 VPDPWUSDS,
622 VPDPWUUD,
623 VPDPWUUDS,
624
625 VMINMAX,
626 VMINMAX_SAE,
627 VMINMAXS,
628 VMINMAXS_SAE,
629
630 CVTP2IBS,
631 CVTP2IUBS,
632 CVTP2IBS_RND,
633 CVTP2IUBS_RND,
634 CVTTP2IBS,
635 CVTTP2IUBS,
636 CVTTP2IBS_SAE,
637 CVTTP2IUBS_SAE,
638
639 MPSADBW,
640
641 VCVT2PH2BF8,
642 VCVT2PH2BF8S,
643 VCVT2PH2HF8,
644 VCVT2PH2HF8S,
645 VCVTBIASPH2BF8,
646 VCVTBIASPH2BF8S,
647 VCVTBIASPH2HF8,
648 VCVTBIASPH2HF8S,
649 VCVTPH2BF8,
650 VCVTPH2BF8S,
651 VCVTPH2HF8,
652 VCVTPH2HF8S,
653 VMCVTBIASPH2BF8,
654 VMCVTBIASPH2BF8S,
655 VMCVTBIASPH2HF8,
656 VMCVTBIASPH2HF8S,
657 VMCVTPH2BF8,
658 VMCVTPH2BF8S,
659 VMCVTPH2HF8,
660 VMCVTPH2HF8S,
661 VCVTHF82PH,
662
663 // Compress and expand.
664 COMPRESS,
665 EXPAND,
666
667 // Bits shuffle
668 VPSHUFBITQMB,
669
// Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
671 SINT_TO_FP_RND,
672 UINT_TO_FP_RND,
673 SCALAR_SINT_TO_FP,
674 SCALAR_UINT_TO_FP,
675 SCALAR_SINT_TO_FP_RND,
676 SCALAR_UINT_TO_FP_RND,
677
678 // Vector float/double to signed/unsigned integer.
679 CVTP2SI,
680 CVTP2UI,
681 CVTP2SI_RND,
682 CVTP2UI_RND,
683 // Scalar float/double to signed/unsigned integer.
684 CVTS2SI,
685 CVTS2UI,
686 CVTS2SI_RND,
687 CVTS2UI_RND,
688
689 // Vector float/double to signed/unsigned integer with truncation.
690 CVTTP2SI,
691 CVTTP2UI,
692 CVTTP2SI_SAE,
693 CVTTP2UI_SAE,
694
695 // Saturation enabled Vector float/double to signed/unsigned
696 // integer with truncation.
697 CVTTP2SIS,
698 CVTTP2UIS,
699 CVTTP2SIS_SAE,
700 CVTTP2UIS_SAE,
701 // Masked versions of above. Used for v2f64 to v4i32.
702 // SRC, PASSTHRU, MASK
703 MCVTTP2SIS,
704 MCVTTP2UIS,
705
706 // Scalar float/double to signed/unsigned integer with truncation.
707 CVTTS2SI,
708 CVTTS2UI,
709 CVTTS2SI_SAE,
710 CVTTS2UI_SAE,
711
712 // Vector signed/unsigned integer to float/double.
713 CVTSI2P,
714 CVTUI2P,
715
716 // Scalar float/double to signed/unsigned integer with saturation.
717 CVTTS2SIS,
718 CVTTS2UIS,
719 CVTTS2SIS_SAE,
720 CVTTS2UIS_SAE,
721
722 // Masked versions of above. Used for v2f64->v4f32.
723 // SRC, PASSTHRU, MASK
724 MCVTP2SI,
725 MCVTP2UI,
726 MCVTTP2SI,
727 MCVTTP2UI,
728 MCVTSI2P,
729 MCVTUI2P,
730
731 // Custom handling for FP_TO_xINT_SAT
732 FP_TO_SINT_SAT,
733 FP_TO_UINT_SAT,
734
735 // Vector float to bfloat16.
736 // Convert packed single data to packed BF16 data
737 CVTNEPS2BF16,
738 // Masked version of above.
739 // SRC, PASSTHRU, MASK
740 MCVTNEPS2BF16,
741
// Dot product of BF16/FP16 pairs, accumulated into
// packed single precision.
744 DPBF16PS,
745 DPFP16PS,
746
// A stack checking function call. On Windows it's the _chkstk call.
748 DYN_ALLOCA,
749
// For allocating variable amounts of stack space when using
// segmented stacks. Checks if the current stacklet has enough space, and
// falls back to heap allocation if not.
753 SEG_ALLOCA,
754
755 // For allocating stack space when using stack clash protector.
756 // Allocation is performed by block, and each block is probed.
757 PROBED_ALLOCA,
758
759 // Memory barriers.
760 MFENCE,
761
762 // Get a random integer and indicate whether it is valid in CF.
763 RDRAND,
764
765 // Get a NIST SP800-90B & C compliant random integer and
766 // indicate whether it is valid in CF.
767 RDSEED,
768
769 // Protection keys
770 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
771 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
772 // value for ECX.
773 RDPKRU,
774 WRPKRU,
775
776 // SSE42 string comparisons.
// These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
// will emit one or two instructions based on which results are used. If
// both the flags and the index/mask are used, this allows us to use a
// single instruction since we won't have to pick an opcode for the flags.
// Instead we can rely on the DAG to CSE everything and decide at isel.
782 PCMPISTR,
783 PCMPESTR,
784
785 // Test if in transactional execution.
786 XTEST,
787
788 // Conversions between float and half-float.
789 CVTPS2PH,
790 CVTPS2PH_SAE,
791 CVTPH2PS,
792 CVTPH2PS_SAE,
793
794 // Masked version of above.
795 // SRC, RND, PASSTHRU, MASK
796 MCVTPS2PH,
797 MCVTPS2PH_SAE,
798
799 // Galois Field Arithmetic Instructions
800 GF2P8AFFINEINVQB,
801 GF2P8AFFINEQB,
802 GF2P8MULB,
803
804 // LWP insert record.
805 LWPINS,
806
807 // User level wait
808 UMWAIT,
809 TPAUSE,
810
811 // Enqueue Stores Instructions
812 ENQCMD,
813 ENQCMDS,
814
815 // For avx512-vp2intersect
816 VP2INTERSECT,
817
818 // User level interrupts - testui
819 TESTUI,
820
821 // Perform an FP80 add after changing precision control in FPCW.
822 FP80_ADD,
823
824 // Conditional compare instructions
825 CCMP,
826 CTEST,
827
828 /// X86 strict FP compare instructions.
829 FIRST_STRICTFP_OPCODE,
830 STRICT_FCMP = FIRST_STRICTFP_OPCODE,
831 STRICT_FCMPS,
832
833 // Vector packed double/float comparison.
834 STRICT_CMPP,
835
836 /// Vector comparison generating mask bits for fp and
837 /// integer signed and unsigned data types.
838 STRICT_CMPM,
839
840 // Vector float/double to signed/unsigned integer with truncation.
841 STRICT_CVTTP2SI,
842 STRICT_CVTTP2UI,
843
844 // Vector FP extend.
845 STRICT_VFPEXT,
846
847 // Vector FP round.
848 STRICT_VFPROUND,
849
850 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
851 // Also used by the legacy (V)ROUND intrinsics where we mask out the
852 // scaling part of the immediate.
853 STRICT_VRNDSCALE,
854
855 // Vector signed/unsigned integer to float/double.
856 STRICT_CVTSI2P,
857 STRICT_CVTUI2P,
858
859 // Strict FMA nodes.
860 STRICT_FNMADD,
861 STRICT_FMSUB,
862 STRICT_FNMSUB,
863
864 // Conversions between float and half-float.
865 STRICT_CVTPS2PH,
866 STRICT_CVTPH2PS,
867
868 // Perform an FP80 add after changing precision control in FPCW.
869 STRICT_FP80_ADD,
870
871 /// Floating point max and min.
872 STRICT_FMAX,
873 STRICT_FMIN,
874 LAST_STRICTFP_OPCODE = STRICT_FMIN,
875
876 // Compare and swap.
877 FIRST_MEMORY_OPCODE,
878 LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
879 LCMPXCHG8_DAG,
880 LCMPXCHG16_DAG,
881 LCMPXCHG16_SAVE_RBX_DAG,
882
883 /// LOCK-prefixed arithmetic read-modify-write instructions.
884 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
885 LADD,
886 LSUB,
887 LOR,
888 LXOR,
889 LAND,
890 LBTS,
891 LBTC,
892 LBTR,
893 LBTS_RM,
894 LBTC_RM,
895 LBTR_RM,
896
897 /// RAO arithmetic instructions.
898 /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
899 AADD,
900 AOR,
901 AXOR,
902 AAND,
903
904 // Load, scalar_to_vector, and zero extend.
905 VZEXT_LOAD,
906
907 // extract_vector_elt, store.
908 VEXTRACT_STORE,
909
910 // scalar broadcast from memory.
911 VBROADCAST_LOAD,
912
913 // subvector broadcast from memory.
914 SUBV_BROADCAST_LOAD,
915
916 // Store FP control word into i16 memory.
917 FNSTCW16m,
918
919 // Load FP control word from i16 memory.
920 FLDCW16m,
921
922 // Store x87 FPU environment into memory.
923 FNSTENVm,
924
925 // Load x87 FPU environment from memory.
926 FLDENVm,
927
928 /// This instruction implements FP_TO_SINT with the
929 /// integer destination in memory and a FP reg source. This corresponds
930 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
931 /// has two inputs (token chain and address) and two outputs (int value
932 /// and token chain). Memory VT specifies the type to store to.
933 FP_TO_INT_IN_MEM,
934
935 /// This instruction implements SINT_TO_FP with the
936 /// integer source in memory and FP reg result. This corresponds to the
937 /// X86::FILD*m instructions. It has two inputs (token chain and address)
938 /// and two outputs (FP value and token chain). The integer source type is
939 /// specified by the memory VT.
940 FILD,
941
942 /// This instruction implements a fp->int store from FP stack
943 /// slots. This corresponds to the fist instruction. It takes a
944 /// chain operand, value to store, address, and glue. The memory VT
945 /// specifies the type to store as.
946 FIST,
947
948 /// This instruction implements an extending load to FP stack slots.
949 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
950 /// operand, and ptr to load from. The memory VT specifies the type to
951 /// load from.
952 FLD,
953
954 /// This instruction implements a truncating store from FP stack
955 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
956 /// chain operand, value to store, address, and glue. The memory VT
957 /// specifies the type to store as.
958 FST,
959
960 /// These instructions grab the address of the next argument
961 /// from a va_list. (reads and modifies the va_list in memory)
962 VAARG_64,
963 VAARG_X32,
964
965 // Vector truncating store with unsigned/signed saturation
966 VTRUNCSTOREUS,
967 VTRUNCSTORES,
968 // Vector truncating masked store with unsigned/signed saturation
969 VMTRUNCSTOREUS,
970 VMTRUNCSTORES,
971
972 // X86 specific gather and scatter
973 MGATHER,
974 MSCATTER,
975
976 // Key locker nodes that produce flags.
977 AESENC128KL,
978 AESDEC128KL,
979 AESENC256KL,
980 AESDEC256KL,
981 AESENCWIDE128KL,
982 AESDECWIDE128KL,
983 AESENCWIDE256KL,
984 AESDECWIDE256KL,
985
/// Compare and Add if Condition is Met. Compare the value in operand 2 with
/// the value in memory at operand 1. If the condition of operand 4 is met,
/// add the value of operand 3 to m32 and write the new value back to the
/// memory at operand 1. Operand 2 is always updated with the original value
/// from operand 1.
990 CMPCCXADD,
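// Rough C-level model of the behavior documented above (illustrative sketch
// only, not the lowering of this node):
//   int32_t cmpccxadd_model(int32_t *Mem, int32_t &Cmp, int32_t Addend,
//                           bool CondMet /* condition from operand 4 */) {
//     int32_t Old = *Mem;      // memory operand (operand 1)
//     if (CondMet)
//       *Mem = Old + Addend;   // add operand 3 and write back
//     Cmp = Old;               // operand 2 always receives the original value
//     return Old;
//   }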
991
992 // Save xmm argument registers to the stack, according to %al. An operator
993 // is needed so that this can be expanded with control flow.
994 VASTART_SAVE_XMM_REGS,
995
996 // Conditional load/store instructions
997 CLOAD,
998 CSTORE,
999 LAST_MEMORY_OPCODE = CSTORE,
1000 };
1001 } // end namespace X86ISD
1002
1003 namespace X86 {
  /// The current rounding mode is represented in bits 11:10 of the x87 FP
  /// control word. These values are the same as the corresponding rounding
  /// mode constants used in glibc.
1007 enum RoundingMode {
1008 rmToNearest = 0, // FE_TONEAREST
1009 rmDownward = 1 << 10, // FE_DOWNWARD
1010 rmUpward = 2 << 10, // FE_UPWARD
1011 rmTowardZero = 3 << 10, // FE_TOWARDZERO
1012 rmMask = 3 << 10 // Bit mask selecting rounding mode
1013 };
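  // Illustrative sketch: a saved x87 control word (e.g. obtained via FNSTCW)
  // can be mapped to one of the values above by masking bits 11:10.
  //   X86::RoundingMode RM =
  //       static_cast<X86::RoundingMode>(ControlWord & X86::rmMask);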
1014 }
1015
1016 /// Define some predicates that are used for node matching.
1017 namespace X86 {
1018 /// Returns true if Elt is a constant zero or floating point constant +0.0.
1019 bool isZeroNode(SDValue Elt);
1020
  /// Returns true if the given offset can
  /// fit into the displacement field of the instruction.
1023 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
1024 bool hasSymbolicDisplacement);
1025
1026 /// Determines whether the callee is required to pop its
1027 /// own arguments. Callee pop is necessary to support tail calls.
1028 bool isCalleePop(CallingConv::ID CallingConv,
1029 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
1030
1031 /// If Op is a constant whose elements are all the same constant or
1032 /// undefined, return true and return the constant value in \p SplatVal.
1033 /// If we have undef bits that don't cover an entire element, we treat these
1034 /// as zero if AllowPartialUndefs is set, else we fail and return false.
1035 bool isConstantSplat(SDValue Op, APInt &SplatVal,
1036 bool AllowPartialUndefs = true);
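  // Usage sketch (hypothetical combine, names assumed, not code from LLVM):
  //   APInt SplatVal;
  //   if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
  //       SplatVal.isOne()) {
  //     // Operand 1 is a uniform constant 1 (undef lanes handled according
  //     // to AllowPartialUndefs).
  //   }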
1037
1038 /// Check if Op is a load operation that could be folded into some other x86
1039 /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
1040 bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
1041 bool AssumeSingleUse = false);
1042
1043 /// Check if Op is a load operation that could be folded into a vector splat
1044 /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
1045 bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
1046 const X86Subtarget &Subtarget,
1047 bool AssumeSingleUse = false);
1048
1049 /// Check if Op is a value that could be used to fold a store into some
1050 /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
1051 bool mayFoldIntoStore(SDValue Op);
1052
1053 /// Check if Op is an operation that could be folded into a zero extend x86
1054 /// instruction.
1055 bool mayFoldIntoZeroExtend(SDValue Op);
1056
1057 /// True if the target supports the extended frame for async Swift
1058 /// functions.
1059 bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
1060 const MachineFunction &MF);
1061 } // end namespace X86
1062
1063 //===--------------------------------------------------------------------===//
1064 // X86 Implementation of the TargetLowering interface
1065 class X86TargetLowering final : public TargetLowering {
1066 public:
1067 explicit X86TargetLowering(const X86TargetMachine &TM,
1068 const X86Subtarget &STI);
1069
1070 unsigned getJumpTableEncoding() const override;
1071 bool useSoftFloat() const override;
1072
1073 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
1074 ArgListTy &Args) const override;
1075
1076 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
1077 return MVT::i8;
1078 }
1079
1080 const MCExpr *
1081 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1082 const MachineBasicBlock *MBB, unsigned uid,
1083 MCContext &Ctx) const override;
1084
1085 /// Returns relocation base for the given PIC jumptable.
1086 SDValue getPICJumpTableRelocBase(SDValue Table,
1087 SelectionDAG &DAG) const override;
1088 const MCExpr *
1089 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
1090 unsigned JTI, MCContext &Ctx) const override;
1091
    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
1096 Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;
1097
1098 EVT getOptimalMemOpType(const MemOp &Op,
1099 const AttributeList &FuncAttributes) const override;
1100
1101 /// Returns true if it's safe to use load / store of the
1102 /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2, f64 loads / stores are done with fldl / fstpl,
    /// which also perform a type conversion. Note the specified type doesn't
    /// have to be legal as the hook is used before type legalization.
1107 bool isSafeMemOpType(MVT VT) const override;
1108
1109 bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1110
1111 /// Returns true if the target allows unaligned memory accesses of the
1112 /// specified type. Returns whether it is "fast" in the last argument.
1113 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1114 MachineMemOperand::Flags Flags,
1115 unsigned *Fast) const override;
1116
1117 /// This function returns true if the memory access is aligned or if the
1118 /// target allows this specific unaligned memory access. If the access is
1119 /// allowed, the optional final parameter returns a relative speed of the
1120 /// access (as defined by the target).
1121 bool allowsMemoryAccess(
1122 LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1123 Align Alignment,
1124 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1125 unsigned *Fast = nullptr) const override;
1126
1127 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1128 const MachineMemOperand &MMO,
1129 unsigned *Fast) const {
      return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                                MMO.getAlign(), MMO.getFlags(), Fast);
1132 }
1133
1134 /// Provide custom lowering hooks for some operations.
1135 ///
1136 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1137
    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
1140 ///
1141 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1142 SelectionDAG &DAG) const override;
1143
1144 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1145
1146 bool preferABDSToABSWithNSW(EVT VT) const override;
1147
1148 bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
1149 EVT ExtVT) const override;
1150
1151 bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
1152 EVT VT) const override;
1153
1154 /// Return true if the target has native support for
1155 /// the specified value type and it is 'desirable' to use the type for the
1156 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1157 /// instruction encodings are longer and some i16 instructions are slow.
1158 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1159
1160 /// Return true if the target has native support for the
1161 /// specified value type and it is 'desirable' to use the type. e.g. On x86
1162 /// i16 is legal, but undesirable since i16 instruction encodings are longer
1163 /// and some i16 instructions are slow.
1164 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1165
    /// Return the preferred fold type: Abs if this is a vector, AddAnd if it's
    /// an integer, None otherwise.
1168 TargetLowering::AndOrSETCCFoldKind
1169 isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
1170 const SDNode *SETCC0,
1171 const SDNode *SETCC1) const override;
1172
1173 /// Return the newly negated expression if the cost is not expensive and
1174 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
1175 /// do the negation.
1176 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1177 bool LegalOperations, bool ForCodeSize,
1178 NegatibleCost &Cost,
1179 unsigned Depth) const override;
1180
1181 MachineBasicBlock *
1182 EmitInstrWithCustomInserter(MachineInstr &MI,
1183 MachineBasicBlock *MBB) const override;
1184
1185 /// This method returns the name of a target specific DAG node.
1186 const char *getTargetNodeName(unsigned Opcode) const override;
1187
1188 /// Do not merge vector stores after legalization because that may conflict
1189 /// with x86-specific store splitting optimizations.
1190 bool mergeStoresAfterLegalization(EVT MemVT) const override {
1191 return !MemVT.isVector();
1192 }
1193
1194 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1195 const MachineFunction &MF) const override;
1196
1197 bool isCheapToSpeculateCttz(Type *Ty) const override;
1198
1199 bool isCheapToSpeculateCtlz(Type *Ty) const override;
1200
1201 bool isCtlzFast() const override;
1202
1203 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // add one store instruction. There is potentially a more significant
      // benefit because it avoids the float->int domain switch for the input
      // value, so it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and add one store instruction (costing one more store
      // buffer). Since that benefit is less clear-cut, we leave such pairs
      // out until we get a test case proving it is a win.
1216 return false;
1217 }
1218
1219 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1220
1221 bool hasAndNotCompare(SDValue Y) const override;
1222
1223 bool hasAndNot(SDValue Y) const override;
1224
1225 bool hasBitTest(SDValue X, SDValue Y) const override;
1226
1227 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1228 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1229 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1230 SelectionDAG &DAG) const override;
1231
1232 unsigned preferedOpcodeForCmpEqPiecesOfOperand(
1233 EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
1234 const APInt &ShiftOrRotateAmt,
1235 const std::optional<APInt> &AndMask) const override;
1236
1237 bool preferScalarizeSplat(SDNode *N) const override;
1238
1239 CondMergingParams
1240 getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1241 const Value *Rhs) const override;
1242
1243 bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1244 CombineLevel Level) const override;
1245
1246 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1247
1248 bool
1249 shouldTransformSignedTruncationCheck(EVT XVT,
1250 unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
1252 if (XVT.isVector())
1253 return false;
1254
1255 auto VTIsOk = [](EVT VT) -> bool {
1256 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1257 VT == MVT::i64;
1258 };
1259
      // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX
      // supports. XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1263 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1264 }
1265
1266 ShiftLegalizationStrategy
1267 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1268 unsigned ExpansionFactor) const override;
1269
1270 bool shouldSplatInsEltVarIndex(EVT VT) const override;
1271
1272 bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
      // Converting to sat variants holds little benefit on X86 as we will just
      // need to saturate the value back using fp arithmetic.
1275 return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1276 }
1277
1278 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1279 return VT.isScalarInteger();
1280 }
1281
1282 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1283 MVT hasFastEqualityCompare(unsigned NumBits) const override;
1284
1285 /// Return the value type to use for ISD::SETCC.
1286 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1287 EVT VT) const override;
1288
1289 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1290 const APInt &DemandedElts,
1291 TargetLoweringOpt &TLO) const override;
1292
1293 /// Determine which of the bits specified in Mask are known to be either
1294 /// zero or one and return them in the KnownZero/KnownOne bitsets.
1295 void computeKnownBitsForTargetNode(const SDValue Op,
1296 KnownBits &Known,
1297 const APInt &DemandedElts,
1298 const SelectionDAG &DAG,
1299 unsigned Depth = 0) const override;
1300
1301 /// Determine the number of bits in the operation that are sign bits.
1302 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1303 const APInt &DemandedElts,
1304 const SelectionDAG &DAG,
1305 unsigned Depth) const override;
1306
1307 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1308 const APInt &DemandedElts,
1309 APInt &KnownUndef,
1310 APInt &KnownZero,
1311 TargetLoweringOpt &TLO,
1312 unsigned Depth) const override;
1313
1314 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1315 const APInt &DemandedElts,
1316 unsigned MaskIndex,
1317 TargetLoweringOpt &TLO,
1318 unsigned Depth) const;
1319
1320 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1321 const APInt &DemandedBits,
1322 const APInt &DemandedElts,
1323 KnownBits &Known,
1324 TargetLoweringOpt &TLO,
1325 unsigned Depth) const override;
1326
1327 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1328 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1329 SelectionDAG &DAG, unsigned Depth) const override;
1330
1331 bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1332 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1333 bool PoisonOnly, unsigned Depth) const override;
1334
1335 bool canCreateUndefOrPoisonForTargetNode(
1336 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1337 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1338
1339 bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1340 APInt &UndefElts, const SelectionDAG &DAG,
1341 unsigned Depth) const override;
1342
1343 bool isTargetCanonicalConstantNode(SDValue Op) const override {
1344 // Peek through bitcasts/extracts/inserts to see if we have a vector
1345 // load/broadcast from memory.
      while (Op.getOpcode() == ISD::BITCAST ||
             Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
             (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
              Op.getOperand(0).isUndef()))
        Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);

      return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
             Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
             (Op.getOpcode() == ISD::LOAD &&
              getTargetConstantFromLoad(cast<LoadSDNode>(Op))) ||
1356 TargetLowering::isTargetCanonicalConstantNode(Op);
1357 }
1358
1359 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1360
1361 SDValue unwrapAddress(SDValue N) const override;
1362
1363 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1364
1365 bool ExpandInlineAsm(CallInst *CI) const override;
1366
1367 ConstraintType getConstraintType(StringRef Constraint) const override;
1368
1369 /// Examine constraint string and operand type and determine a weight value.
1370 /// The operand object must already have been set up with the operand type.
1371 ConstraintWeight
1372 getSingleConstraintMatchWeight(AsmOperandInfo &Info,
1373 const char *Constraint) const override;
1374
1375 const char *LowerXConstraint(EVT ConstraintVT) const override;
1376
1377 /// Lower the specified operand into the Ops vector. If it is invalid, don't
1378 /// add anything to Ops. If hasMemory is true it means one of the asm
1379 /// constraint of the inline asm instruction being processed is 'm'.
1380 void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1381 std::vector<SDValue> &Ops,
1382 SelectionDAG &DAG) const override;
1383
1384 InlineAsm::ConstraintCode
1385 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1386 if (ConstraintCode == "v")
1387 return InlineAsm::ConstraintCode::v;
1388 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1389 }
1390
1391 /// Handle Lowering flag assembly outputs.
1392 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1393 const SDLoc &DL,
1394 const AsmOperandInfo &Constraint,
1395 SelectionDAG &DAG) const override;
1396
1397 /// Given a physical register constraint
1398 /// (e.g. {edx}), return the register number and the register class for the
1399 /// register. This should only be used for C_Register constraints. On
1400 /// error, this returns a register number of 0.
1401 std::pair<unsigned, const TargetRegisterClass *>
1402 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1403 StringRef Constraint, MVT VT) const override;
1404
1405 /// Return true if the addressing mode represented
1406 /// by AM is legal for this target, for a load/store of the specified type.
1407 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1408 Type *Ty, unsigned AS,
1409 Instruction *I = nullptr) const override;
1410
1411 bool addressingModeSupportsTLS(const GlobalValue &GV) const override;
1412
1413 /// Return true if the specified immediate is legal
1414 /// icmp immediate, that is the target has icmp instructions which can
1415 /// compare a register against the immediate without having to materialize
1416 /// the immediate into a register.
1417 bool isLegalICmpImmediate(int64_t Imm) const override;
1418
1419 /// Return true if the specified immediate is legal
1420 /// add immediate, that is the target has add instructions which can
1421 /// add a register and the immediate without having to materialize
1422 /// the immediate into a register.
1423 bool isLegalAddImmediate(int64_t Imm) const override;
1424
1425 bool isLegalStoreImmediate(int64_t Imm) const override;
1426
1427 /// Add x86-specific opcodes to the default list.
1428 bool isBinOp(unsigned Opcode) const override;
1429
1430 /// Returns true if the opcode is a commutative binary operation.
1431 bool isCommutativeBinOp(unsigned Opcode) const override;
1432
    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
1436 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1437 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1438
1439 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1440
1441 /// Return true if any actual instruction that defines a
1442 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1443 /// register. This does not necessarily include registers defined in
1444 /// unknown ways, such as incoming arguments, or copies from unknown
1445 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1446 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1447 /// all instructions that define 32-bit values implicit zero-extend the
1448 /// result out to 64 bits.
1449 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1450 bool isZExtFree(EVT VT1, EVT VT2) const override;
1451 bool isZExtFree(SDValue Val, EVT VT2) const override;
1452
1453 bool shouldConvertPhiType(Type *From, Type *To) const override;
1454
1455 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1456 /// extend node) is profitable.
1457 bool isVectorLoadExtDesirable(SDValue) const override;
1458
1459 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1460 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1461 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1462 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1463 EVT VT) const override;
1464
1465 /// Return true if it's profitable to narrow operations of type SrcVT to
1466 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1467 /// from i32 to i16.
1468 bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override;
1469
1470 bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
1471 unsigned SelectOpcode, SDValue X,
1472 SDValue Y) const override;
1473
1474 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1475 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1476 /// true and stores the intrinsic information into the IntrinsicInfo that was
1477 /// passed to the function.
1478 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1479 MachineFunction &MF,
1480 unsigned Intrinsic) const override;
1481
1482 /// Returns true if the target can instruction select the
1483 /// specified FP immediate natively. If false, the legalizer will
1484 /// materialize the FP immediate as a load from a constant pool.
1485 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1486 bool ForCodeSize) const override;
1487
1488 /// Targets can use this to indicate that they only support *some*
1489 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1490 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1491 /// be legal.
1492 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1493
1494 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1495 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1496 /// constant pool entry.
1497 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1498
1499 /// Returns true if lowering to a jump table is allowed.
1500 bool areJTsAllowed(const Function *Fn) const override;
1501
1502 MVT getPreferredSwitchConditionType(LLVMContext &Context,
1503 EVT ConditionVT) const override;
1504
1505 /// If true, then instruction selection should
1506 /// seek to shrink the FP constant of the specified type to a smaller type
1507 /// in order to save space and / or reduce runtime.
1508 bool ShouldShrinkFPConstant(EVT VT) const override;
1509
1510 /// Return true if we believe it is correct and profitable to reduce the
1511 /// load node to a smaller type.
1512 bool
1513 shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
1514 std::optional<unsigned> ByteOffset) const override;
1515
1516 /// Return true if the specified scalar FP type is computed in an SSE
1517 /// register, not on the X87 floating point stack.
1518 bool isScalarFPTypeInSSEReg(EVT VT) const;
1519
1520 /// Returns true if it is beneficial to convert a load of a constant
1521 /// to just the constant itself.
1522 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1523 Type *Ty) const override;
1524
1525 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1526
1527 bool convertSelectOfConstantsToMath(EVT VT) const override;
1528
1529 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1530 SDValue C) const override;
1531
1532 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1533 /// with this index.
1534 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1535 unsigned Index) const override;
1536
1537 /// Scalar ops always have equal or better analysis/performance/power than
1538 /// the vector equivalent, so this always makes sense if the scalar op is
1539 /// supported.
1540 bool shouldScalarizeBinop(SDValue) const override;
1541
1542 /// Extract of a scalar FP value from index 0 of a vector is free.
1543 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1544 EVT EltVT = VT.getScalarType();
1545 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1546 }
1547
1548 /// Overflow nodes should get combined/lowered to optimal instructions
1549 /// (they should allow eliminating explicit compares by getting flags from
1550 /// math ops).
1551 bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1552 bool MathUsed) const override;
1553
1554 bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1555 unsigned AddrSpace) const override {
1556 // If we can replace more than 2 scalar stores, there will be a reduction
1557 // in instructions even after we add a vector constant load.
1558 return IsZero || NumElem > 2;
1559 }
1560
1561 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1562 const SelectionDAG &DAG,
1563 const MachineMemOperand &MMO) const override;
1564
1565 Register getRegisterByName(const char* RegName, LLT VT,
1566 const MachineFunction &MF) const override;
1567
1568 /// If a physical register, this returns the register that receives the
1569 /// exception address on entry to an EH pad.
1570 Register
1571 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1572
1573 /// If a physical register, this returns the register that receives the
1574 /// exception typeid on entry to a landing pad.
1575 Register
1576 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1577
1578 bool needsFixedCatchObjects() const override;
1579
1580 /// This method returns a target specific FastISel object,
1581 /// or null if the target does not support "fast" ISel.
1582 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1583 const TargetLibraryInfo *libInfo) const override;
1584
1585 /// If the target has a standard location for the stack protector cookie,
1586 /// returns the address of that location. Otherwise, returns nullptr.
1587 Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1588
1589 bool useLoadStackGuardNode(const Module &M) const override;
1590 bool useStackGuardXorFP() const override;
1591 void insertSSPDeclarations(Module &M) const override;
1592 Value *getSDagStackGuard(const Module &M) const override;
1593 Function *getSSPStackGuardCheck(const Module &M) const override;
1594 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1595 const SDLoc &DL) const override;
1596
1597
    /// If the target stores the SafeStack pointer at a fixed offset in some
    /// non-standard address space, return its location and populate the
    /// address space and offset as appropriate.
1601 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1602
1603 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1604 SDValue Chain, SDValue Pointer,
1605 MachinePointerInfo PtrInfo,
1606 Align Alignment,
1607 SelectionDAG &DAG) const;
1608
1609 /// Customize the preferred legalization strategy for certain types.
1610 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1611
1612 bool softPromoteHalfType() const override { return true; }
1613
1614 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1615 EVT VT) const override;
1616
1617 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1618 CallingConv::ID CC,
1619 EVT VT) const override;
1620
1621 unsigned getVectorTypeBreakdownForCallingConv(
1622 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1623 unsigned &NumIntermediates, MVT &RegisterVT) const override;
1624
1625 bool functionArgumentNeedsConsecutiveRegisters(
1626 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
1627 const DataLayout &DL) const override;
1628
1629 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1630
1631 bool supportSwiftError() const override;
1632
1633 bool supportKCFIBundles() const override { return true; }
1634
1635 MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1636 MachineBasicBlock::instr_iterator &MBBI,
1637 const TargetInstrInfo *TII) const override;
1638
1639 bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1640 bool hasInlineStackProbe(const MachineFunction &MF) const override;
1641 StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1642
1643 unsigned getStackProbeSize(const MachineFunction &MF) const;
1644
1645 bool hasVectorBlend() const override { return true; }
1646
1647 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1648
1649 bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1650 unsigned OpNo) const override;
1651
1652 SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1653 MachineMemOperand *MMO, SDValue &NewLoad,
1654 SDValue Ptr, SDValue PassThru,
1655 SDValue Mask) const override;
1656 SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1657 MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
1658 SDValue Mask) const override;
1659
1660 /// Lower interleaved load(s) into target specific
1661 /// instructions/intrinsics.
1662 bool lowerInterleavedLoad(LoadInst *LI,
1663 ArrayRef<ShuffleVectorInst *> Shuffles,
1664 ArrayRef<unsigned> Indices,
1665 unsigned Factor) const override;
1666
1667 /// Lower interleaved store(s) into target specific
1668 /// instructions/intrinsics.
1669 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1670 unsigned Factor) const override;
1671
1672 SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1673 int JTI, SelectionDAG &DAG) const override;
1674
1675 Align getPrefLoopAlignment(MachineLoop *ML) const override;
1676
1677 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1678 if (VT == MVT::f80)
        return EVT::getIntegerVT(Context, 96);
1680 return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1681 }
1682
1683 protected:
1684 std::pair<const TargetRegisterClass *, uint8_t>
1685 findRepresentativeClass(const TargetRegisterInfo *TRI,
1686 MVT VT) const override;
1687
1688 private:
1689 /// Keep a reference to the X86Subtarget around so that we can
1690 /// make the right decision when generating code for different targets.
1691 const X86Subtarget &Subtarget;
1692
1693 /// A list of legal FP immediates.
1694 std::vector<APFloat> LegalFPImmediates;
1695
1696 /// Indicate that this x86 target can instruction
1697 /// select the specified FP immediate natively.
1698 void addLegalFPImmediate(const APFloat& Imm) {
      LegalFPImmediates.push_back(Imm);
1700 }
1701
1702 SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1703 CallingConv::ID CallConv, bool isVarArg,
1704 const SmallVectorImpl<ISD::InputArg> &Ins,
1705 const SDLoc &dl, SelectionDAG &DAG,
1706 SmallVectorImpl<SDValue> &InVals,
1707 uint32_t *RegMask) const;
1708 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1709 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1710 const SDLoc &dl, SelectionDAG &DAG,
1711 const CCValAssign &VA, MachineFrameInfo &MFI,
1712 unsigned i) const;
1713 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1714 const SDLoc &dl, SelectionDAG &DAG,
1715 const CCValAssign &VA,
1716 ISD::ArgFlagsTy Flags, bool isByval) const;
1717
1718 // Call lowering helpers.
1719
1720 /// Check whether the call is eligible for tail call optimization. Targets
1721 /// that want to do tail call optimization should implement this function.
1722 bool IsEligibleForTailCallOptimization(
1723 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
1724 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
1725 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1726 SDValue Chain, bool IsTailCall,
1727 bool Is64Bit, int FPDiff,
1728 const SDLoc &dl) const;
1729
1730 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1731 SelectionDAG &DAG) const;
1732
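    /// Address space used for segment-relative (FS/GS) references, e.g. when
    /// locating the stack protector guard slot.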
    unsigned getAddressSpace() const;

    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                            SDValue &Chain) const;
    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

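    /// Select the X86ISD wrapper kind (Wrapper or WrapperRIP) to use for the
    /// given global value and operand flags.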
    unsigned getGlobalWrapperKind(const GlobalValue *GV,
                                  const unsigned char OpFlags) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall,
                                  bool *IsImpCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

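    // Split callee-saved-register spilling/restoring is only used for the
    // CXX_FAST_TLS calling convention on functions known not to unwind.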
    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context,
                        const Type *RetTy) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
    ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

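    /// Whether an atomic operation on \p MemType is too wide for a plain
    /// CMPXCHG and must be expanded with CMPXCHG8B/CMPXCHG16B.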
    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to lower a pair of cascaded CMOV (select) pseudo
    /// instructions.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
                                              MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                             SDValue IntPow2) const override;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                    SDValue V2) const;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86-specific gather/scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility class and should never be created
    // directly.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

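    // Operand layout (mirroring MaskedGatherScatterSDNode):
    //   0 - chain, 1 - passthru (gather) / value (scatter), 2 - mask,
    //   3 - base pointer, 4 - index, 5 - scale.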
    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
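  /// E.g. for a single 128-bit lane of v8i16, the binary masks are
  ///   Lo --> <0, 8, 1, 9, 2, 10, 3, 11>
  ///   Hi --> <4, 12, 5, 13, 6, 14, 7, 15>
  /// while the unary forms repeat the first input, e.g.
  ///   Lo --> <0, 0, 1, 1, 2, 2, 3, 3>.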
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H