1 | //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that X86 uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
16 | |
17 | #include "llvm/CodeGen/MachineFunction.h" |
18 | #include "llvm/CodeGen/TargetLowering.h" |
19 | |
20 | namespace llvm { |
21 | class X86Subtarget; |
22 | class X86TargetMachine; |
23 | |
24 | namespace X86ISD { |
25 | // X86 Specific DAG Nodes |
26 | enum NodeType : unsigned { |
27 | // Start the numbering where the builtin ops leave off. |
28 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
29 | |
30 | /// Bit scan forward. |
31 | BSF, |
32 | /// Bit scan reverse. |
33 | BSR, |
34 | |
35 | /// X86 funnel/double shift i16 instructions. These correspond to |
36 | /// X86::SHLDW and X86::SHRDW instructions which have different amt |
/// modulo rules from generic funnel shifts.
38 | /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. |
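/// For example (a sketch of the difference): for an i16 funnel-shift
/// amount of 20, ISD::FSHL reduces the amount modulo 16 (yielding 4),
/// while SHLDW masks it modulo 32 (leaving 20, which is out of range
/// for i16 and therefore undefined on the hardware instruction).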
39 | FSHL, |
40 | FSHR, |
41 | |
42 | /// Bitwise logical AND of floating point values. This corresponds |
43 | /// to X86::ANDPS or X86::ANDPD. |
44 | FAND, |
45 | |
46 | /// Bitwise logical OR of floating point values. This corresponds |
47 | /// to X86::ORPS or X86::ORPD. |
48 | FOR, |
49 | |
50 | /// Bitwise logical XOR of floating point values. This corresponds |
51 | /// to X86::XORPS or X86::XORPD. |
52 | FXOR, |
53 | |
54 | /// Bitwise logical ANDNOT of floating point values. This |
55 | /// corresponds to X86::ANDNPS or X86::ANDNPD. |
56 | FANDN, |
57 | |
58 | /// These operations represent an abstract X86 call |
59 | /// instruction, which includes a bunch of information. In particular the |
/// operands of these nodes are:
61 | /// |
62 | /// #0 - The incoming token chain |
63 | /// #1 - The callee |
64 | /// #2 - The number of arg bytes the caller pushes on the stack. |
65 | /// #3 - The number of arg bytes the callee pops off the stack. |
66 | /// #4 - The value to pass in AL/AX/EAX (optional) |
67 | /// #5 - The value to pass in DL/DX/EDX (optional) |
68 | /// |
69 | /// The result values of these nodes are: |
70 | /// |
71 | /// #0 - The outgoing token chain |
72 | /// #1 - The first register result value (optional) |
73 | /// #2 - The second register result value (optional) |
74 | /// |
75 | CALL, |
76 | |
77 | /// Same as call except it adds the NoTrack prefix. |
78 | NT_CALL, |
79 | |
// Pseudo for an ObjC call that gets emitted together with a special
// marker instruction.
82 | CALL_RVMARKER, |
83 | |
84 | /// X86 compare and logical compare instructions. |
85 | CMP, |
86 | FCMP, |
87 | COMI, |
88 | UCOMI, |
89 | |
90 | /// X86 bit-test instructions. |
91 | BT, |
92 | |
93 | /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS |
94 | /// operand, usually produced by a CMP instruction. |
95 | SETCC, |
96 | |
97 | /// X86 Select |
98 | SELECTS, |
99 | |
// Same as SETCC except it's materialized with an sbb and the value is all
// ones or all zeros.
102 | SETCC_CARRY, // R = carry_bit ? ~0 : 0 |
103 | |
104 | /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. |
105 | /// Operands are two FP values to compare; result is a mask of |
/// 0s or 1s. Generally does the right thing for C/C++ with NaNs.
107 | FSETCC, |
108 | |
/// X86 FP SETCC, similar to above, but with output as an i1 mask, plus
/// a version with SAE.
111 | FSETCCM, |
112 | FSETCCM_SAE, |
113 | |
114 | /// X86 conditional moves. Operand 0 and operand 1 are the two values |
115 | /// to select from. Operand 2 is the condition code, and operand 3 is the |
116 | /// flag operand produced by a CMP or TEST instruction. |
117 | CMOV, |
118 | |
119 | /// X86 conditional branches. Operand 0 is the chain operand, operand 1 |
120 | /// is the block to branch if condition is true, operand 2 is the |
121 | /// condition code, and operand 3 is the flag operand produced by a CMP |
122 | /// or TEST instruction. |
123 | BRCOND, |
124 | |
125 | /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and |
126 | /// operand 1 is the target address. |
127 | NT_BRIND, |
128 | |
129 | /// Return with a glue operand. Operand 0 is the chain operand, operand |
130 | /// 1 is the number of bytes of stack to pop. |
131 | RET_GLUE, |
132 | |
133 | /// Return from interrupt. Operand 0 is the number of bytes to pop. |
134 | IRET, |
135 | |
136 | /// Repeat fill, corresponds to X86::REP_STOSx. |
137 | REP_STOS, |
138 | |
139 | /// Repeat move, corresponds to X86::REP_MOVSx. |
140 | REP_MOVS, |
141 | |
142 | /// On Darwin, this node represents the result of the popl |
143 | /// at function entry, used for PIC code. |
144 | GlobalBaseReg, |
145 | |
146 | /// A wrapper node for TargetConstantPool, TargetJumpTable, |
147 | /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, |
148 | /// MCSymbol and TargetBlockAddress. |
149 | Wrapper, |
150 | |
151 | /// Special wrapper used under X86-64 PIC mode for RIP |
152 | /// relative displacements. |
153 | WrapperRIP, |
154 | |
155 | /// Copies a 64-bit value from an MMX vector to the low word |
156 | /// of an XMM vector, with the high word zero filled. |
157 | MOVQ2DQ, |
158 | |
159 | /// Copies a 64-bit value from the low word of an XMM vector |
160 | /// to an MMX vector. |
161 | MOVDQ2Q, |
162 | |
/// Copies a 32-bit value from the low word of an MMX
/// vector to a GPR.
MMX_MOVD2W,

/// Copies a GPR into the low 32-bit word of an MMX vector
/// and zeros out the high word.
MMX_MOVW2D,
170 | |
171 | /// Extract an 8-bit value from a vector and zero extend it to |
172 | /// i32, corresponds to X86::PEXTRB. |
173 | PEXTRB, |
174 | |
175 | /// Extract a 16-bit value from a vector and zero extend it to |
176 | /// i32, corresponds to X86::PEXTRW. |
177 | PEXTRW, |
178 | |
/// Insert any element of a 4 x float vector into any element
/// of a destination 4 x float vector.
181 | INSERTPS, |
182 | |
183 | /// Insert the lower 8-bits of a 32-bit value to a vector, |
184 | /// corresponds to X86::PINSRB. |
185 | PINSRB, |
186 | |
187 | /// Insert the lower 16-bits of a 32-bit value to a vector, |
188 | /// corresponds to X86::PINSRW. |
189 | PINSRW, |
190 | |
191 | /// Shuffle 16 8-bit values within a vector. |
192 | PSHUFB, |
193 | |
194 | /// Compute Sum of Absolute Differences. |
195 | PSADBW, |
196 | /// Compute Double Block Packed Sum-Absolute-Differences |
197 | DBPSADBW, |
198 | |
199 | /// Bitwise Logical AND NOT of Packed FP values. |
200 | ANDNP, |
201 | |
202 | /// Blend where the selector is an immediate. |
203 | BLENDI, |
204 | |
205 | /// Dynamic (non-constant condition) vector blend where only the sign bits |
206 | /// of the condition elements are used. This is used to enforce that the |
207 | /// condition mask is not valid for generic VSELECT optimizations. This |
208 | /// is also used to implement the intrinsics. |
209 | /// Operands are in VSELECT order: MASK, TRUE, FALSE |
210 | BLENDV, |
211 | |
212 | /// Combined add and sub on an FP vector. |
213 | ADDSUB, |
214 | |
215 | // FP vector ops with rounding mode. |
216 | FADD_RND, |
217 | FADDS, |
218 | FADDS_RND, |
219 | FSUB_RND, |
220 | FSUBS, |
221 | FSUBS_RND, |
222 | FMUL_RND, |
223 | FMULS, |
224 | FMULS_RND, |
225 | FDIV_RND, |
226 | FDIVS, |
227 | FDIVS_RND, |
228 | FMAX_SAE, |
229 | FMAXS_SAE, |
230 | FMIN_SAE, |
231 | FMINS_SAE, |
232 | FSQRT_RND, |
233 | FSQRTS, |
234 | FSQRTS_RND, |
235 | |
236 | // FP vector get exponent. |
237 | FGETEXP, |
238 | FGETEXP_SAE, |
239 | FGETEXPS, |
240 | FGETEXPS_SAE, |
241 | // Extract Normalized Mantissas. |
242 | VGETMANT, |
243 | VGETMANT_SAE, |
244 | VGETMANTS, |
245 | VGETMANTS_SAE, |
246 | // FP Scale. |
247 | SCALEF, |
248 | SCALEF_RND, |
249 | SCALEFS, |
250 | SCALEFS_RND, |
251 | |
252 | /// Integer horizontal add/sub. |
253 | HADD, |
254 | HSUB, |
255 | |
256 | /// Floating point horizontal add/sub. |
257 | FHADD, |
258 | FHSUB, |
259 | |
260 | // Detect Conflicts Within a Vector |
261 | CONFLICT, |
262 | |
263 | /// Floating point max and min. |
264 | FMAX, |
265 | FMIN, |
266 | |
267 | /// Commutative FMIN and FMAX. |
268 | FMAXC, |
269 | FMINC, |
270 | |
271 | /// Scalar intrinsic floating point max and min. |
272 | FMAXS, |
273 | FMINS, |
274 | |
275 | /// Floating point reciprocal-sqrt and reciprocal approximation. |
276 | /// Note that these typically require refinement |
277 | /// in order to obtain suitable precision. |
278 | FRSQRT, |
279 | FRCP, |
280 | |
281 | // AVX-512 reciprocal approximations with a little more precision. |
282 | RSQRT14, |
283 | RSQRT14S, |
284 | RCP14, |
285 | RCP14S, |
286 | |
287 | // Thread Local Storage. |
288 | TLSADDR, |
289 | |
290 | // Thread Local Storage. A call to get the start address |
291 | // of the TLS block for the current module. |
292 | TLSBASEADDR, |
293 | |
// Thread Local Storage. A call to an OS-provided
// thunk at the address from an earlier relocation.
296 | TLSCALL, |
297 | |
// Thread Local Storage. A descriptor containing pointers to the
// code and to the argument used to get the TLS offset for the symbol.
300 | TLSDESC, |
301 | |
302 | // Exception Handling helpers. |
303 | EH_RETURN, |
304 | |
305 | // SjLj exception handling setjmp. |
306 | EH_SJLJ_SETJMP, |
307 | |
308 | // SjLj exception handling longjmp. |
309 | EH_SJLJ_LONGJMP, |
310 | |
311 | // SjLj exception handling dispatch. |
312 | EH_SJLJ_SETUP_DISPATCH, |
313 | |
314 | /// Tail call return. See X86TargetLowering::LowerCall for |
315 | /// the list of operands. |
316 | TC_RETURN, |
317 | |
318 | // Vector move to low scalar and zero higher vector elements. |
319 | VZEXT_MOVL, |
320 | |
321 | // Vector integer truncate. |
322 | VTRUNC, |
323 | // Vector integer truncate with unsigned/signed saturation. |
324 | VTRUNCUS, |
325 | VTRUNCS, |
326 | |
// Masked versions of the above. Used when the result is smaller than
// 128 bits, since the mask only applies to the lower elements and can't
// be represented by a select.
330 | // SRC, PASSTHRU, MASK |
331 | VMTRUNC, |
332 | VMTRUNCUS, |
333 | VMTRUNCS, |
334 | |
335 | // Vector FP extend. |
336 | VFPEXT, |
337 | VFPEXT_SAE, |
338 | VFPEXTS, |
339 | VFPEXTS_SAE, |
340 | |
341 | // Vector FP round. |
342 | VFPROUND, |
343 | VFPROUND_RND, |
344 | VFPROUNDS, |
345 | VFPROUNDS_RND, |
346 | |
347 | // Masked version of above. Used for v2f64->v4f32. |
348 | // SRC, PASSTHRU, MASK |
349 | VMFPROUND, |
350 | |
351 | // 128-bit vector logical left / right shift |
352 | VSHLDQ, |
353 | VSRLDQ, |
354 | |
355 | // Vector shift elements |
356 | VSHL, |
357 | VSRL, |
358 | VSRA, |
359 | |
360 | // Vector variable shift |
361 | VSHLV, |
362 | VSRLV, |
363 | VSRAV, |
364 | |
365 | // Vector shift elements by immediate |
366 | VSHLI, |
367 | VSRLI, |
368 | VSRAI, |
369 | |
370 | // Shifts of mask registers. |
371 | KSHIFTL, |
372 | KSHIFTR, |
373 | |
374 | // Bit rotate by immediate |
375 | VROTLI, |
376 | VROTRI, |
377 | |
378 | // Vector packed double/float comparison. |
379 | CMPP, |
380 | |
381 | // Vector integer comparisons. |
382 | PCMPEQ, |
383 | PCMPGT, |
384 | |
385 | // v8i16 Horizontal minimum and position. |
386 | PHMINPOS, |
387 | |
388 | MULTISHIFT, |
389 | |
390 | /// Vector comparison generating mask bits for fp and |
391 | /// integer signed and unsigned data types. |
392 | CMPM, |
393 | // Vector mask comparison generating mask bits for FP values. |
394 | CMPMM, |
395 | // Vector mask comparison with SAE for FP values. |
396 | CMPMM_SAE, |
397 | |
398 | // Arithmetic operations with FLAGS results. |
399 | ADD, |
400 | SUB, |
401 | ADC, |
402 | SBB, |
403 | SMUL, |
404 | UMUL, |
405 | OR, |
406 | XOR, |
407 | AND, |
408 | |
409 | // Bit field extract. |
410 | BEXTR, |
411 | BEXTRI, |
412 | |
413 | // Zero High Bits Starting with Specified Bit Position. |
414 | BZHI, |
415 | |
416 | // Parallel extract and deposit. |
417 | PDEP, |
418 | PEXT, |
419 | |
420 | // X86-specific multiply by immediate. |
421 | MUL_IMM, |
422 | |
423 | // Vector sign bit extraction. |
424 | MOVMSK, |
425 | |
426 | // Vector bitwise comparisons. |
427 | PTEST, |
428 | |
429 | // Vector packed fp sign bitwise comparisons. |
430 | TESTP, |
431 | |
432 | // OR/AND test for masks. |
433 | KORTEST, |
434 | KTEST, |
435 | |
436 | // ADD for masks. |
437 | KADD, |
438 | |
439 | // Several flavors of instructions with vector shuffle behaviors. |
// Saturated signed/unsigned packing.
441 | PACKSS, |
442 | PACKUS, |
443 | // Intra-lane alignr. |
444 | PALIGNR, |
445 | // AVX512 inter-lane alignr. |
446 | VALIGN, |
447 | PSHUFD, |
448 | PSHUFHW, |
449 | PSHUFLW, |
450 | SHUFP, |
451 | // VBMI2 Concat & Shift. |
452 | VSHLD, |
453 | VSHRD, |
454 | VSHLDV, |
455 | VSHRDV, |
456 | // Shuffle Packed Values at 128-bit granularity. |
457 | SHUF128, |
458 | MOVDDUP, |
459 | MOVSHDUP, |
460 | MOVSLDUP, |
461 | MOVLHPS, |
462 | MOVHLPS, |
463 | MOVSD, |
464 | MOVSS, |
465 | MOVSH, |
466 | UNPCKL, |
467 | UNPCKH, |
468 | VPERMILPV, |
469 | VPERMILPI, |
470 | VPERMI, |
471 | VPERM2X128, |
472 | |
473 | // Variable Permute (VPERM). |
474 | // Res = VPERMV MaskV, V0 |
475 | VPERMV, |
476 | |
477 | // 3-op Variable Permute (VPERMT2). |
478 | // Res = VPERMV3 V0, MaskV, V1 |
479 | VPERMV3, |
480 | |
481 | // Bitwise ternary logic. |
482 | VPTERNLOG, |
483 | // Fix Up Special Packed Float32/64 values. |
484 | VFIXUPIMM, |
485 | VFIXUPIMM_SAE, |
486 | VFIXUPIMMS, |
487 | VFIXUPIMMS_SAE, |
488 | // Range Restriction Calculation For Packed Pairs of Float32/64 values. |
489 | VRANGE, |
490 | VRANGE_SAE, |
491 | VRANGES, |
492 | VRANGES_SAE, |
// Reduce - Perform Reduction Transformation on scalar/packed FP.
494 | VREDUCE, |
495 | VREDUCE_SAE, |
496 | VREDUCES, |
497 | VREDUCES_SAE, |
498 | // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. |
499 | // Also used by the legacy (V)ROUND intrinsics where we mask out the |
500 | // scaling part of the immediate. |
501 | VRNDSCALE, |
502 | VRNDSCALE_SAE, |
503 | VRNDSCALES, |
504 | VRNDSCALES_SAE, |
// Tests types of packed FP values.
VFPCLASS,
// Tests types of scalar FP values.
508 | VFPCLASSS, |
509 | |
510 | // Broadcast (splat) scalar or element 0 of a vector. If the operand is |
511 | // a vector, this node may change the vector length as part of the splat. |
512 | VBROADCAST, |
513 | // Broadcast mask to vector. |
514 | VBROADCASTM, |
515 | |
516 | /// SSE4A Extraction and Insertion. |
517 | EXTRQI, |
518 | INSERTQI, |
519 | |
520 | // XOP arithmetic/logical shifts. |
521 | VPSHA, |
522 | VPSHL, |
523 | // XOP signed/unsigned integer comparisons. |
524 | VPCOM, |
525 | VPCOMU, |
526 | // XOP packed permute bytes. |
527 | VPPERM, |
528 | // XOP two source permutation. |
529 | VPERMIL2, |
530 | |
531 | // Vector multiply packed unsigned doubleword integers. |
532 | PMULUDQ, |
533 | // Vector multiply packed signed doubleword integers. |
534 | PMULDQ, |
// Vector multiply packed integers with round and scale (PMULHRSW).
536 | MULHRS, |
537 | |
538 | // Multiply and Add Packed Integers. |
539 | VPMADDUBSW, |
540 | VPMADDWD, |
541 | |
542 | // AVX512IFMA multiply and add. |
// NOTE: These are different from the instructions and perform
544 | // op0 x op1 + op2. |
545 | VPMADD52L, |
546 | VPMADD52H, |
547 | |
548 | // VNNI |
549 | VPDPBUSD, |
550 | VPDPBUSDS, |
551 | VPDPWSSD, |
552 | VPDPWSSDS, |
553 | |
554 | // FMA nodes. |
555 | // We use the target independent ISD::FMA for the non-inverted case. |
556 | FNMADD, |
557 | FMSUB, |
558 | FNMSUB, |
559 | FMADDSUB, |
560 | FMSUBADD, |
561 | |
562 | // FMA with rounding mode. |
563 | FMADD_RND, |
564 | FNMADD_RND, |
565 | FMSUB_RND, |
566 | FNMSUB_RND, |
567 | FMADDSUB_RND, |
568 | FMSUBADD_RND, |
569 | |
570 | // AVX512-FP16 complex addition and multiplication. |
571 | VFMADDC, |
572 | VFMADDC_RND, |
573 | VFCMADDC, |
574 | VFCMADDC_RND, |
575 | |
576 | VFMULC, |
577 | VFMULC_RND, |
578 | VFCMULC, |
579 | VFCMULC_RND, |
580 | |
581 | VFMADDCSH, |
582 | VFMADDCSH_RND, |
583 | VFCMADDCSH, |
584 | VFCMADDCSH_RND, |
585 | |
586 | VFMULCSH, |
587 | VFMULCSH_RND, |
588 | VFCMULCSH, |
589 | VFCMULCSH_RND, |
590 | |
591 | VPDPBSUD, |
592 | VPDPBSUDS, |
593 | VPDPBUUD, |
594 | VPDPBUUDS, |
595 | VPDPBSSD, |
596 | VPDPBSSDS, |
597 | |
598 | // Compress and expand. |
599 | COMPRESS, |
600 | EXPAND, |
601 | |
602 | // Bits shuffle |
603 | VPSHUFBITQMB, |
604 | |
// Convert unsigned/signed integer to floating-point value with rounding mode.
606 | SINT_TO_FP_RND, |
607 | UINT_TO_FP_RND, |
608 | SCALAR_SINT_TO_FP, |
609 | SCALAR_UINT_TO_FP, |
610 | SCALAR_SINT_TO_FP_RND, |
611 | SCALAR_UINT_TO_FP_RND, |
612 | |
613 | // Vector float/double to signed/unsigned integer. |
614 | CVTP2SI, |
615 | CVTP2UI, |
616 | CVTP2SI_RND, |
617 | CVTP2UI_RND, |
618 | // Scalar float/double to signed/unsigned integer. |
619 | CVTS2SI, |
620 | CVTS2UI, |
621 | CVTS2SI_RND, |
622 | CVTS2UI_RND, |
623 | |
624 | // Vector float/double to signed/unsigned integer with truncation. |
625 | CVTTP2SI, |
626 | CVTTP2UI, |
627 | CVTTP2SI_SAE, |
628 | CVTTP2UI_SAE, |
629 | // Scalar float/double to signed/unsigned integer with truncation. |
630 | CVTTS2SI, |
631 | CVTTS2UI, |
632 | CVTTS2SI_SAE, |
633 | CVTTS2UI_SAE, |
634 | |
635 | // Vector signed/unsigned integer to float/double. |
636 | CVTSI2P, |
637 | CVTUI2P, |
638 | |
639 | // Masked versions of above. Used for v2f64->v4f32. |
640 | // SRC, PASSTHRU, MASK |
641 | MCVTP2SI, |
642 | MCVTP2UI, |
643 | MCVTTP2SI, |
644 | MCVTTP2UI, |
645 | MCVTSI2P, |
646 | MCVTUI2P, |
647 | |
648 | // Vector float to bfloat16. |
649 | // Convert TWO packed single data to one packed BF16 data |
650 | CVTNE2PS2BF16, |
651 | // Convert packed single data to packed BF16 data |
652 | CVTNEPS2BF16, |
653 | // Masked version of above. |
654 | // SRC, PASSTHRU, MASK |
655 | MCVTNEPS2BF16, |
656 | |
// Dot product of BF16 pairs accumulated into
// packed single precision.
659 | DPBF16PS, |
660 | |
// A stack checking function call. On Windows it's a _chkstk call.
662 | DYN_ALLOCA, |
663 | |
// For allocating variable amounts of stack space when using
// segmented stacks. Checks if the current stacklet has enough space, and
// falls back to heap allocation if not.
667 | SEG_ALLOCA, |
668 | |
669 | // For allocating stack space when using stack clash protector. |
670 | // Allocation is performed by block, and each block is probed. |
671 | PROBED_ALLOCA, |
672 | |
673 | // Memory barriers. |
674 | MFENCE, |
675 | |
676 | // Get a random integer and indicate whether it is valid in CF. |
677 | RDRAND, |
678 | |
679 | // Get a NIST SP800-90B & C compliant random integer and |
680 | // indicate whether it is valid in CF. |
681 | RDSEED, |
682 | |
683 | // Protection keys |
684 | // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. |
685 | // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is |
686 | // value for ECX. |
687 | RDPKRU, |
688 | WRPKRU, |
689 | |
690 | // SSE42 string comparisons. |
// These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
// will emit one or two instructions based on which results are used. If
// flags and index/mask are both used, this allows us to use a single
// instruction since we won't have to pick an opcode for flags. Instead we
// can rely on the DAG to CSE everything and decide at isel.
696 | PCMPISTR, |
697 | PCMPESTR, |
698 | |
699 | // Test if in transactional execution. |
700 | XTEST, |
701 | |
702 | // Conversions between float and half-float. |
703 | CVTPS2PH, |
704 | CVTPS2PH_SAE, |
705 | CVTPH2PS, |
706 | CVTPH2PS_SAE, |
707 | |
708 | // Masked version of above. |
709 | // SRC, RND, PASSTHRU, MASK |
710 | MCVTPS2PH, |
711 | MCVTPS2PH_SAE, |
712 | |
713 | // Galois Field Arithmetic Instructions |
714 | GF2P8AFFINEINVQB, |
715 | GF2P8AFFINEQB, |
716 | GF2P8MULB, |
717 | |
718 | // LWP insert record. |
719 | LWPINS, |
720 | |
721 | // User level wait |
722 | UMWAIT, |
723 | TPAUSE, |
724 | |
725 | // Enqueue Stores Instructions |
726 | ENQCMD, |
727 | ENQCMDS, |
728 | |
729 | // For avx512-vp2intersect |
730 | VP2INTERSECT, |
731 | |
732 | // User level interrupts - testui |
733 | TESTUI, |
734 | |
735 | // Perform an FP80 add after changing precision control in FPCW. |
736 | FP80_ADD, |
737 | |
738 | // Conditional compare instructions |
739 | CCMP, |
740 | CTEST, |
741 | |
742 | /// X86 strict FP compare instructions. |
743 | STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, |
744 | STRICT_FCMPS, |
745 | |
746 | // Vector packed double/float comparison. |
747 | STRICT_CMPP, |
748 | |
749 | /// Vector comparison generating mask bits for fp and |
750 | /// integer signed and unsigned data types. |
751 | STRICT_CMPM, |
752 | |
753 | // Vector float/double to signed/unsigned integer with truncation. |
754 | STRICT_CVTTP2SI, |
755 | STRICT_CVTTP2UI, |
756 | |
757 | // Vector FP extend. |
758 | STRICT_VFPEXT, |
759 | |
760 | // Vector FP round. |
761 | STRICT_VFPROUND, |
762 | |
763 | // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. |
764 | // Also used by the legacy (V)ROUND intrinsics where we mask out the |
765 | // scaling part of the immediate. |
766 | STRICT_VRNDSCALE, |
767 | |
768 | // Vector signed/unsigned integer to float/double. |
769 | STRICT_CVTSI2P, |
770 | STRICT_CVTUI2P, |
771 | |
772 | // Strict FMA nodes. |
773 | STRICT_FNMADD, |
774 | STRICT_FMSUB, |
775 | STRICT_FNMSUB, |
776 | |
777 | // Conversions between float and half-float. |
778 | STRICT_CVTPS2PH, |
779 | STRICT_CVTPH2PS, |
780 | |
781 | // Perform an FP80 add after changing precision control in FPCW. |
782 | STRICT_FP80_ADD, |
783 | |
784 | // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and |
785 | // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE. |
786 | |
787 | // Compare and swap. |
788 | LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, |
789 | LCMPXCHG8_DAG, |
790 | LCMPXCHG16_DAG, |
791 | LCMPXCHG16_SAVE_RBX_DAG, |
792 | |
793 | /// LOCK-prefixed arithmetic read-modify-write instructions. |
794 | /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) |
795 | LADD, |
796 | LSUB, |
797 | LOR, |
798 | LXOR, |
799 | LAND, |
800 | LBTS, |
801 | LBTC, |
802 | LBTR, |
803 | LBTS_RM, |
804 | LBTC_RM, |
805 | LBTR_RM, |
806 | |
807 | /// RAO arithmetic instructions. |
808 | /// OUTCHAIN = AADD(INCHAIN, PTR, RHS) |
809 | AADD, |
810 | AOR, |
811 | AXOR, |
812 | AAND, |
813 | |
814 | // Load, scalar_to_vector, and zero extend. |
815 | VZEXT_LOAD, |
816 | |
817 | // extract_vector_elt, store. |
818 | , |
819 | |
820 | // scalar broadcast from memory. |
821 | VBROADCAST_LOAD, |
822 | |
823 | // subvector broadcast from memory. |
824 | SUBV_BROADCAST_LOAD, |
825 | |
826 | // Store FP control word into i16 memory. |
827 | FNSTCW16m, |
828 | |
829 | // Load FP control word from i16 memory. |
830 | FLDCW16m, |
831 | |
832 | // Store x87 FPU environment into memory. |
833 | FNSTENVm, |
834 | |
835 | // Load x87 FPU environment from memory. |
836 | FLDENVm, |
837 | |
838 | /// This instruction implements FP_TO_SINT with the |
839 | /// integer destination in memory and a FP reg source. This corresponds |
840 | /// to the X86::FIST*m instructions and the rounding mode change stuff. It |
841 | /// has two inputs (token chain and address) and two outputs (int value |
842 | /// and token chain). Memory VT specifies the type to store to. |
843 | FP_TO_INT_IN_MEM, |
844 | |
845 | /// This instruction implements SINT_TO_FP with the |
846 | /// integer source in memory and FP reg result. This corresponds to the |
847 | /// X86::FILD*m instructions. It has two inputs (token chain and address) |
848 | /// and two outputs (FP value and token chain). The integer source type is |
849 | /// specified by the memory VT. |
850 | FILD, |
851 | |
852 | /// This instruction implements a fp->int store from FP stack |
853 | /// slots. This corresponds to the fist instruction. It takes a |
854 | /// chain operand, value to store, address, and glue. The memory VT |
855 | /// specifies the type to store as. |
856 | FIST, |
857 | |
858 | /// This instruction implements an extending load to FP stack slots. |
859 | /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain |
860 | /// operand, and ptr to load from. The memory VT specifies the type to |
861 | /// load from. |
862 | FLD, |
863 | |
864 | /// This instruction implements a truncating store from FP stack |
865 | /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a |
866 | /// chain operand, value to store, address, and glue. The memory VT |
867 | /// specifies the type to store as. |
868 | FST, |
869 | |
870 | /// These instructions grab the address of the next argument |
871 | /// from a va_list. (reads and modifies the va_list in memory) |
872 | VAARG_64, |
873 | VAARG_X32, |
874 | |
875 | // Vector truncating store with unsigned/signed saturation |
876 | VTRUNCSTOREUS, |
877 | VTRUNCSTORES, |
878 | // Vector truncating masked store with unsigned/signed saturation |
879 | VMTRUNCSTOREUS, |
880 | VMTRUNCSTORES, |
881 | |
882 | // X86 specific gather and scatter |
883 | MGATHER, |
884 | MSCATTER, |
885 | |
886 | // Key locker nodes that produce flags. |
887 | AESENC128KL, |
888 | AESDEC128KL, |
889 | AESENC256KL, |
890 | AESDEC256KL, |
891 | AESENCWIDE128KL, |
892 | AESDECWIDE128KL, |
893 | AESENCWIDE256KL, |
894 | AESDECWIDE256KL, |
895 | |
/// Compare and Add if Condition is Met. Compares the value in operand 2
/// with the value in memory at operand 1. If the condition of operand 4 is
/// met, adds the value of operand 3 to m32 and writes the new value back to
/// operand 1. Operand 2 is always updated with the original value from
/// operand 1.
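/// In pseudo-code, a sketch of the semantics above (cc is the condition
/// taken from operand 4):
///   tmp = *op1;
///   if (cc(tmp, op2)) *op1 = tmp + op3;
///   op2 = tmp;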
900 | CMPCCXADD, |
901 | |
902 | // Save xmm argument registers to the stack, according to %al. An operator |
903 | // is needed so that this can be expanded with control flow. |
904 | VASTART_SAVE_XMM_REGS, |
905 | |
906 | // Conditional load/store instructions |
907 | CLOAD, |
908 | CSTORE, |
909 | |
// WARNING: Do not add anything at the end unless you want the node to
// have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be treated as target memory ops!
913 | }; |
914 | } // end namespace X86ISD |
915 | |
916 | namespace X86 { |
/// The current rounding mode is represented in bits 11:10 of the x87
/// control word (FPCW). These values are the same as the corresponding
/// rounding-mode constants used in glibc.
920 | enum RoundingMode { |
921 | rmToNearest = 0, // FE_TONEAREST |
922 | rmDownward = 1 << 10, // FE_DOWNWARD |
923 | rmUpward = 2 << 10, // FE_UPWARD |
924 | rmTowardZero = 3 << 10, // FE_TOWARDZERO |
925 | rmMask = 3 << 10 // Bit mask selecting rounding mode |
926 | }; |
927 | } |
928 | |
929 | /// Define some predicates that are used for node matching. |
930 | namespace X86 { |
931 | /// Returns true if Elt is a constant zero or floating point constant +0.0. |
932 | bool isZeroNode(SDValue Elt); |
933 | |
/// Returns true if the given offset
/// fits into the displacement field of the instruction.
936 | bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, |
937 | bool hasSymbolicDisplacement); |
938 | |
939 | /// Determines whether the callee is required to pop its |
940 | /// own arguments. Callee pop is necessary to support tail calls. |
941 | bool isCalleePop(CallingConv::ID CallingConv, |
942 | bool is64Bit, bool IsVarArg, bool GuaranteeTCO); |
943 | |
944 | /// If Op is a constant whose elements are all the same constant or |
945 | /// undefined, return true and return the constant value in \p SplatVal. |
946 | /// If we have undef bits that don't cover an entire element, we treat these |
947 | /// as zero if AllowPartialUndefs is set, else we fail and return false. |
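/// A usage sketch (hypothetical caller code) that detects a splat of 1:
///   APInt SplatVal;
///   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isOne())
///     ...; // every element of Op is the constant 1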
948 | bool isConstantSplat(SDValue Op, APInt &SplatVal, |
949 | bool AllowPartialUndefs = true); |
950 | |
951 | /// Check if Op is a load operation that could be folded into some other x86 |
952 | /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0. |
953 | bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget, |
954 | bool AssumeSingleUse = false); |
955 | |
956 | /// Check if Op is a load operation that could be folded into a vector splat |
957 | /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2. |
958 | bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT, |
959 | const X86Subtarget &Subtarget, |
960 | bool AssumeSingleUse = false); |
961 | |
962 | /// Check if Op is a value that could be used to fold a store into some |
963 | /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi). |
964 | bool mayFoldIntoStore(SDValue Op); |
965 | |
966 | /// Check if Op is an operation that could be folded into a zero extend x86 |
967 | /// instruction. |
968 | bool mayFoldIntoZeroExtend(SDValue Op); |
969 | |
970 | /// True if the target supports the extended frame for async Swift |
971 | /// functions. |
972 | bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, |
973 | const MachineFunction &MF); |
974 | } // end namespace X86 |
975 | |
976 | //===--------------------------------------------------------------------===// |
977 | // X86 Implementation of the TargetLowering interface |
978 | class X86TargetLowering final : public TargetLowering { |
979 | public: |
980 | explicit X86TargetLowering(const X86TargetMachine &TM, |
981 | const X86Subtarget &STI); |
982 | |
983 | unsigned getJumpTableEncoding() const override; |
984 | bool useSoftFloat() const override; |
985 | |
986 | void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
987 | ArgListTy &Args) const override; |
988 | |
989 | MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override { |
990 | return MVT::i8; |
991 | } |
992 | |
993 | const MCExpr * |
994 | LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
995 | const MachineBasicBlock *MBB, unsigned uid, |
996 | MCContext &Ctx) const override; |
997 | |
998 | /// Returns relocation base for the given PIC jumptable. |
999 | SDValue getPICJumpTableRelocBase(SDValue Table, |
1000 | SelectionDAG &DAG) const override; |
1001 | const MCExpr * |
1002 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
1003 | unsigned JTI, MCContext &Ctx) const override; |
1004 | |
/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the
/// rest are at 4-byte boundaries.
1009 | uint64_t getByValTypeAlignment(Type *Ty, |
1010 | const DataLayout &DL) const override; |
1011 | |
1012 | EVT getOptimalMemOpType(const MemOp &Op, |
1013 | const AttributeList &FuncAttributes) const override; |
1014 | |
1015 | /// Returns true if it's safe to use load / store of the |
1016 | /// specified type to expand memcpy / memset inline. This is mostly true |
/// for all types except for some special cases. For example, on X86
/// targets without SSE2, f64 load / store are done with fldl / fstpl,
/// which also perform type conversion. Note the specified type doesn't
/// have to be legal as the hook is used before type legalization.
1021 | bool isSafeMemOpType(MVT VT) const override; |
1022 | |
1023 | bool isMemoryAccessFast(EVT VT, Align Alignment) const; |
1024 | |
1025 | /// Returns true if the target allows unaligned memory accesses of the |
1026 | /// specified type. Returns whether it is "fast" in the last argument. |
1027 | bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, |
1028 | MachineMemOperand::Flags Flags, |
1029 | unsigned *Fast) const override; |
1030 | |
1031 | /// This function returns true if the memory access is aligned or if the |
1032 | /// target allows this specific unaligned memory access. If the access is |
1033 | /// allowed, the optional final parameter returns a relative speed of the |
1034 | /// access (as defined by the target). |
1035 | bool allowsMemoryAccess( |
1036 | LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, |
1037 | Align Alignment, |
1038 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1039 | unsigned *Fast = nullptr) const override; |
1040 | |
1041 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
1042 | const MachineMemOperand &MMO, |
1043 | unsigned *Fast) const { |
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                          MMO.getAlign(), MMO.getFlags(), Fast);
1046 | } |
1047 | |
1048 | /// Provide custom lowering hooks for some operations. |
1049 | /// |
1050 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
1051 | |
/// Replace the results of a node with an illegal result
/// type with new values built out of custom code.
1054 | /// |
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1056 | SelectionDAG &DAG) const override; |
1057 | |
1058 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
1059 | |
1060 | bool preferABDSToABSWithNSW(EVT VT) const override; |
1061 | |
1062 | bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, |
1063 | EVT ExtVT) const override; |
1064 | |
1065 | bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, |
1066 | EVT VT) const override; |
1067 | |
1068 | /// Return true if the target has native support for |
1069 | /// the specified value type and it is 'desirable' to use the type for the |
1070 | /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 |
1071 | /// instruction encodings are longer and some i16 instructions are slow. |
1072 | bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; |
1073 | |
1074 | /// Return true if the target has native support for the |
1075 | /// specified value type and it is 'desirable' to use the type. e.g. On x86 |
1076 | /// i16 is legal, but undesirable since i16 instruction encodings are longer |
1077 | /// and some i16 instructions are slow. |
1078 | bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; |
1079 | |
/// Return the preferred fold type: Abs if this is a vector, AddAnd if it's
/// an integer, None otherwise.
1082 | TargetLowering::AndOrSETCCFoldKind |
1083 | isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, |
1084 | const SDNode *SETCC0, |
1085 | const SDNode *SETCC1) const override; |
1086 | |
1087 | /// Return the newly negated expression if the cost is not expensive and |
1088 | /// set the cost in \p Cost to indicate that if it is cheaper or neutral to |
1089 | /// do the negation. |
1090 | SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, |
1091 | bool LegalOperations, bool ForCodeSize, |
1092 | NegatibleCost &Cost, |
1093 | unsigned Depth) const override; |
1094 | |
1095 | MachineBasicBlock * |
1096 | EmitInstrWithCustomInserter(MachineInstr &MI, |
1097 | MachineBasicBlock *MBB) const override; |
1098 | |
1099 | /// This method returns the name of a target specific DAG node. |
1100 | const char *getTargetNodeName(unsigned Opcode) const override; |
1101 | |
1102 | /// Do not merge vector stores after legalization because that may conflict |
1103 | /// with x86-specific store splitting optimizations. |
1104 | bool mergeStoresAfterLegalization(EVT MemVT) const override { |
1105 | return !MemVT.isVector(); |
1106 | } |
1107 | |
1108 | bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
1109 | const MachineFunction &MF) const override; |
1110 | |
1111 | bool isCheapToSpeculateCttz(Type *Ty) const override; |
1112 | |
1113 | bool isCheapToSpeculateCtlz(Type *Ty) const override; |
1114 | |
1115 | bool isCtlzFast() const override; |
1116 | |
1117 | bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { |
// If the pair to store is a mixture of float and int values, we will
// save two bitwise instructions and one float-to-int instruction and
// increase one store instruction. There is potentially a more
// significant benefit because it avoids the float->int domain switch
// for the input value, so it is more likely a win.
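// An illustrative case (sketch): merging a {float, int} pair into one
// i64 store would need a float->int bitcast plus shift/or to pack the
// bits; two scalar stores avoid that domain switch entirely.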
1123 | if ((LTy.isFloatingPoint() && HTy.isInteger()) || |
1124 | (LTy.isInteger() && HTy.isFloatingPoint())) |
1125 | return true; |
// If the pair only contains int values, we will save two bitwise
// instructions and increase one store instruction (costing one more
// store buffer). Since the benefit is less clear, we leave such pairs
// out until we have a test case proving it is a win.
1130 | return false; |
1131 | } |
1132 | |
1133 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
1134 | |
1135 | bool hasAndNotCompare(SDValue Y) const override; |
1136 | |
1137 | bool hasAndNot(SDValue Y) const override; |
1138 | |
1139 | bool hasBitTest(SDValue X, SDValue Y) const override; |
1140 | |
1141 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
1142 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
1143 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
1144 | SelectionDAG &DAG) const override; |
1145 | |
1146 | unsigned preferedOpcodeForCmpEqPiecesOfOperand( |
1147 | EVT VT, unsigned ShiftOpc, bool MayTransformRotate, |
1148 | const APInt &ShiftOrRotateAmt, |
1149 | const std::optional<APInt> &AndMask) const override; |
1150 | |
1151 | bool preferScalarizeSplat(SDNode *N) const override; |
1152 | |
1153 | CondMergingParams |
1154 | getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, |
1155 | const Value *Rhs) const override; |
1156 | |
1157 | bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
1158 | CombineLevel Level) const override; |
1159 | |
1160 | bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override; |
1161 | |
1162 | bool |
1163 | shouldTransformSignedTruncationCheck(EVT XVT, |
1164 | unsigned KeptBits) const override { |
// For vectors, we don't have a preference.
1166 | if (XVT.isVector()) |
1167 | return false; |
1168 | |
1169 | auto VTIsOk = [](EVT VT) -> bool { |
1170 | return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || |
1171 | VT == MVT::i64; |
1172 | }; |
1173 | |
// We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX
// supports.
1175 | // XVT will be larger than KeptBitsVT. |
MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1177 | return VTIsOk(XVT) && VTIsOk(KeptBitsVT); |
1178 | } |
1179 | |
1180 | ShiftLegalizationStrategy |
1181 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
1182 | unsigned ExpansionFactor) const override; |
1183 | |
1184 | bool shouldSplatInsEltVarIndex(EVT VT) const override; |
1185 | |
1186 | bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override { |
1187 | // Converting to sat variants holds little benefit on X86 as we will just |
// need to saturate the value back using fp arithmetic.
1189 | return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT); |
1190 | } |
1191 | |
1192 | bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { |
1193 | return VT.isScalarInteger(); |
1194 | } |
1195 | |
1196 | /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. |
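/// For example, a 16-byte equality test typically lowers to a sequence
/// like this sketch:
///   movdqu (%rdi), %xmm0
///   movdqu (%rsi), %xmm1
///   pcmpeqb %xmm1, %xmm0
///   pmovmskb %xmm0, %eax
///   cmpl $0xffff, %eax   // all 16 byte compares equal?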
1197 | MVT hasFastEqualityCompare(unsigned NumBits) const override; |
1198 | |
1199 | /// Return the value type to use for ISD::SETCC. |
1200 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
1201 | EVT VT) const override; |
1202 | |
1203 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
1204 | const APInt &DemandedElts, |
1205 | TargetLoweringOpt &TLO) const override; |
1206 | |
1207 | /// Determine which of the bits specified in Mask are known to be either |
1208 | /// zero or one and return them in the KnownZero/KnownOne bitsets. |
1209 | void computeKnownBitsForTargetNode(const SDValue Op, |
1210 | KnownBits &Known, |
1211 | const APInt &DemandedElts, |
1212 | const SelectionDAG &DAG, |
1213 | unsigned Depth = 0) const override; |
1214 | |
1215 | /// Determine the number of bits in the operation that are sign bits. |
1216 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
1217 | const APInt &DemandedElts, |
1218 | const SelectionDAG &DAG, |
1219 | unsigned Depth) const override; |
1220 | |
1221 | bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, |
1222 | const APInt &DemandedElts, |
1223 | APInt &KnownUndef, |
1224 | APInt &KnownZero, |
1225 | TargetLoweringOpt &TLO, |
1226 | unsigned Depth) const override; |
1227 | |
1228 | bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op, |
1229 | const APInt &DemandedElts, |
1230 | unsigned MaskIndex, |
1231 | TargetLoweringOpt &TLO, |
1232 | unsigned Depth) const; |
1233 | |
1234 | bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
1235 | const APInt &DemandedBits, |
1236 | const APInt &DemandedElts, |
1237 | KnownBits &Known, |
1238 | TargetLoweringOpt &TLO, |
1239 | unsigned Depth) const override; |
1240 | |
1241 | SDValue SimplifyMultipleUseDemandedBitsForTargetNode( |
1242 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
1243 | SelectionDAG &DAG, unsigned Depth) const override; |
1244 | |
1245 | bool isGuaranteedNotToBeUndefOrPoisonForTargetNode( |
1246 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
1247 | bool PoisonOnly, unsigned Depth) const override; |
1248 | |
1249 | bool canCreateUndefOrPoisonForTargetNode( |
1250 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
1251 | bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override; |
1252 | |
1253 | bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, |
1254 | APInt &UndefElts, const SelectionDAG &DAG, |
1255 | unsigned Depth) const override; |
1256 | |
1257 | bool isTargetCanonicalConstantNode(SDValue Op) const override { |
1258 | // Peek through bitcasts/extracts/inserts to see if we have a broadcast |
1259 | // vector from memory. |
1260 | while (Op.getOpcode() == ISD::BITCAST || |
1261 | Op.getOpcode() == ISD::EXTRACT_SUBVECTOR || |
1262 | (Op.getOpcode() == ISD::INSERT_SUBVECTOR && |
Op.getOperand(0).isUndef()))
Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1265 | |
1266 | return Op.getOpcode() == X86ISD::VBROADCAST_LOAD || |
1267 | TargetLowering::isTargetCanonicalConstantNode(Op); |
1268 | } |
1269 | |
1270 | const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; |
1271 | |
1272 | SDValue unwrapAddress(SDValue N) const override; |
1273 | |
1274 | SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; |
1275 | |
1276 | bool ExpandInlineAsm(CallInst *CI) const override; |
1277 | |
1278 | ConstraintType getConstraintType(StringRef Constraint) const override; |
1279 | |
1280 | /// Examine constraint string and operand type and determine a weight value. |
1281 | /// The operand object must already have been set up with the operand type. |
1282 | ConstraintWeight |
1283 | getSingleConstraintMatchWeight(AsmOperandInfo &Info, |
1284 | const char *Constraint) const override; |
1285 | |
1286 | const char *LowerXConstraint(EVT ConstraintVT) const override; |
1287 | |
1288 | /// Lower the specified operand into the Ops vector. If it is invalid, don't |
1289 | /// add anything to Ops. If hasMemory is true it means one of the asm |
1290 | /// constraint of the inline asm instruction being processed is 'm'. |
1291 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
1292 | std::vector<SDValue> &Ops, |
1293 | SelectionDAG &DAG) const override; |
1294 | |
1295 | InlineAsm::ConstraintCode |
1296 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override { |
1297 | if (ConstraintCode == "v" ) |
1298 | return InlineAsm::ConstraintCode::v; |
1299 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
1300 | } |
1301 | |
1302 | /// Handle Lowering flag assembly outputs. |
1303 | SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
1304 | const SDLoc &DL, |
1305 | const AsmOperandInfo &Constraint, |
1306 | SelectionDAG &DAG) const override; |
1307 | |
1308 | /// Given a physical register constraint |
1309 | /// (e.g. {edx}), return the register number and the register class for the |
1310 | /// register. This should only be used for C_Register constraints. On |
1311 | /// error, this returns a register number of 0. |
1312 | std::pair<unsigned, const TargetRegisterClass *> |
1313 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
1314 | StringRef Constraint, MVT VT) const override; |
1315 | |
1316 | /// Return true if the addressing mode represented |
1317 | /// by AM is legal for this target, for a load/store of the specified type. |
1318 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, |
1319 | Type *Ty, unsigned AS, |
1320 | Instruction *I = nullptr) const override; |
1321 | |
1322 | bool addressingModeSupportsTLS(const GlobalValue &GV) const override; |
1323 | |
1324 | /// Return true if the specified immediate is legal |
1325 | /// icmp immediate, that is the target has icmp instructions which can |
1326 | /// compare a register against the immediate without having to materialize |
1327 | /// the immediate into a register. |
1328 | bool isLegalICmpImmediate(int64_t Imm) const override; |
1329 | |
1330 | /// Return true if the specified immediate is legal |
1331 | /// add immediate, that is the target has add instructions which can |
1332 | /// add a register and the immediate without having to materialize |
1333 | /// the immediate into a register. |
1334 | bool isLegalAddImmediate(int64_t Imm) const override; |
1335 | |
1336 | bool isLegalStoreImmediate(int64_t Imm) const override; |
1337 | |
1338 | /// This is used to enable splatted operand transforms for vector shifts |
1339 | /// and vector funnel shifts. |
1340 | bool isVectorShiftByScalarCheap(Type *Ty) const override; |
1341 | |
1342 | /// Add x86-specific opcodes to the default list. |
1343 | bool isBinOp(unsigned Opcode) const override; |
1344 | |
1345 | /// Returns true if the opcode is a commutative binary operation. |
1346 | bool isCommutativeBinOp(unsigned Opcode) const override; |
1347 | |
1348 | /// Return true if it's free to truncate a value of |
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
1350 | /// register EAX to i16 by referencing its sub-register AX. |
1351 | bool isTruncateFree(Type *Ty1, Type *Ty2) const override; |
1352 | bool isTruncateFree(EVT VT1, EVT VT2) const override; |
1353 | |
1354 | bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; |
1355 | |
1356 | /// Return true if any actual instruction that defines a |
1357 | /// value of type Ty1 implicit zero-extends the value to Ty2 in the result |
1358 | /// register. This does not necessarily include registers defined in |
1359 | /// unknown ways, such as incoming arguments, or copies from unknown |
1360 | /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this |
1361 | /// does not necessarily apply to truncate instructions. e.g. on x86-64, |
1362 | /// all instructions that define 32-bit values implicit zero-extend the |
1363 | /// result out to 64 bits. |
1364 | bool isZExtFree(Type *Ty1, Type *Ty2) const override; |
1365 | bool isZExtFree(EVT VT1, EVT VT2) const override; |
1366 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
1367 | |
1368 | bool shouldSinkOperands(Instruction *I, |
1369 | SmallVectorImpl<Use *> &Ops) const override; |
1370 | bool shouldConvertPhiType(Type *From, Type *To) const override; |
1371 | |
1372 | /// Return true if folding a vector load into ExtVal (a sign, zero, or any |
1373 | /// extend node) is profitable. |
1374 | bool isVectorLoadExtDesirable(SDValue) const override; |
1375 | |
1376 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
1377 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this |
1378 | /// method returns true, otherwise fmuladd is expanded to fmul + fadd. |
1379 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
1380 | EVT VT) const override; |
1381 | |
1382 | /// Return true if it's profitable to narrow operations of type SrcVT to |
1383 | /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not |
1384 | /// from i32 to i16. |
1385 | bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override; |
1386 | |
1387 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, |
1388 | EVT VT) const override; |
1389 | |
1390 | /// Given an intrinsic, checks if on the target the intrinsic will need to map |
1391 | /// to a MemIntrinsicNode (touches memory). If this is the case, it returns |
1392 | /// true and stores the intrinsic information into the IntrinsicInfo that was |
1393 | /// passed to the function. |
1394 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
1395 | MachineFunction &MF, |
1396 | unsigned Intrinsic) const override; |
1397 | |
1398 | /// Returns true if the target can instruction select the |
1399 | /// specified FP immediate natively. If false, the legalizer will |
1400 | /// materialize the FP immediate as a load from a constant pool. |
1401 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
1402 | bool ForCodeSize) const override; |
1403 | |
1404 | /// Targets can use this to indicate that they only support *some* |
1405 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a |
1406 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to |
1407 | /// be legal. |
1408 | bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override; |
1409 | |
1410 | /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there |
1411 | /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a |
1412 | /// constant pool entry. |
1413 | bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override; |
1414 | |
1415 | /// Returns true if lowering to a jump table is allowed. |
1416 | bool areJTsAllowed(const Function *Fn) const override; |
1417 | |
1418 | MVT getPreferredSwitchConditionType(LLVMContext &Context, |
1419 | EVT ConditionVT) const override; |
1420 | |
1421 | /// If true, then instruction selection should |
1422 | /// seek to shrink the FP constant of the specified type to a smaller type |
/// in order to save space and/or reduce runtime.
1424 | bool ShouldShrinkFPConstant(EVT VT) const override; |
1425 | |
1426 | /// Return true if we believe it is correct and profitable to reduce the |
1427 | /// load node to a smaller type. |
1428 | bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
1429 | EVT NewVT) const override; |
1430 | |
1431 | /// Return true if the specified scalar FP type is computed in an SSE |
1432 | /// register, not on the X87 floating point stack. |
1433 | bool isScalarFPTypeInSSEReg(EVT VT) const; |
1434 | |
1435 | /// Returns true if it is beneficial to convert a load of a constant |
1436 | /// to just the constant itself. |
1437 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
1438 | Type *Ty) const override; |
1439 | |
1440 | bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override; |
1441 | |
1442 | bool convertSelectOfConstantsToMath(EVT VT) const override; |
1443 | |
1444 | bool decomposeMulByConstant(LLVMContext &Context, EVT VT, |
1445 | SDValue C) const override; |
1446 | |
1447 | /// Return true if EXTRACT_SUBVECTOR is cheap for this result type |
1448 | /// with this index. |
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1450 | unsigned Index) const override; |
1451 | |
1452 | /// Scalar ops always have equal or better analysis/performance/power than |
1453 | /// the vector equivalent, so this always makes sense if the scalar op is |
1454 | /// supported. |
1455 | bool shouldScalarizeBinop(SDValue) const override; |
1456 | |
1457 | /// Extract of a scalar FP value from index 0 of a vector is free. |
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1459 | EVT EltVT = VT.getScalarType(); |
1460 | return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0; |
1461 | } |
1462 | |
1463 | /// Overflow nodes should get combined/lowered to optimal instructions |
1464 | /// (they should allow eliminating explicit compares by getting flags from |
1465 | /// math ops). |
1466 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
1467 | bool MathUsed) const override; |
1468 | |
1469 | bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, |
1470 | unsigned AddrSpace) const override { |
1471 | // If we can replace more than 2 scalar stores, there will be a reduction |
1472 | // in instructions even after we add a vector constant load. |
1473 | return IsZero || NumElem > 2; |
1474 | } |
1475 | |
1476 | bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
1477 | const SelectionDAG &DAG, |
1478 | const MachineMemOperand &MMO) const override; |
1479 | |
1480 | Register getRegisterByName(const char* RegName, LLT VT, |
1481 | const MachineFunction &MF) const override; |
1482 | |
1483 | /// If a physical register, this returns the register that receives the |
1484 | /// exception address on entry to an EH pad. |
1485 | Register |
1486 | getExceptionPointerRegister(const Constant *PersonalityFn) const override; |
1487 | |
1488 | /// If a physical register, this returns the register that receives the |
1489 | /// exception typeid on entry to a landing pad. |
1490 | Register |
1491 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override; |
1492 | |
1493 | bool needsFixedCatchObjects() const override; |
1494 | |
1495 | /// This method returns a target specific FastISel object, |
1496 | /// or null if the target does not support "fast" ISel. |
1497 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
1498 | const TargetLibraryInfo *libInfo) const override; |
1499 | |
1500 | /// If the target has a standard location for the stack protector cookie, |
1501 | /// returns the address of that location. Otherwise, returns nullptr. |
1502 | Value *getIRStackGuard(IRBuilderBase &IRB) const override; |
1503 | |
1504 | bool useLoadStackGuardNode() const override; |
1505 | bool useStackGuardXorFP() const override; |
1506 | void insertSSPDeclarations(Module &M) const override; |
1507 | Value *getSDagStackGuard(const Module &M) const override; |
1508 | Function *getSSPStackGuardCheck(const Module &M) const override; |
1509 | SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
1510 | const SDLoc &DL) const override; |
1511 | |
1512 | |
1513 | /// Return true if the target stores SafeStack pointer at a fixed offset in |
1514 | /// some non-standard address space, and populates the address space and |
1515 | /// offset as appropriate. |
1516 | Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; |
1517 | |
1518 | std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, |
1519 | SDValue Chain, SDValue Pointer, |
1520 | MachinePointerInfo PtrInfo, |
1521 | Align Alignment, |
1522 | SelectionDAG &DAG) const; |
1523 | |
1524 | /// Customize the preferred legalization strategy for certain types. |
1525 | LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; |
1526 | |
1527 | bool softPromoteHalfType() const override { return true; } |
1528 | |
1529 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
1530 | EVT VT) const override; |
1531 | |
1532 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
1533 | CallingConv::ID CC, |
1534 | EVT VT) const override; |
1535 | |
1536 | unsigned getVectorTypeBreakdownForCallingConv( |
1537 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
1538 | unsigned &NumIntermediates, MVT &RegisterVT) const override; |
1539 | |
1540 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
1541 | |
1542 | bool supportSwiftError() const override; |
1543 | |
1544 | bool supportKCFIBundles() const override { return true; } |
1545 | |
1546 | MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
1547 | MachineBasicBlock::instr_iterator &MBBI, |
1548 | const TargetInstrInfo *TII) const override; |
1549 | |
1550 | bool hasStackProbeSymbol(const MachineFunction &MF) const override; |
1551 | bool hasInlineStackProbe(const MachineFunction &MF) const override; |
1552 | StringRef getStackProbeSymbolName(const MachineFunction &MF) const override; |
1553 | |
1554 | unsigned getStackProbeSize(const MachineFunction &MF) const; |
1555 | |
1556 | bool hasVectorBlend() const override { return true; } |
1557 | |
1558 | unsigned getMaxSupportedInterleaveFactor() const override { return 4; } |
1559 | |
1560 | bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, |
1561 | unsigned OpNo) const override; |
1562 | |
1563 | SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, |
1564 | MachineMemOperand *MMO, SDValue &NewLoad, |
1565 | SDValue Ptr, SDValue PassThru, |
1566 | SDValue Mask) const override; |
1567 | SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, |
1568 | MachineMemOperand *MMO, SDValue Ptr, SDValue Val, |
1569 | SDValue Mask) const override; |
1570 | |
1571 | /// Lower interleaved load(s) into target specific |
1572 | /// instructions/intrinsics. |
1573 | bool lowerInterleavedLoad(LoadInst *LI, |
1574 | ArrayRef<ShuffleVectorInst *> Shuffles, |
1575 | ArrayRef<unsigned> Indices, |
1576 | unsigned Factor) const override; |
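  // Illustrative IR for Factor = 2: a load of <8 x i32> followed by
  // shufflevector masks <0,2,4,6> and <1,3,5,7> can be recognized here and
  // lowered to target shuffles rather than scalarized extracts.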
1577 | |
1578 | /// Lower interleaved store(s) into target specific |
1579 | /// instructions/intrinsics. |
1580 | bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
1581 | unsigned Factor) const override; |
1582 | |
1583 | SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, |
1584 | int JTI, SelectionDAG &DAG) const override; |
1585 | |
1586 | Align getPrefLoopAlignment(MachineLoop *ML) const override; |
1587 | |
1588 | EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override { |
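    // f80 is softened through a 96-bit integer: 80 value bits rounded up to
    // the next 32-bit multiple, matching the 12-byte f80 memory footprint on
    // 32-bit targets.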
1589 | if (VT == MVT::f80) |
      return EVT::getIntegerVT(Context, 96);
1591 | return TargetLoweringBase::getTypeToTransformTo(Context, VT); |
1592 | } |
1593 | |
1594 | protected: |
1595 | std::pair<const TargetRegisterClass *, uint8_t> |
1596 | findRepresentativeClass(const TargetRegisterInfo *TRI, |
1597 | MVT VT) const override; |
1598 | |
1599 | private: |
1600 | /// Keep a reference to the X86Subtarget around so that we can |
1601 | /// make the right decision when generating code for different targets. |
1602 | const X86Subtarget &Subtarget; |
1603 | |
1604 | /// A list of legal FP immediates. |
1605 | std::vector<APFloat> LegalFPImmediates; |
1606 | |
  /// Record that this x86 target can instruction-select the specified FP
  /// immediate natively.
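  /// For example, the lowering constructor registers +0.0, which SSE can
  /// materialize with a single xorps, via
  /// addLegalFPImmediate(APFloat(+0.0f)).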
1609 | void addLegalFPImmediate(const APFloat& Imm) { |
    LegalFPImmediates.push_back(Imm);
1611 | } |
1612 | |
1613 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
1614 | CallingConv::ID CallConv, bool isVarArg, |
1615 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1616 | const SDLoc &dl, SelectionDAG &DAG, |
1617 | SmallVectorImpl<SDValue> &InVals, |
1618 | uint32_t *RegMask) const; |
1619 | SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, |
1620 | const SmallVectorImpl<ISD::InputArg> &ArgInfo, |
1621 | const SDLoc &dl, SelectionDAG &DAG, |
1622 | const CCValAssign &VA, MachineFrameInfo &MFI, |
1623 | unsigned i) const; |
1624 | SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, |
1625 | const SDLoc &dl, SelectionDAG &DAG, |
1626 | const CCValAssign &VA, |
1627 | ISD::ArgFlagsTy Flags, bool isByval) const; |
1628 | |
1629 | // Call lowering helpers. |
1630 | |
  /// Check whether the call is eligible for tail call optimization.
1633 | bool IsEligibleForTailCallOptimization( |
1634 | TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo, |
1635 | SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const; |
1636 | SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, |
1637 | SDValue Chain, bool IsTailCall, |
1638 | bool Is64Bit, int FPDiff, |
1639 | const SDLoc &dl) const; |
1640 | |
1641 | unsigned GetAlignedArgumentStackSize(unsigned StackSize, |
1642 | SelectionDAG &DAG) const; |
1643 | |
1644 | unsigned getAddressSpace() const; |
1645 | |
1646 | SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, |
1647 | SDValue &Chain) const; |
1648 | SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const; |
1649 | |
1650 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1651 | SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; |
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1653 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
1654 | |
1655 | unsigned getGlobalWrapperKind(const GlobalValue *GV, |
1656 | const unsigned char OpFlags) const; |
1657 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
1658 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
1659 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
1660 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
1661 | SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; |
1662 | |
1663 | /// Creates target global address or external symbol nodes for calls or |
1664 | /// other uses. |
1665 | SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, |
1666 | bool ForCall) const; |
1667 | |
1668 | SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1669 | SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1670 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; |
1671 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
1672 | SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
1673 | SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const; |
1674 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
1675 | SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; |
1676 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
1677 | SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; |
1678 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
1679 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
1680 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
1681 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
1682 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1683 | SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
1684 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
1685 | SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; |
1686 | SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; |
1687 | SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; |
1688 | SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; |
1689 | SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; |
1690 | SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
1691 | SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1692 | SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
1693 | SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const; |
1694 | SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const; |
1695 | SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const; |
1696 | SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; |
1697 | SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG, |
1698 | SDValue &Chain) const; |
1699 | SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
1700 | SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const; |
1701 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
1702 | SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const; |
1703 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
1704 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
1705 | SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const; |
1706 | |
1707 | SDValue |
1708 | LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1709 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1710 | const SDLoc &dl, SelectionDAG &DAG, |
1711 | SmallVectorImpl<SDValue> &InVals) const override; |
1712 | SDValue LowerCall(CallLoweringInfo &CLI, |
1713 | SmallVectorImpl<SDValue> &InVals) const override; |
1714 | |
1715 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1716 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1717 | const SmallVectorImpl<SDValue> &OutVals, |
1718 | const SDLoc &dl, SelectionDAG &DAG) const override; |
1719 | |
1720 | bool supportSplitCSR(MachineFunction *MF) const override { |
1721 | return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1723 | } |
1724 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
1725 | void insertCopiesSplitCSR( |
1726 | MachineBasicBlock *Entry, |
1727 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
1728 | |
1729 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
1730 | |
1731 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
1732 | |
1733 | EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, |
1734 | ISD::NodeType ExtendKind) const override; |
1735 | |
1736 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
1737 | bool isVarArg, |
1738 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1739 | LLVMContext &Context) const override; |
1740 | |
1741 | const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; |
1742 | ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; |
1743 | |
1744 | TargetLoweringBase::AtomicExpansionKind |
1745 | shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
1746 | TargetLoweringBase::AtomicExpansionKind |
1747 | shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
1748 | TargetLoweringBase::AtomicExpansionKind |
1749 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
1750 | TargetLoweringBase::AtomicExpansionKind |
1751 | shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; |
1752 | void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; |
1753 | void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; |
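  // e.g. an `atomicrmw or` of a single bit whose result is only tested for
  // that bit can be emitted as `lock bts` instead of a cmpxchg loop.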
1754 | |
1755 | LoadInst * |
1756 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; |
1757 | |
1758 | bool needsCmpXchgNb(Type *MemType) const; |
1759 | |
1760 | void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, |
1761 | MachineBasicBlock *DispatchBB, int FI) const; |
1762 | |
1763 | // Utility function to emit the low-level va_arg code for X86-64. |
1764 | MachineBasicBlock * |
1765 | EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; |
1766 | |
  /// Utility function to lower a cascaded pair of CMOV pseudo-instructions,
  /// where the second CMOV consumes the result of the first, into branches.
1768 | MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1, |
1769 | MachineInstr &MI2, |
1770 | MachineBasicBlock *BB) const; |
1771 | |
1772 | MachineBasicBlock *EmitLoweredSelect(MachineInstr &I, |
1773 | MachineBasicBlock *BB) const; |
1774 | |
1775 | MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, |
1776 | MachineBasicBlock *BB) const; |
1777 | |
1778 | MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI, |
1779 | MachineBasicBlock *BB) const; |
1780 | |
1781 | MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI, |
1782 | MachineBasicBlock *BB) const; |
1783 | |
1784 | MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI, |
1785 | MachineBasicBlock *BB) const; |
1786 | |
1787 | MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, |
1788 | MachineBasicBlock *BB) const; |
1789 | |
1790 | MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, |
1791 | MachineBasicBlock *BB) const; |
1792 | |
1793 | MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, |
1794 | MachineBasicBlock *MBB) const; |
1795 | |
1796 | void emitSetJmpShadowStackFix(MachineInstr &MI, |
1797 | MachineBasicBlock *MBB) const; |
1798 | |
1799 | MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, |
1800 | MachineBasicBlock *MBB) const; |
1801 | |
1802 | MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI, |
1803 | MachineBasicBlock *MBB) const; |
1804 | |
1805 | MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI, |
1806 | MachineBasicBlock *MBB) const; |
1807 | |
1808 | MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI, |
1809 | MachineBasicBlock *MBB) const; |
1810 | |
1811 | /// Emit flags for the given setcc condition and operands. Also returns the |
1812 | /// corresponding X86 condition code constant in X86CC. |
1813 | SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC, |
1814 | const SDLoc &dl, SelectionDAG &DAG, |
1815 | SDValue &X86CC) const; |
1816 | |
1817 | bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, |
1818 | SDValue IntPow2) const override; |
1819 | |
1820 | /// Check if replacement of SQRT with RSQRT should be disabled. |
1821 | bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override; |
1822 | |
1823 | /// Use rsqrt* to speed up sqrt calculations. |
1824 | SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, |
1825 | int &RefinementSteps, bool &UseOneConstNR, |
1826 | bool Reciprocal) const override; |
1827 | |
1828 | /// Use rcp* to speed up fdiv calculations. |
1829 | SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, |
1830 | int &RefinementSteps) const override; |
1831 | |
1832 | /// Reassociate floating point divisions into multiply by reciprocal. |
1833 | unsigned combineRepeatedFPDivisors() const override; |
1834 | |
1835 | SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
1836 | SmallVectorImpl<SDNode *> &Created) const override; |
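  // Illustrative expansion for an i32 sdiv by 8: bias negative inputs with
  //   t = x + ((x >> 31) & 7)
  // and then arithmetic-shift t >> 3, avoiding an idiv.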
1837 | |
1838 | SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, |
1839 | SDValue V2) const; |
1840 | }; |
1841 | |
1842 | namespace X86 { |
1843 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
1844 | const TargetLibraryInfo *libInfo); |
1845 | } // end namespace X86 |
1846 | |
1847 | // X86 specific Gather/Scatter nodes. |
1848 | // The class has the same order of operands as MaskedGatherScatterSDNode for |
1849 | // convenience. |
1850 | class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode { |
1851 | public: |
  // This is intended as a utility and should never be directly created.
1853 | X86MaskedGatherScatterSDNode() = delete; |
1854 | ~X86MaskedGatherScatterSDNode() = delete; |
1855 | |
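  // Operand layout: 0 = chain, 1 = passthru (gather) / stored value
  // (scatter), 2 = mask, 3 = base pointer, 4 = index, 5 = scale.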
  const SDValue &getBasePtr() const { return getOperand(3); }
  const SDValue &getIndex() const { return getOperand(4); }
  const SDValue &getMask() const { return getOperand(2); }
  const SDValue &getScale() const { return getOperand(5); }
1860 | |
1861 | static bool classof(const SDNode *N) { |
1862 | return N->getOpcode() == X86ISD::MGATHER || |
1863 | N->getOpcode() == X86ISD::MSCATTER; |
1864 | } |
1865 | }; |
1866 | |
1867 | class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode { |
1868 | public: |
  const SDValue &getPassThru() const { return getOperand(1); }
1870 | |
1871 | static bool classof(const SDNode *N) { |
1872 | return N->getOpcode() == X86ISD::MGATHER; |
1873 | } |
1874 | }; |
1875 | |
1876 | class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode { |
1877 | public: |
  const SDValue &getValue() const { return getOperand(1); }
1879 | |
1880 | static bool classof(const SDNode *N) { |
1881 | return N->getOpcode() == X86ISD::MSCATTER; |
1882 | } |
1883 | }; |
1884 | |
1885 | /// Generate unpacklo/unpackhi shuffle mask. |
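/// e.g. for v4i32, Lo gives <0, 4, 1, 5> and Hi gives <2, 6, 3, 7>; with
/// Unary set, both inputs are the same vector, so Lo gives <0, 0, 1, 1>.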
1886 | void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo, |
1887 | bool Unary); |
1888 | |
1889 | /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation |
1890 | /// imposed by AVX and specific to the unary pattern. Example: |
1891 | /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3> |
1892 | /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7> |
1893 | void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo); |
1894 | |
1895 | } // end namespace llvm |
1896 | |
1897 | #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
1898 | |