| 1 | //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the interfaces that X86 uses to lower LLVM code into a |
| 10 | // selection DAG. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
| 15 | #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
| 16 | |
| 17 | #include "llvm/CodeGen/MachineFunction.h" |
| 18 | #include "llvm/CodeGen/TargetLowering.h" |
| 19 | |
| 20 | namespace llvm { |
| 21 | class X86Subtarget; |
| 22 | class X86TargetMachine; |
| 23 | |
| 24 | namespace X86ISD { |
| 25 | // X86 Specific DAG Nodes |
| 26 | enum NodeType : unsigned { |
| 27 | // Start the numbering where the builtin ops leave off. |
| 28 | FIRST_NUMBER = ISD::BUILTIN_OP_END, |
| 29 | |
| 30 | /// Bit scan forward. |
| 31 | BSF, |
| 32 | /// Bit scan reverse. |
| 33 | BSR, |
| 34 | |
| 35 | /// X86 funnel/double shift i16 instructions. These correspond to |
/// X86::SHLDW and X86::SHRDW instructions, which have different
/// amount-modulo rules from generic funnel shifts.
| 38 | /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. |
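/// (SHLD/SHRD mask the shift amount modulo 32 even for 16-bit operands,
/// whereas ISD::FSHL/FSHR reduce it modulo the bit width.)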
| 39 | FSHL, |
| 40 | FSHR, |
| 41 | |
| 42 | /// Bitwise logical AND of floating point values. This corresponds |
| 43 | /// to X86::ANDPS or X86::ANDPD. |
| 44 | FAND, |
| 45 | |
| 46 | /// Bitwise logical OR of floating point values. This corresponds |
| 47 | /// to X86::ORPS or X86::ORPD. |
| 48 | FOR, |
| 49 | |
| 50 | /// Bitwise logical XOR of floating point values. This corresponds |
| 51 | /// to X86::XORPS or X86::XORPD. |
| 52 | FXOR, |
| 53 | |
| 54 | /// Bitwise logical ANDNOT of floating point values. This |
| 55 | /// corresponds to X86::ANDNPS or X86::ANDNPD. |
| 56 | FANDN, |
| 57 | |
| 58 | /// These operations represent an abstract X86 call |
| 59 | /// instruction, which includes a bunch of information. In particular the |
/// operands of these nodes are:
| 61 | /// |
| 62 | /// #0 - The incoming token chain |
| 63 | /// #1 - The callee |
| 64 | /// #2 - The number of arg bytes the caller pushes on the stack. |
| 65 | /// #3 - The number of arg bytes the callee pops off the stack. |
| 66 | /// #4 - The value to pass in AL/AX/EAX (optional) |
| 67 | /// #5 - The value to pass in DL/DX/EDX (optional) |
| 68 | /// |
| 69 | /// The result values of these nodes are: |
| 70 | /// |
| 71 | /// #0 - The outgoing token chain |
| 72 | /// #1 - The first register result value (optional) |
| 73 | /// #2 - The second register result value (optional) |
| 74 | /// |
| 75 | CALL, |
| 76 | |
| 77 | /// Same as call except it adds the NoTrack prefix. |
| 78 | NT_CALL, |
| 79 | |
// Pseudo for an Objective-C call that gets emitted together with a special
| 81 | // marker instruction. |
| 82 | CALL_RVMARKER, |
| 83 | |
| 84 | /// The same as ISD::CopyFromReg except that this node makes it explicit |
| 85 | /// that it may lower to an x87 FPU stack pop. Optimizations should be more |
| 86 | /// cautious when handling this node than a normal CopyFromReg to avoid |
| 87 | /// removing a required FPU stack pop. A key requirement is optimizations |
| 88 | /// should not optimize any users of a chain that contains a |
| 89 | /// POP_FROM_X87_REG to use a chain from a point earlier than the |
| 90 | /// POP_FROM_X87_REG (which may remove a required FPU stack pop). |
| 91 | POP_FROM_X87_REG, |
| 92 | |
| 93 | // Pseudo for a call to an imported function to ensure the correct machine |
| 94 | // instruction is emitted for Import Call Optimization. |
| 95 | IMP_CALL, |
| 96 | |
| 97 | /// X86 compare and logical compare instructions. |
| 98 | CMP, |
| 99 | FCMP, |
| 100 | COMI, |
| 101 | UCOMI, |
| 102 | |
| 103 | // X86 compare with Intrinsics similar to COMI. |
| 104 | COMX, |
| 105 | UCOMX, |
| 106 | |
| 107 | /// X86 bit-test instructions. |
| 108 | BT, |
| 109 | |
| 110 | /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS |
| 111 | /// operand, usually produced by a CMP instruction. |
| 112 | SETCC, |
| 113 | |
| 114 | /// X86 Select |
| 115 | SELECTS, |
| 116 | |
// Same as SETCC except it's materialized with an SBB and the value is all
// ones or all zeros.
| 119 | SETCC_CARRY, // R = carry_bit ? ~0 : 0 |
| 120 | |
| 121 | /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. |
| 122 | /// Operands are two FP values to compare; result is a mask of |
| 123 | /// 0s or 1s. Generally DTRT for C/C++ with NaNs. |
| 124 | FSETCC, |
| 125 | |
/// X86 FP SETCC, similar to above, but with output as an i1 mask and
/// a version with SAE.
| 128 | FSETCCM, |
| 129 | FSETCCM_SAE, |
| 130 | |
| 131 | /// X86 conditional moves. Operand 0 and operand 1 are the two values |
| 132 | /// to select from. Operand 2 is the condition code, and operand 3 is the |
| 133 | /// flag operand produced by a CMP or TEST instruction. |
| 134 | CMOV, |
| 135 | |
| 136 | /// X86 conditional branches. Operand 0 is the chain operand, operand 1 |
| 137 | /// is the block to branch if condition is true, operand 2 is the |
| 138 | /// condition code, and operand 3 is the flag operand produced by a CMP |
| 139 | /// or TEST instruction. |
| 140 | BRCOND, |
| 141 | |
| 142 | /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and |
| 143 | /// operand 1 is the target address. |
| 144 | NT_BRIND, |
| 145 | |
| 146 | /// Return with a glue operand. Operand 0 is the chain operand, operand |
| 147 | /// 1 is the number of bytes of stack to pop. |
| 148 | RET_GLUE, |
| 149 | |
| 150 | /// Return from interrupt. Operand 0 is the number of bytes to pop. |
| 151 | IRET, |
| 152 | |
| 153 | /// Repeat fill, corresponds to X86::REP_STOSx. |
| 154 | REP_STOS, |
| 155 | |
| 156 | /// Repeat move, corresponds to X86::REP_MOVSx. |
| 157 | REP_MOVS, |
| 158 | |
| 159 | /// On Darwin, this node represents the result of the popl |
| 160 | /// at function entry, used for PIC code. |
| 161 | GlobalBaseReg, |
| 162 | |
| 163 | /// A wrapper node for TargetConstantPool, TargetJumpTable, |
| 164 | /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, |
| 165 | /// MCSymbol and TargetBlockAddress. |
| 166 | Wrapper, |
| 167 | |
| 168 | /// Special wrapper used under X86-64 PIC mode for RIP |
| 169 | /// relative displacements. |
| 170 | WrapperRIP, |
| 171 | |
| 172 | /// Copies a 64-bit value from an MMX vector to the low word |
| 173 | /// of an XMM vector, with the high word zero filled. |
| 174 | MOVQ2DQ, |
| 175 | |
| 176 | /// Copies a 64-bit value from the low word of an XMM vector |
| 177 | /// to an MMX vector. |
| 178 | MOVDQ2Q, |
| 179 | |
| 180 | /// Copies a 32-bit value from the low word of a MMX |
| 181 | /// vector to a GPR. |
| 182 | MMX_MOVD2W, |
| 183 | |
| 184 | /// Copies a GPR into the low 32-bit word of a MMX vector |
| 185 | /// and zero out the high word. |
| 186 | MMX_MOVW2D, |
| 187 | |
| 188 | /// Extract an 8-bit value from a vector and zero extend it to |
| 189 | /// i32, corresponds to X86::PEXTRB. |
| 190 | PEXTRB, |
| 191 | |
| 192 | /// Extract a 16-bit value from a vector and zero extend it to |
| 193 | /// i32, corresponds to X86::PEXTRW. |
| 194 | PEXTRW, |
| 195 | |
| 196 | /// Insert any element of a 4 x float vector into any element |
/// of a destination 4 x float vector.
| 198 | INSERTPS, |
| 199 | |
| 200 | /// Insert the lower 8-bits of a 32-bit value to a vector, |
| 201 | /// corresponds to X86::PINSRB. |
| 202 | PINSRB, |
| 203 | |
| 204 | /// Insert the lower 16-bits of a 32-bit value to a vector, |
| 205 | /// corresponds to X86::PINSRW. |
| 206 | PINSRW, |
| 207 | |
| 208 | /// Shuffle 16 8-bit values within a vector. |
| 209 | PSHUFB, |
| 210 | |
| 211 | /// Compute Sum of Absolute Differences. |
| 212 | PSADBW, |
| 213 | /// Compute Double Block Packed Sum-Absolute-Differences |
| 214 | DBPSADBW, |
| 215 | |
| 216 | /// Bitwise Logical AND NOT of Packed FP values. |
| 217 | ANDNP, |
| 218 | |
| 219 | /// Blend where the selector is an immediate. |
| 220 | BLENDI, |
| 221 | |
| 222 | /// Dynamic (non-constant condition) vector blend where only the sign bits |
| 223 | /// of the condition elements are used. This is used to enforce that the |
| 224 | /// condition mask is not valid for generic VSELECT optimizations. This |
| 225 | /// is also used to implement the intrinsics. |
| 226 | /// Operands are in VSELECT order: MASK, TRUE, FALSE |
| 227 | BLENDV, |
| 228 | |
| 229 | /// Combined add and sub on an FP vector. |
| 230 | ADDSUB, |
| 231 | |
| 232 | // FP vector ops with rounding mode. |
| 233 | FADD_RND, |
| 234 | FADDS, |
| 235 | FADDS_RND, |
| 236 | FSUB_RND, |
| 237 | FSUBS, |
| 238 | FSUBS_RND, |
| 239 | FMUL_RND, |
| 240 | FMULS, |
| 241 | FMULS_RND, |
| 242 | FDIV_RND, |
| 243 | FDIVS, |
| 244 | FDIVS_RND, |
| 245 | FMAX_SAE, |
| 246 | FMAXS_SAE, |
| 247 | FMIN_SAE, |
| 248 | FMINS_SAE, |
| 249 | FSQRT_RND, |
| 250 | FSQRTS, |
| 251 | FSQRTS_RND, |
| 252 | |
| 253 | // FP vector get exponent. |
| 254 | FGETEXP, |
| 255 | FGETEXP_SAE, |
| 256 | FGETEXPS, |
| 257 | FGETEXPS_SAE, |
| 258 | // Extract Normalized Mantissas. |
| 259 | VGETMANT, |
| 260 | VGETMANT_SAE, |
| 261 | VGETMANTS, |
| 262 | VGETMANTS_SAE, |
| 263 | // FP Scale. |
| 264 | SCALEF, |
| 265 | SCALEF_RND, |
| 266 | SCALEFS, |
| 267 | SCALEFS_RND, |
| 268 | |
| 269 | /// Integer horizontal add/sub. |
| 270 | HADD, |
| 271 | HSUB, |
| 272 | |
| 273 | /// Floating point horizontal add/sub. |
| 274 | FHADD, |
| 275 | FHSUB, |
| 276 | |
| 277 | // Detect Conflicts Within a Vector |
| 278 | CONFLICT, |
| 279 | |
| 280 | /// Floating point max and min. |
| 281 | FMAX, |
| 282 | FMIN, |
| 283 | |
| 284 | /// Commutative FMIN and FMAX. |
| 285 | FMAXC, |
| 286 | FMINC, |
| 287 | |
| 288 | /// Scalar intrinsic floating point max and min. |
| 289 | FMAXS, |
| 290 | FMINS, |
| 291 | |
| 292 | /// Floating point reciprocal-sqrt and reciprocal approximation. |
| 293 | /// Note that these typically require refinement |
| 294 | /// in order to obtain suitable precision. |
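/// A single Newton-Raphson refinement step is typically applied to the
/// result to reach acceptable accuracy.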
| 295 | FRSQRT, |
| 296 | FRCP, |
| 297 | |
| 298 | // AVX-512 reciprocal approximations with a little more precision. |
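// (The "14" refers to the guaranteed relative error of at most 2^-14,
// versus roughly 2^-12 for the legacy approximations above.)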
| 299 | RSQRT14, |
| 300 | RSQRT14S, |
| 301 | RCP14, |
| 302 | RCP14S, |
| 303 | |
| 304 | // Thread Local Storage. |
| 305 | TLSADDR, |
| 306 | |
| 307 | // Thread Local Storage. A call to get the start address |
| 308 | // of the TLS block for the current module. |
| 309 | TLSBASEADDR, |
| 310 | |
| 311 | // Thread Local Storage. When calling to an OS provided |
| 312 | // thunk at the address from an earlier relocation. |
| 313 | TLSCALL, |
| 314 | |
| 315 | // Thread Local Storage. A descriptor containing pointer to |
| 316 | // code and to argument to get the TLS offset for the symbol. |
| 317 | TLSDESC, |
| 318 | |
| 319 | // Exception Handling helpers. |
| 320 | EH_RETURN, |
| 321 | |
| 322 | // SjLj exception handling setjmp. |
| 323 | EH_SJLJ_SETJMP, |
| 324 | |
| 325 | // SjLj exception handling longjmp. |
| 326 | EH_SJLJ_LONGJMP, |
| 327 | |
| 328 | // SjLj exception handling dispatch. |
| 329 | EH_SJLJ_SETUP_DISPATCH, |
| 330 | |
| 331 | /// Tail call return. See X86TargetLowering::LowerCall for |
| 332 | /// the list of operands. |
| 333 | TC_RETURN, |
| 334 | |
| 335 | // Vector move to low scalar and zero higher vector elements. |
| 336 | VZEXT_MOVL, |
| 337 | |
| 338 | // Vector integer truncate. |
| 339 | VTRUNC, |
| 340 | // Vector integer truncate with unsigned/signed saturation. |
| 341 | VTRUNCUS, |
| 342 | VTRUNCS, |
| 343 | |
| 344 | // Masked version of the above. Used when less than a 128-bit result is |
| 345 | // produced since the mask only applies to the lower elements and can't |
| 346 | // be represented by a select. |
| 347 | // SRC, PASSTHRU, MASK |
| 348 | VMTRUNC, |
| 349 | VMTRUNCUS, |
| 350 | VMTRUNCS, |
| 351 | |
| 352 | // Vector FP extend. |
| 353 | VFPEXT, |
| 354 | VFPEXT_SAE, |
| 355 | VFPEXTS, |
| 356 | VFPEXTS_SAE, |
| 357 | |
| 358 | // Vector FP round. |
| 359 | VFPROUND, |
// Convert two packed single-precision sources into one packed result.
| 361 | VFPROUND2, |
| 362 | VFPROUND2_RND, |
| 363 | VFPROUND_RND, |
| 364 | VFPROUNDS, |
| 365 | VFPROUNDS_RND, |
| 366 | |
| 367 | // Masked version of above. Used for v2f64->v4f32. |
| 368 | // SRC, PASSTHRU, MASK |
| 369 | VMFPROUND, |
| 370 | |
| 371 | // 128-bit vector logical left / right shift |
| 372 | VSHLDQ, |
| 373 | VSRLDQ, |
| 374 | |
| 375 | // Vector shift elements |
| 376 | VSHL, |
| 377 | VSRL, |
| 378 | VSRA, |
| 379 | |
| 380 | // Vector variable shift |
| 381 | VSHLV, |
| 382 | VSRLV, |
| 383 | VSRAV, |
| 384 | |
| 385 | // Vector shift elements by immediate |
| 386 | VSHLI, |
| 387 | VSRLI, |
| 388 | VSRAI, |
| 389 | |
| 390 | // Shifts of mask registers. |
| 391 | KSHIFTL, |
| 392 | KSHIFTR, |
| 393 | |
| 394 | // Bit rotate by immediate |
| 395 | VROTLI, |
| 396 | VROTRI, |
| 397 | |
| 398 | // Vector packed double/float comparison. |
| 399 | CMPP, |
| 400 | |
| 401 | // Vector integer comparisons. |
| 402 | PCMPEQ, |
| 403 | PCMPGT, |
| 404 | |
| 405 | // v8i16 Horizontal minimum and position. |
| 406 | PHMINPOS, |
| 407 | |
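// Multishift (VPMULTISHIFTQB): select unaligned bytes from each 64-bit
// lane of the source, at bit offsets given by the control bytes.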
| 408 | MULTISHIFT, |
| 409 | |
| 410 | /// Vector comparison generating mask bits for fp and |
| 411 | /// integer signed and unsigned data types. |
| 412 | CMPM, |
| 413 | // Vector mask comparison generating mask bits for FP values. |
| 414 | CMPMM, |
| 415 | // Vector mask comparison with SAE for FP values. |
| 416 | CMPMM_SAE, |
| 417 | |
| 418 | // Arithmetic operations with FLAGS results. |
| 419 | ADD, |
| 420 | SUB, |
| 421 | ADC, |
| 422 | SBB, |
| 423 | SMUL, |
| 424 | UMUL, |
| 425 | OR, |
| 426 | XOR, |
| 427 | AND, |
| 428 | |
| 429 | // Bit field extract. |
| 430 | BEXTR, |
| 431 | BEXTRI, |
| 432 | |
| 433 | // Zero High Bits Starting with Specified Bit Position. |
| 434 | BZHI, |
| 435 | |
| 436 | // Parallel extract and deposit. |
| 437 | PDEP, |
| 438 | PEXT, |
| 439 | |
| 440 | // X86-specific multiply by immediate. |
| 441 | MUL_IMM, |
| 442 | |
| 443 | // Vector sign bit extraction. |
| 444 | MOVMSK, |
| 445 | |
| 446 | // Vector bitwise comparisons. |
| 447 | PTEST, |
| 448 | |
| 449 | // Vector packed fp sign bitwise comparisons. |
| 450 | TESTP, |
| 451 | |
| 452 | // OR/AND test for masks. |
| 453 | KORTEST, |
| 454 | KTEST, |
| 455 | |
| 456 | // ADD for masks. |
| 457 | KADD, |
| 458 | |
| 459 | // Several flavors of instructions with vector shuffle behaviors. |
// Saturated signed/unsigned packing.
| 461 | PACKSS, |
| 462 | PACKUS, |
| 463 | // Intra-lane alignr. |
| 464 | PALIGNR, |
| 465 | // AVX512 inter-lane alignr. |
| 466 | VALIGN, |
| 467 | PSHUFD, |
| 468 | PSHUFHW, |
| 469 | PSHUFLW, |
| 470 | SHUFP, |
| 471 | // VBMI2 Concat & Shift. |
| 472 | VSHLD, |
| 473 | VSHRD, |
| 474 | VSHLDV, |
| 475 | VSHRDV, |
| 476 | // Shuffle Packed Values at 128-bit granularity. |
| 477 | SHUF128, |
| 478 | MOVDDUP, |
| 479 | MOVSHDUP, |
| 480 | MOVSLDUP, |
| 481 | MOVLHPS, |
| 482 | MOVHLPS, |
| 483 | MOVSD, |
| 484 | MOVSS, |
| 485 | MOVSH, |
| 486 | UNPCKL, |
| 487 | UNPCKH, |
| 488 | VPERMILPV, |
| 489 | VPERMILPI, |
| 490 | VPERMI, |
| 491 | VPERM2X128, |
| 492 | |
| 493 | // Variable Permute (VPERM). |
| 494 | // Res = VPERMV MaskV, V0 |
| 495 | VPERMV, |
| 496 | |
| 497 | // 3-op Variable Permute (VPERMT2). |
| 498 | // Res = VPERMV3 V0, MaskV, V1 |
| 499 | VPERMV3, |
| 500 | |
| 501 | // Bitwise ternary logic. |
| 502 | VPTERNLOG, |
| 503 | // Fix Up Special Packed Float32/64 values. |
| 504 | VFIXUPIMM, |
| 505 | VFIXUPIMM_SAE, |
| 506 | VFIXUPIMMS, |
| 507 | VFIXUPIMMS_SAE, |
| 508 | // Range Restriction Calculation For Packed Pairs of Float32/64 values. |
| 509 | VRANGE, |
| 510 | VRANGE_SAE, |
| 511 | VRANGES, |
| 512 | VRANGES_SAE, |
// Reduce - Perform Reduction Transformation on scalar/packed FP.
| 514 | VREDUCE, |
| 515 | VREDUCE_SAE, |
| 516 | VREDUCES, |
| 517 | VREDUCES_SAE, |
| 518 | // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. |
| 519 | // Also used by the legacy (V)ROUND intrinsics where we mask out the |
| 520 | // scaling part of the immediate. |
| 521 | VRNDSCALE, |
| 522 | VRNDSCALE_SAE, |
| 523 | VRNDSCALES, |
| 524 | VRNDSCALES_SAE, |
| 525 | // Tests Types Of a FP Values for packed types. |
| 526 | VFPCLASS, |
| 527 | // Tests Types Of a FP Values for scalar types. |
| 528 | VFPCLASSS, |
| 529 | |
| 530 | // Broadcast (splat) scalar or element 0 of a vector. If the operand is |
| 531 | // a vector, this node may change the vector length as part of the splat. |
| 532 | VBROADCAST, |
| 533 | // Broadcast mask to vector. |
| 534 | VBROADCASTM, |
| 535 | |
| 536 | /// SSE4A Extraction and Insertion. |
| 537 | EXTRQI, |
| 538 | INSERTQI, |
| 539 | |
| 540 | // XOP arithmetic/logical shifts. |
| 541 | VPSHA, |
| 542 | VPSHL, |
| 543 | // XOP signed/unsigned integer comparisons. |
| 544 | VPCOM, |
| 545 | VPCOMU, |
| 546 | // XOP packed permute bytes. |
| 547 | VPPERM, |
| 548 | // XOP two source permutation. |
| 549 | VPERMIL2, |
| 550 | |
| 551 | // Vector multiply packed unsigned doubleword integers. |
| 552 | PMULUDQ, |
| 553 | // Vector multiply packed signed doubleword integers. |
| 554 | PMULDQ, |
// Vector Multiply Packed Unsigned Integers with Round and Scale.
| 556 | MULHRS, |
| 557 | |
| 558 | // Multiply and Add Packed Integers. |
| 559 | VPMADDUBSW, |
| 560 | VPMADDWD, |
| 561 | |
| 562 | // AVX512IFMA multiply and add. |
// NOTE: These are different from the instructions and perform
// op0 * op1 + op2.
| 565 | VPMADD52L, |
| 566 | VPMADD52H, |
| 567 | |
| 568 | // VNNI |
| 569 | VPDPBUSD, |
| 570 | VPDPBUSDS, |
| 571 | VPDPWSSD, |
| 572 | VPDPWSSDS, |
| 573 | |
| 574 | // FMA nodes. |
| 575 | // We use the target independent ISD::FMA for the non-inverted case. |
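// FNMADD computes -(a*b)+c, FMSUB computes (a*b)-c and FNMSUB computes
// -(a*b)-c, matching the corresponding VFNMADD/VFMSUB/VFNMSUB instructions;
// FMADDSUB and FMSUBADD alternate add and subtract across vector lanes.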
| 576 | FNMADD, |
| 577 | FMSUB, |
| 578 | FNMSUB, |
| 579 | FMADDSUB, |
| 580 | FMSUBADD, |
| 581 | |
| 582 | // FMA with rounding mode. |
| 583 | FMADD_RND, |
| 584 | FNMADD_RND, |
| 585 | FMSUB_RND, |
| 586 | FNMSUB_RND, |
| 587 | FMADDSUB_RND, |
| 588 | FMSUBADD_RND, |
| 589 | |
| 590 | // AVX512-FP16 complex addition and multiplication. |
| 591 | VFMADDC, |
| 592 | VFMADDC_RND, |
| 593 | VFCMADDC, |
| 594 | VFCMADDC_RND, |
| 595 | |
| 596 | VFMULC, |
| 597 | VFMULC_RND, |
| 598 | VFCMULC, |
| 599 | VFCMULC_RND, |
| 600 | |
| 601 | VFMADDCSH, |
| 602 | VFMADDCSH_RND, |
| 603 | VFCMADDCSH, |
| 604 | VFCMADDCSH_RND, |
| 605 | |
| 606 | VFMULCSH, |
| 607 | VFMULCSH_RND, |
| 608 | VFCMULCSH, |
| 609 | VFCMULCSH_RND, |
| 610 | |
| 611 | VPDPBSUD, |
| 612 | VPDPBSUDS, |
| 613 | VPDPBUUD, |
| 614 | VPDPBUUDS, |
| 615 | VPDPBSSD, |
| 616 | VPDPBSSDS, |
| 617 | |
| 618 | VPDPWSUD, |
| 619 | VPDPWSUDS, |
| 620 | VPDPWUSD, |
| 621 | VPDPWUSDS, |
| 622 | VPDPWUUD, |
| 623 | VPDPWUUDS, |
| 624 | |
| 625 | VMINMAX, |
| 626 | VMINMAX_SAE, |
| 627 | VMINMAXS, |
| 628 | VMINMAXS_SAE, |
| 629 | |
| 630 | CVTP2IBS, |
| 631 | CVTP2IUBS, |
| 632 | CVTP2IBS_RND, |
| 633 | CVTP2IUBS_RND, |
| 634 | CVTTP2IBS, |
| 635 | CVTTP2IUBS, |
| 636 | CVTTP2IBS_SAE, |
| 637 | CVTTP2IUBS_SAE, |
| 638 | |
| 639 | MPSADBW, |
| 640 | |
| 641 | VCVT2PH2BF8, |
| 642 | VCVT2PH2BF8S, |
| 643 | VCVT2PH2HF8, |
| 644 | VCVT2PH2HF8S, |
| 645 | VCVTBIASPH2BF8, |
| 646 | VCVTBIASPH2BF8S, |
| 647 | VCVTBIASPH2HF8, |
| 648 | VCVTBIASPH2HF8S, |
| 649 | VCVTPH2BF8, |
| 650 | VCVTPH2BF8S, |
| 651 | VCVTPH2HF8, |
| 652 | VCVTPH2HF8S, |
| 653 | VMCVTBIASPH2BF8, |
| 654 | VMCVTBIASPH2BF8S, |
| 655 | VMCVTBIASPH2HF8, |
| 656 | VMCVTBIASPH2HF8S, |
| 657 | VMCVTPH2BF8, |
| 658 | VMCVTPH2BF8S, |
| 659 | VMCVTPH2HF8, |
| 660 | VMCVTPH2HF8S, |
| 661 | VCVTHF82PH, |
| 662 | |
| 663 | // Compress and expand. |
| 664 | COMPRESS, |
| 665 | EXPAND, |
| 666 | |
| 667 | // Bits shuffle |
| 668 | VPSHUFBITQMB, |
| 669 | |
| 670 | // Convert Unsigned/Integer to Floating-Point Value with rounding mode. |
| 671 | SINT_TO_FP_RND, |
| 672 | UINT_TO_FP_RND, |
| 673 | SCALAR_SINT_TO_FP, |
| 674 | SCALAR_UINT_TO_FP, |
| 675 | SCALAR_SINT_TO_FP_RND, |
| 676 | SCALAR_UINT_TO_FP_RND, |
| 677 | |
| 678 | // Vector float/double to signed/unsigned integer. |
| 679 | CVTP2SI, |
| 680 | CVTP2UI, |
| 681 | CVTP2SI_RND, |
| 682 | CVTP2UI_RND, |
| 683 | // Scalar float/double to signed/unsigned integer. |
| 684 | CVTS2SI, |
| 685 | CVTS2UI, |
| 686 | CVTS2SI_RND, |
| 687 | CVTS2UI_RND, |
| 688 | |
| 689 | // Vector float/double to signed/unsigned integer with truncation. |
| 690 | CVTTP2SI, |
| 691 | CVTTP2UI, |
| 692 | CVTTP2SI_SAE, |
| 693 | CVTTP2UI_SAE, |
| 694 | |
| 695 | // Saturation enabled Vector float/double to signed/unsigned |
| 696 | // integer with truncation. |
| 697 | CVTTP2SIS, |
| 698 | CVTTP2UIS, |
| 699 | CVTTP2SIS_SAE, |
| 700 | CVTTP2UIS_SAE, |
| 701 | // Masked versions of above. Used for v2f64 to v4i32. |
| 702 | // SRC, PASSTHRU, MASK |
| 703 | MCVTTP2SIS, |
| 704 | MCVTTP2UIS, |
| 705 | |
| 706 | // Scalar float/double to signed/unsigned integer with truncation. |
| 707 | CVTTS2SI, |
| 708 | CVTTS2UI, |
| 709 | CVTTS2SI_SAE, |
| 710 | CVTTS2UI_SAE, |
| 711 | |
| 712 | // Vector signed/unsigned integer to float/double. |
| 713 | CVTSI2P, |
| 714 | CVTUI2P, |
| 715 | |
| 716 | // Scalar float/double to signed/unsigned integer with saturation. |
| 717 | CVTTS2SIS, |
| 718 | CVTTS2UIS, |
| 719 | CVTTS2SIS_SAE, |
| 720 | CVTTS2UIS_SAE, |
| 721 | |
// Masked versions of above. Used when the result is narrower than 128 bits,
// e.g. v2f64->v4i32.
| 723 | // SRC, PASSTHRU, MASK |
| 724 | MCVTP2SI, |
| 725 | MCVTP2UI, |
| 726 | MCVTTP2SI, |
| 727 | MCVTTP2UI, |
| 728 | MCVTSI2P, |
| 729 | MCVTUI2P, |
| 730 | |
| 731 | // Custom handling for FP_TO_xINT_SAT |
| 732 | FP_TO_SINT_SAT, |
| 733 | FP_TO_UINT_SAT, |
| 734 | |
| 735 | // Vector float to bfloat16. |
| 736 | // Convert packed single data to packed BF16 data |
| 737 | CVTNEPS2BF16, |
| 738 | // Masked version of above. |
| 739 | // SRC, PASSTHRU, MASK |
| 740 | MCVTNEPS2BF16, |
| 741 | |
// Dot product of BF16/FP16 pairs, accumulated into
// packed single precision.
| 744 | DPBF16PS, |
| 745 | DPFP16PS, |
| 746 | |
// A stack checking function call. On Windows it's a _chkstk call.
| 748 | DYN_ALLOCA, |
| 749 | |
| 750 | // For allocating variable amounts of stack space when using |
| 751 | // segmented stacks. Check if the current stacklet has enough space, and |
| 752 | // falls back to heap allocation if not. |
| 753 | SEG_ALLOCA, |
| 754 | |
| 755 | // For allocating stack space when using stack clash protector. |
| 756 | // Allocation is performed by block, and each block is probed. |
| 757 | PROBED_ALLOCA, |
| 758 | |
| 759 | // Memory barriers. |
| 760 | MFENCE, |
| 761 | |
| 762 | // Get a random integer and indicate whether it is valid in CF. |
| 763 | RDRAND, |
| 764 | |
| 765 | // Get a NIST SP800-90B & C compliant random integer and |
| 766 | // indicate whether it is valid in CF. |
| 767 | RDSEED, |
| 768 | |
| 769 | // Protection keys |
| 770 | // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. |
| 771 | // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is |
| 772 | // value for ECX. |
| 773 | RDPKRU, |
| 774 | WRPKRU, |
| 775 | |
| 776 | // SSE42 string comparisons. |
| 777 | // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG |
| 778 | // will emit one or two instructions based on which results are used. If |
// both the flags and the index/mask are used, this allows us to use a single
// instruction since we won't have to pick an opcode for flags. Instead we
// can rely on the
| 781 | // DAG to CSE everything and decide at isel. |
| 782 | PCMPISTR, |
| 783 | PCMPESTR, |
| 784 | |
| 785 | // Test if in transactional execution. |
| 786 | XTEST, |
| 787 | |
| 788 | // Conversions between float and half-float. |
| 789 | CVTPS2PH, |
| 790 | CVTPS2PH_SAE, |
| 791 | CVTPH2PS, |
| 792 | CVTPH2PS_SAE, |
| 793 | |
| 794 | // Masked version of above. |
| 795 | // SRC, RND, PASSTHRU, MASK |
| 796 | MCVTPS2PH, |
| 797 | MCVTPS2PH_SAE, |
| 798 | |
| 799 | // Galois Field Arithmetic Instructions |
| 800 | GF2P8AFFINEINVQB, |
| 801 | GF2P8AFFINEQB, |
| 802 | GF2P8MULB, |
| 803 | |
| 804 | // LWP insert record. |
| 805 | LWPINS, |
| 806 | |
| 807 | // User level wait |
| 808 | UMWAIT, |
| 809 | TPAUSE, |
| 810 | |
| 811 | // Enqueue Stores Instructions |
| 812 | ENQCMD, |
| 813 | ENQCMDS, |
| 814 | |
| 815 | // For avx512-vp2intersect |
| 816 | VP2INTERSECT, |
| 817 | |
| 818 | // User level interrupts - testui |
| 819 | TESTUI, |
| 820 | |
| 821 | // Perform an FP80 add after changing precision control in FPCW. |
| 822 | FP80_ADD, |
| 823 | |
| 824 | // Conditional compare instructions |
| 825 | CCMP, |
| 826 | CTEST, |
| 827 | |
| 828 | /// X86 strict FP compare instructions. |
| 829 | FIRST_STRICTFP_OPCODE, |
| 830 | STRICT_FCMP = FIRST_STRICTFP_OPCODE, |
| 831 | STRICT_FCMPS, |
| 832 | |
| 833 | // Vector packed double/float comparison. |
| 834 | STRICT_CMPP, |
| 835 | |
| 836 | /// Vector comparison generating mask bits for fp and |
| 837 | /// integer signed and unsigned data types. |
| 838 | STRICT_CMPM, |
| 839 | |
| 840 | // Vector float/double to signed/unsigned integer with truncation. |
| 841 | STRICT_CVTTP2SI, |
| 842 | STRICT_CVTTP2UI, |
| 843 | |
| 844 | // Vector FP extend. |
| 845 | STRICT_VFPEXT, |
| 846 | |
| 847 | // Vector FP round. |
| 848 | STRICT_VFPROUND, |
| 849 | |
| 850 | // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. |
| 851 | // Also used by the legacy (V)ROUND intrinsics where we mask out the |
| 852 | // scaling part of the immediate. |
| 853 | STRICT_VRNDSCALE, |
| 854 | |
| 855 | // Vector signed/unsigned integer to float/double. |
| 856 | STRICT_CVTSI2P, |
| 857 | STRICT_CVTUI2P, |
| 858 | |
| 859 | // Strict FMA nodes. |
| 860 | STRICT_FNMADD, |
| 861 | STRICT_FMSUB, |
| 862 | STRICT_FNMSUB, |
| 863 | |
| 864 | // Conversions between float and half-float. |
| 865 | STRICT_CVTPS2PH, |
| 866 | STRICT_CVTPH2PS, |
| 867 | |
| 868 | // Perform an FP80 add after changing precision control in FPCW. |
| 869 | STRICT_FP80_ADD, |
| 870 | |
| 871 | /// Floating point max and min. |
| 872 | STRICT_FMAX, |
| 873 | STRICT_FMIN, |
| 874 | LAST_STRICTFP_OPCODE = STRICT_FMIN, |
| 875 | |
| 876 | // Compare and swap. |
| 877 | FIRST_MEMORY_OPCODE, |
| 878 | LCMPXCHG_DAG = FIRST_MEMORY_OPCODE, |
| 879 | LCMPXCHG8_DAG, |
| 880 | LCMPXCHG16_DAG, |
| 881 | LCMPXCHG16_SAVE_RBX_DAG, |
| 882 | |
| 883 | /// LOCK-prefixed arithmetic read-modify-write instructions. |
| 884 | /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) |
| 885 | LADD, |
| 886 | LSUB, |
| 887 | LOR, |
| 888 | LXOR, |
| 889 | LAND, |
| 890 | LBTS, |
| 891 | LBTC, |
| 892 | LBTR, |
| 893 | LBTS_RM, |
| 894 | LBTC_RM, |
| 895 | LBTR_RM, |
| 896 | |
| 897 | /// RAO arithmetic instructions. |
| 898 | /// OUTCHAIN = AADD(INCHAIN, PTR, RHS) |
| 899 | AADD, |
| 900 | AOR, |
| 901 | AXOR, |
| 902 | AAND, |
| 903 | |
| 904 | // Load, scalar_to_vector, and zero extend. |
| 905 | VZEXT_LOAD, |
| 906 | |
| 907 | // extract_vector_elt, store. |
VEXTRACT_STORE,
| 909 | |
| 910 | // scalar broadcast from memory. |
| 911 | VBROADCAST_LOAD, |
| 912 | |
| 913 | // subvector broadcast from memory. |
| 914 | SUBV_BROADCAST_LOAD, |
| 915 | |
| 916 | // Store FP control word into i16 memory. |
| 917 | FNSTCW16m, |
| 918 | |
| 919 | // Load FP control word from i16 memory. |
| 920 | FLDCW16m, |
| 921 | |
| 922 | // Store x87 FPU environment into memory. |
| 923 | FNSTENVm, |
| 924 | |
| 925 | // Load x87 FPU environment from memory. |
| 926 | FLDENVm, |
| 927 | |
| 928 | /// This instruction implements FP_TO_SINT with the |
| 929 | /// integer destination in memory and a FP reg source. This corresponds |
| 930 | /// to the X86::FIST*m instructions and the rounding mode change stuff. It |
| 931 | /// has two inputs (token chain and address) and two outputs (int value |
| 932 | /// and token chain). Memory VT specifies the type to store to. |
| 933 | FP_TO_INT_IN_MEM, |
| 934 | |
| 935 | /// This instruction implements SINT_TO_FP with the |
| 936 | /// integer source in memory and FP reg result. This corresponds to the |
| 937 | /// X86::FILD*m instructions. It has two inputs (token chain and address) |
| 938 | /// and two outputs (FP value and token chain). The integer source type is |
| 939 | /// specified by the memory VT. |
| 940 | FILD, |
| 941 | |
| 942 | /// This instruction implements a fp->int store from FP stack |
| 943 | /// slots. This corresponds to the fist instruction. It takes a |
| 944 | /// chain operand, value to store, address, and glue. The memory VT |
| 945 | /// specifies the type to store as. |
| 946 | FIST, |
| 947 | |
| 948 | /// This instruction implements an extending load to FP stack slots. |
| 949 | /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain |
| 950 | /// operand, and ptr to load from. The memory VT specifies the type to |
| 951 | /// load from. |
| 952 | FLD, |
| 953 | |
| 954 | /// This instruction implements a truncating store from FP stack |
| 955 | /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a |
| 956 | /// chain operand, value to store, address, and glue. The memory VT |
| 957 | /// specifies the type to store as. |
| 958 | FST, |
| 959 | |
| 960 | /// These instructions grab the address of the next argument |
| 961 | /// from a va_list. (reads and modifies the va_list in memory) |
| 962 | VAARG_64, |
| 963 | VAARG_X32, |
| 964 | |
| 965 | // Vector truncating store with unsigned/signed saturation |
| 966 | VTRUNCSTOREUS, |
| 967 | VTRUNCSTORES, |
| 968 | // Vector truncating masked store with unsigned/signed saturation |
| 969 | VMTRUNCSTOREUS, |
| 970 | VMTRUNCSTORES, |
| 971 | |
| 972 | // X86 specific gather and scatter |
| 973 | MGATHER, |
| 974 | MSCATTER, |
| 975 | |
| 976 | // Key locker nodes that produce flags. |
| 977 | AESENC128KL, |
| 978 | AESDEC128KL, |
| 979 | AESENC256KL, |
| 980 | AESDEC256KL, |
| 981 | AESENCWIDE128KL, |
| 982 | AESDECWIDE128KL, |
| 983 | AESENCWIDE256KL, |
| 984 | AESDECWIDE256KL, |
| 985 | |
/// Compare and Add if Condition is Met. Compare the value in operand 2 with
/// the value in memory pointed to by operand 1. If the condition in operand 4
/// is met, add the value of operand 3 to m32 and write the new value back to
/// operand 1. Operand 2 is always updated with the original value from
/// operand 1.
| 990 | CMPCCXADD, |
| 991 | |
| 992 | // Save xmm argument registers to the stack, according to %al. An operator |
| 993 | // is needed so that this can be expanded with control flow. |
| 994 | VASTART_SAVE_XMM_REGS, |
| 995 | |
| 996 | // Conditional load/store instructions |
| 997 | CLOAD, |
| 998 | CSTORE, |
| 999 | LAST_MEMORY_OPCODE = CSTORE, |
| 1000 | }; |
| 1001 | } // end namespace X86ISD |
| 1002 | |
| 1003 | namespace X86 { |
/// The current rounding mode is represented in bits 11:10 of the FP control
/// word (FPCW). These values are the same as the corresponding constants
/// used for the rounding mode in glibc.
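/// With the two-bit rounding-control field at bit position 10, the encoded
/// values below are 0x0, 0x400, 0x800 and 0xC00 respectively.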
| 1007 | enum RoundingMode { |
| 1008 | rmToNearest = 0, // FE_TONEAREST |
| 1009 | rmDownward = 1 << 10, // FE_DOWNWARD |
| 1010 | rmUpward = 2 << 10, // FE_UPWARD |
| 1011 | rmTowardZero = 3 << 10, // FE_TOWARDZERO |
| 1012 | rmMask = 3 << 10 // Bit mask selecting rounding mode |
| 1013 | }; |
| 1014 | } |
| 1015 | |
| 1016 | /// Define some predicates that are used for node matching. |
| 1017 | namespace X86 { |
| 1018 | /// Returns true if Elt is a constant zero or floating point constant +0.0. |
| 1019 | bool isZeroNode(SDValue Elt); |
| 1020 | |
/// Returns true if the given offset can fit into the displacement field of
/// the instruction.
| 1023 | bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, |
| 1024 | bool hasSymbolicDisplacement); |
| 1025 | |
| 1026 | /// Determines whether the callee is required to pop its |
| 1027 | /// own arguments. Callee pop is necessary to support tail calls. |
| 1028 | bool isCalleePop(CallingConv::ID CallingConv, |
| 1029 | bool is64Bit, bool IsVarArg, bool GuaranteeTCO); |
| 1030 | |
| 1031 | /// If Op is a constant whose elements are all the same constant or |
| 1032 | /// undefined, return true and return the constant value in \p SplatVal. |
| 1033 | /// If we have undef bits that don't cover an entire element, we treat these |
| 1034 | /// as zero if AllowPartialUndefs is set, else we fail and return false. |
| 1035 | bool isConstantSplat(SDValue Op, APInt &SplatVal, |
| 1036 | bool AllowPartialUndefs = true); |
| 1037 | |
| 1038 | /// Check if Op is a load operation that could be folded into some other x86 |
| 1039 | /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0. |
| 1040 | bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget, |
| 1041 | bool AssumeSingleUse = false); |
| 1042 | |
| 1043 | /// Check if Op is a load operation that could be folded into a vector splat |
| 1044 | /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2. |
| 1045 | bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT, |
| 1046 | const X86Subtarget &Subtarget, |
| 1047 | bool AssumeSingleUse = false); |
| 1048 | |
| 1049 | /// Check if Op is a value that could be used to fold a store into some |
| 1050 | /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi). |
| 1051 | bool mayFoldIntoStore(SDValue Op); |
| 1052 | |
| 1053 | /// Check if Op is an operation that could be folded into a zero extend x86 |
| 1054 | /// instruction. |
| 1055 | bool mayFoldIntoZeroExtend(SDValue Op); |
| 1056 | |
| 1057 | /// True if the target supports the extended frame for async Swift |
| 1058 | /// functions. |
| 1059 | bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, |
| 1060 | const MachineFunction &MF); |
| 1061 | } // end namespace X86 |
| 1062 | |
| 1063 | //===--------------------------------------------------------------------===// |
| 1064 | // X86 Implementation of the TargetLowering interface |
| 1065 | class X86TargetLowering final : public TargetLowering { |
| 1066 | public: |
| 1067 | explicit X86TargetLowering(const X86TargetMachine &TM, |
| 1068 | const X86Subtarget &STI); |
| 1069 | |
| 1070 | unsigned getJumpTableEncoding() const override; |
| 1071 | bool useSoftFloat() const override; |
| 1072 | |
| 1073 | void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
| 1074 | ArgListTy &Args) const override; |
| 1075 | |
| 1076 | MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override { |
| 1077 | return MVT::i8; |
| 1078 | } |
| 1079 | |
| 1080 | const MCExpr * |
| 1081 | LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
| 1082 | const MachineBasicBlock *MBB, unsigned uid, |
| 1083 | MCContext &Ctx) const override; |
| 1084 | |
| 1085 | /// Returns relocation base for the given PIC jumptable. |
| 1086 | SDValue getPICJumpTableRelocBase(SDValue Table, |
| 1087 | SelectionDAG &DAG) const override; |
| 1088 | const MCExpr * |
| 1089 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
| 1090 | unsigned JTI, MCContext &Ctx) const override; |
| 1091 | |
| 1092 | /// Return the desired alignment for ByVal aggregate |
| 1093 | /// function arguments in the caller parameter area. For X86, aggregates |
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
| 1096 | Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override; |
| 1097 | |
| 1098 | EVT getOptimalMemOpType(const MemOp &Op, |
| 1099 | const AttributeList &FuncAttributes) const override; |
| 1100 | |
| 1101 | /// Returns true if it's safe to use load / store of the |
| 1102 | /// specified type to expand memcpy / memset inline. This is mostly true |
| 1103 | /// for all types except for some special cases. For example, on X86 |
| 1104 | /// targets without SSE2 f64 load / store are done with fldl / fstpl which |
| 1105 | /// also does type conversion. Note the specified type doesn't have to be |
| 1106 | /// legal as the hook is used before type legalization. |
| 1107 | bool isSafeMemOpType(MVT VT) const override; |
| 1108 | |
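/// Returns true if a load/store of the given type with the given alignment
/// is considered fast by the subtarget.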
| 1109 | bool isMemoryAccessFast(EVT VT, Align Alignment) const; |
| 1110 | |
| 1111 | /// Returns true if the target allows unaligned memory accesses of the |
| 1112 | /// specified type. Returns whether it is "fast" in the last argument. |
| 1113 | bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, |
| 1114 | MachineMemOperand::Flags Flags, |
| 1115 | unsigned *Fast) const override; |
| 1116 | |
| 1117 | /// This function returns true if the memory access is aligned or if the |
| 1118 | /// target allows this specific unaligned memory access. If the access is |
| 1119 | /// allowed, the optional final parameter returns a relative speed of the |
| 1120 | /// access (as defined by the target). |
| 1121 | bool allowsMemoryAccess( |
| 1122 | LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, |
| 1123 | Align Alignment, |
| 1124 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
| 1125 | unsigned *Fast = nullptr) const override; |
| 1126 | |
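/// Convenience overload that takes the address space, alignment and flags
/// from a MachineMemOperand.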
| 1127 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
| 1128 | const MachineMemOperand &MMO, |
| 1129 | unsigned *Fast) const { |
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                          MMO.getAlign(), MMO.getFlags(), Fast);
| 1132 | } |
| 1133 | |
| 1134 | /// Provide custom lowering hooks for some operations. |
| 1135 | /// |
| 1136 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
| 1137 | |
| 1138 | /// Replace the results of node with an illegal result |
| 1139 | /// type with new values built out of custom code. |
| 1140 | /// |
| 1141 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, |
| 1142 | SelectionDAG &DAG) const override; |
| 1143 | |
| 1144 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
| 1145 | |
| 1146 | bool preferABDSToABSWithNSW(EVT VT) const override; |
| 1147 | |
| 1148 | bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, |
| 1149 | EVT ExtVT) const override; |
| 1150 | |
| 1151 | bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, |
| 1152 | EVT VT) const override; |
| 1153 | |
| 1154 | /// Return true if the target has native support for |
| 1155 | /// the specified value type and it is 'desirable' to use the type for the |
| 1156 | /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 |
| 1157 | /// instruction encodings are longer and some i16 instructions are slow. |
| 1158 | bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; |
| 1159 | |
| 1160 | /// Return true if the target has native support for the |
| 1161 | /// specified value type and it is 'desirable' to use the type. e.g. On x86 |
| 1162 | /// i16 is legal, but undesirable since i16 instruction encodings are longer |
| 1163 | /// and some i16 instructions are slow. |
| 1164 | bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; |
| 1165 | |
/// Return the preferred fold type: Abs if this is a vector, AddAnd if it's an
| 1167 | /// integer, None otherwise. |
| 1168 | TargetLowering::AndOrSETCCFoldKind |
| 1169 | isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, |
| 1170 | const SDNode *SETCC0, |
| 1171 | const SDNode *SETCC1) const override; |
| 1172 | |
| 1173 | /// Return the newly negated expression if the cost is not expensive and |
| 1174 | /// set the cost in \p Cost to indicate that if it is cheaper or neutral to |
| 1175 | /// do the negation. |
| 1176 | SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, |
| 1177 | bool LegalOperations, bool ForCodeSize, |
| 1178 | NegatibleCost &Cost, |
| 1179 | unsigned Depth) const override; |
| 1180 | |
| 1181 | MachineBasicBlock * |
| 1182 | EmitInstrWithCustomInserter(MachineInstr &MI, |
| 1183 | MachineBasicBlock *MBB) const override; |
| 1184 | |
| 1185 | /// This method returns the name of a target specific DAG node. |
| 1186 | const char *getTargetNodeName(unsigned Opcode) const override; |
| 1187 | |
| 1188 | /// Do not merge vector stores after legalization because that may conflict |
| 1189 | /// with x86-specific store splitting optimizations. |
| 1190 | bool mergeStoresAfterLegalization(EVT MemVT) const override { |
| 1191 | return !MemVT.isVector(); |
| 1192 | } |
| 1193 | |
| 1194 | bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
| 1195 | const MachineFunction &MF) const override; |
| 1196 | |
| 1197 | bool isCheapToSpeculateCttz(Type *Ty) const override; |
| 1198 | |
| 1199 | bool isCheapToSpeculateCtlz(Type *Ty) const override; |
| 1200 | |
| 1201 | bool isCtlzFast() const override; |
| 1202 | |
| 1203 | bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { |
| 1204 | // If the pair to store is a mixture of float and int values, we will |
| 1205 | // save two bitwise instructions and one float-to-int instruction and |
| 1206 | // increase one store instruction. There is potentially a more |
| 1207 | // significant benefit because it avoids the float->int domain switch |
// for the input value. So it is more likely a win.
| 1209 | if ((LTy.isFloatingPoint() && HTy.isInteger()) || |
| 1210 | (LTy.isInteger() && HTy.isFloatingPoint())) |
| 1211 | return true; |
| 1212 | // If the pair only contains int values, we will save two bitwise |
| 1213 | // instructions and increase one store instruction (costing one more |
// store buffer). Since the benefit is less clear, we leave
// such pairs out until we have a test case proving it is a win.
| 1216 | return false; |
| 1217 | } |
| 1218 | |
| 1219 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
| 1220 | |
| 1221 | bool hasAndNotCompare(SDValue Y) const override; |
| 1222 | |
| 1223 | bool hasAndNot(SDValue Y) const override; |
| 1224 | |
| 1225 | bool hasBitTest(SDValue X, SDValue Y) const override; |
| 1226 | |
| 1227 | bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
| 1228 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
| 1229 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
| 1230 | SelectionDAG &DAG) const override; |
| 1231 | |
| 1232 | unsigned preferedOpcodeForCmpEqPiecesOfOperand( |
| 1233 | EVT VT, unsigned ShiftOpc, bool MayTransformRotate, |
| 1234 | const APInt &ShiftOrRotateAmt, |
| 1235 | const std::optional<APInt> &AndMask) const override; |
| 1236 | |
| 1237 | bool preferScalarizeSplat(SDNode *N) const override; |
| 1238 | |
| 1239 | CondMergingParams |
| 1240 | getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, |
| 1241 | const Value *Rhs) const override; |
| 1242 | |
| 1243 | bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
| 1244 | CombineLevel Level) const override; |
| 1245 | |
| 1246 | bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override; |
| 1247 | |
| 1248 | bool |
| 1249 | shouldTransformSignedTruncationCheck(EVT XVT, |
| 1250 | unsigned KeptBits) const override { |
// For vectors, we don't have a preference.
| 1252 | if (XVT.isVector()) |
| 1253 | return false; |
| 1254 | |
| 1255 | auto VTIsOk = [](EVT VT) -> bool { |
| 1256 | return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || |
| 1257 | VT == MVT::i64; |
| 1258 | }; |
| 1259 | |
// We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX supports.
| 1261 | // XVT will be larger than KeptBitsVT. |
MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
| 1263 | return VTIsOk(XVT) && VTIsOk(KeptBitsVT); |
| 1264 | } |
| 1265 | |
| 1266 | ShiftLegalizationStrategy |
| 1267 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
| 1268 | unsigned ExpansionFactor) const override; |
| 1269 | |
| 1270 | bool shouldSplatInsEltVarIndex(EVT VT) const override; |
| 1271 | |
| 1272 | bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override { |
| 1273 | // Converting to sat variants holds little benefit on X86 as we will just |
// need to saturate the value back using fp arithmetic.
| 1275 | return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT); |
| 1276 | } |
| 1277 | |
| 1278 | bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { |
| 1279 | return VT.isScalarInteger(); |
| 1280 | } |
| 1281 | |
| 1282 | /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. |
| 1283 | MVT hasFastEqualityCompare(unsigned NumBits) const override; |
| 1284 | |
| 1285 | /// Return the value type to use for ISD::SETCC. |
| 1286 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
| 1287 | EVT VT) const override; |
| 1288 | |
| 1289 | bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
| 1290 | const APInt &DemandedElts, |
| 1291 | TargetLoweringOpt &TLO) const override; |
| 1292 | |
| 1293 | /// Determine which of the bits specified in Mask are known to be either |
| 1294 | /// zero or one and return them in the KnownZero/KnownOne bitsets. |
| 1295 | void computeKnownBitsForTargetNode(const SDValue Op, |
| 1296 | KnownBits &Known, |
| 1297 | const APInt &DemandedElts, |
| 1298 | const SelectionDAG &DAG, |
| 1299 | unsigned Depth = 0) const override; |
| 1300 | |
| 1301 | /// Determine the number of bits in the operation that are sign bits. |
| 1302 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
| 1303 | const APInt &DemandedElts, |
| 1304 | const SelectionDAG &DAG, |
| 1305 | unsigned Depth) const override; |
| 1306 | |
| 1307 | bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, |
| 1308 | const APInt &DemandedElts, |
| 1309 | APInt &KnownUndef, |
| 1310 | APInt &KnownZero, |
| 1311 | TargetLoweringOpt &TLO, |
| 1312 | unsigned Depth) const override; |
| 1313 | |
| 1314 | bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op, |
| 1315 | const APInt &DemandedElts, |
| 1316 | unsigned MaskIndex, |
| 1317 | TargetLoweringOpt &TLO, |
| 1318 | unsigned Depth) const; |
| 1319 | |
| 1320 | bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
| 1321 | const APInt &DemandedBits, |
| 1322 | const APInt &DemandedElts, |
| 1323 | KnownBits &Known, |
| 1324 | TargetLoweringOpt &TLO, |
| 1325 | unsigned Depth) const override; |
| 1326 | |
| 1327 | SDValue SimplifyMultipleUseDemandedBitsForTargetNode( |
| 1328 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
| 1329 | SelectionDAG &DAG, unsigned Depth) const override; |
| 1330 | |
| 1331 | bool isGuaranteedNotToBeUndefOrPoisonForTargetNode( |
| 1332 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
| 1333 | bool PoisonOnly, unsigned Depth) const override; |
| 1334 | |
| 1335 | bool canCreateUndefOrPoisonForTargetNode( |
| 1336 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
| 1337 | bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override; |
| 1338 | |
| 1339 | bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, |
| 1340 | APInt &UndefElts, const SelectionDAG &DAG, |
| 1341 | unsigned Depth) const override; |
| 1342 | |
| 1343 | bool isTargetCanonicalConstantNode(SDValue Op) const override { |
| 1344 | // Peek through bitcasts/extracts/inserts to see if we have a vector |
| 1345 | // load/broadcast from memory. |
| 1346 | while (Op.getOpcode() == ISD::BITCAST || |
| 1347 | Op.getOpcode() == ISD::EXTRACT_SUBVECTOR || |
| 1348 | (Op.getOpcode() == ISD::INSERT_SUBVECTOR && |
Op.getOperand(0).isUndef()))
Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
| 1351 | |
| 1352 | return Op.getOpcode() == X86ISD::VBROADCAST_LOAD || |
| 1353 | Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD || |
| 1354 | (Op.getOpcode() == ISD::LOAD && |
getTargetConstantFromLoad(cast<LoadSDNode>(Op))) ||
| 1356 | TargetLowering::isTargetCanonicalConstantNode(Op); |
| 1357 | } |
| 1358 | |
| 1359 | const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; |
| 1360 | |
| 1361 | SDValue unwrapAddress(SDValue N) const override; |
| 1362 | |
| 1363 | SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; |
| 1364 | |
| 1365 | bool ExpandInlineAsm(CallInst *CI) const override; |
| 1366 | |
| 1367 | ConstraintType getConstraintType(StringRef Constraint) const override; |
| 1368 | |
| 1369 | /// Examine constraint string and operand type and determine a weight value. |
| 1370 | /// The operand object must already have been set up with the operand type. |
| 1371 | ConstraintWeight |
| 1372 | getSingleConstraintMatchWeight(AsmOperandInfo &Info, |
| 1373 | const char *Constraint) const override; |
| 1374 | |
| 1375 | const char *LowerXConstraint(EVT ConstraintVT) const override; |
| 1376 | |
| 1377 | /// Lower the specified operand into the Ops vector. If it is invalid, don't |
| 1378 | /// add anything to Ops. If hasMemory is true it means one of the asm |
| 1379 | /// constraint of the inline asm instruction being processed is 'm'. |
| 1380 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
| 1381 | std::vector<SDValue> &Ops, |
| 1382 | SelectionDAG &DAG) const override; |
| 1383 | |
| 1384 | InlineAsm::ConstraintCode |
| 1385 | getInlineAsmMemConstraint(StringRef ConstraintCode) const override { |
if (ConstraintCode == "v")
| 1387 | return InlineAsm::ConstraintCode::v; |
| 1388 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
| 1389 | } |
| 1390 | |
| 1391 | /// Handle Lowering flag assembly outputs. |
| 1392 | SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
| 1393 | const SDLoc &DL, |
| 1394 | const AsmOperandInfo &Constraint, |
| 1395 | SelectionDAG &DAG) const override; |
| 1396 | |
| 1397 | /// Given a physical register constraint |
| 1398 | /// (e.g. {edx}), return the register number and the register class for the |
| 1399 | /// register. This should only be used for C_Register constraints. On |
| 1400 | /// error, this returns a register number of 0. |
| 1401 | std::pair<unsigned, const TargetRegisterClass *> |
| 1402 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
| 1403 | StringRef Constraint, MVT VT) const override; |
| 1404 | |
| 1405 | /// Return true if the addressing mode represented |
| 1406 | /// by AM is legal for this target, for a load/store of the specified type. |
| 1407 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, |
| 1408 | Type *Ty, unsigned AS, |
| 1409 | Instruction *I = nullptr) const override; |
| 1410 | |
| 1411 | bool addressingModeSupportsTLS(const GlobalValue &GV) const override; |
| 1412 | |
| 1413 | /// Return true if the specified immediate is legal |
| 1414 | /// icmp immediate, that is the target has icmp instructions which can |
| 1415 | /// compare a register against the immediate without having to materialize |
| 1416 | /// the immediate into a register. |
| 1417 | bool isLegalICmpImmediate(int64_t Imm) const override; |
| 1418 | |
| 1419 | /// Return true if the specified immediate is legal |
| 1420 | /// add immediate, that is the target has add instructions which can |
| 1421 | /// add a register and the immediate without having to materialize |
| 1422 | /// the immediate into a register. |
| 1423 | bool isLegalAddImmediate(int64_t Imm) const override; |
| 1424 | |
| 1425 | bool isLegalStoreImmediate(int64_t Imm) const override; |
| 1426 | |
| 1427 | /// Add x86-specific opcodes to the default list. |
| 1428 | bool isBinOp(unsigned Opcode) const override; |
| 1429 | |
| 1430 | /// Returns true if the opcode is a commutative binary operation. |
| 1431 | bool isCommutativeBinOp(unsigned Opcode) const override; |
| 1432 | |
| 1433 | /// Return true if it's free to truncate a value of |
| 1434 | /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in |
| 1435 | /// register EAX to i16 by referencing its sub-register AX. |
| 1436 | bool isTruncateFree(Type *Ty1, Type *Ty2) const override; |
| 1437 | bool isTruncateFree(EVT VT1, EVT VT2) const override; |
| 1438 | |
| 1439 | bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; |
| 1440 | |
| 1441 | /// Return true if any actual instruction that defines a |
| 1442 | /// value of type Ty1 implicit zero-extends the value to Ty2 in the result |
| 1443 | /// register. This does not necessarily include registers defined in |
| 1444 | /// unknown ways, such as incoming arguments, or copies from unknown |
| 1445 | /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this |
| 1446 | /// does not necessarily apply to truncate instructions. e.g. on x86-64, |
| 1447 | /// all instructions that define 32-bit values implicit zero-extend the |
| 1448 | /// result out to 64 bits. |
| 1449 | bool isZExtFree(Type *Ty1, Type *Ty2) const override; |
| 1450 | bool isZExtFree(EVT VT1, EVT VT2) const override; |
| 1451 | bool isZExtFree(SDValue Val, EVT VT2) const override; |
| 1452 | |
| 1453 | bool shouldConvertPhiType(Type *From, Type *To) const override; |
| 1454 | |
| 1455 | /// Return true if folding a vector load into ExtVal (a sign, zero, or any |
| 1456 | /// extend node) is profitable. |
| 1457 | bool isVectorLoadExtDesirable(SDValue) const override; |
| 1458 | |
| 1459 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
| 1460 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this |
| 1461 | /// method returns true, otherwise fmuladd is expanded to fmul + fadd. |
| 1462 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
| 1463 | EVT VT) const override; |
| 1464 | |
| 1465 | /// Return true if it's profitable to narrow operations of type SrcVT to |
| 1466 | /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not |
| 1467 | /// from i32 to i16. |
| 1468 | bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override; |
| 1469 | |
| 1470 | bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, |
| 1471 | unsigned SelectOpcode, SDValue X, |
| 1472 | SDValue Y) const override; |
| 1473 | |
| 1474 | /// Given an intrinsic, checks if on the target the intrinsic will need to map |
| 1475 | /// to a MemIntrinsicNode (touches memory). If this is the case, it returns |
| 1476 | /// true and stores the intrinsic information into the IntrinsicInfo that was |
| 1477 | /// passed to the function. |
| 1478 | bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, |
| 1479 | MachineFunction &MF, |
| 1480 | unsigned Intrinsic) const override; |
| 1481 | |
| 1482 | /// Returns true if the target can instruction select the |
| 1483 | /// specified FP immediate natively. If false, the legalizer will |
| 1484 | /// materialize the FP immediate as a load from a constant pool. |
| 1485 | bool isFPImmLegal(const APFloat &Imm, EVT VT, |
| 1486 | bool ForCodeSize) const override; |
| 1487 | |
| 1488 | /// Targets can use this to indicate that they only support *some* |
| 1489 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a |
| 1490 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to |
| 1491 | /// be legal. |
| 1492 | bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override; |
| 1493 | |
| 1494 | /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there |
| 1495 | /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a |
| 1496 | /// constant pool entry. |
| 1497 | bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override; |
| 1498 | |
| 1499 | /// Returns true if lowering to a jump table is allowed. |
| 1500 | bool areJTsAllowed(const Function *Fn) const override; |
| 1501 | |
| 1502 | MVT getPreferredSwitchConditionType(LLVMContext &Context, |
| 1503 | EVT ConditionVT) const override; |
| 1504 | |
| 1505 | /// If true, then instruction selection should |
| 1506 | /// seek to shrink the FP constant of the specified type to a smaller type |
| 1507 | /// in order to save space and / or reduce runtime. |
| 1508 | bool ShouldShrinkFPConstant(EVT VT) const override; |
| 1509 | |
| 1510 | /// Return true if we believe it is correct and profitable to reduce the |
| 1511 | /// load node to a smaller type. |
| 1512 | bool |
| 1513 | shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, |
| 1514 | std::optional<unsigned> ByteOffset) const override; |
| 1515 | |
| 1516 | /// Return true if the specified scalar FP type is computed in an SSE |
| 1517 | /// register, not on the X87 floating point stack. |
| 1518 | bool isScalarFPTypeInSSEReg(EVT VT) const; |
| 1519 | |
| 1520 | /// Returns true if it is beneficial to convert a load of a constant |
| 1521 | /// to just the constant itself. |
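/// For example (illustrative), on x86-64 a 64-bit constant can be
/// materialized with a single MOVABS (MOV r64, imm64) rather than loaded
/// from memory.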
| 1522 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
| 1523 | Type *Ty) const override; |
| 1524 | |
| 1525 | bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override; |
| 1526 | |
| 1527 | bool convertSelectOfConstantsToMath(EVT VT) const override; |
| 1528 | |
| 1529 | bool decomposeMulByConstant(LLVMContext &Context, EVT VT, |
| 1530 | SDValue C) const override; |
| 1531 | |
| 1532 | /// Return true if EXTRACT_SUBVECTOR is cheap for this result type |
| 1533 | /// with this index. |
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
| 1536 | |
| 1537 | /// Scalar ops always have equal or better analysis/performance/power than |
| 1538 | /// the vector equivalent, so this always makes sense if the scalar op is |
| 1539 | /// supported. |
| 1540 | bool shouldScalarizeBinop(SDValue) const override; |
| 1541 | |
| 1542 | /// Extract of a scalar FP value from index 0 of a vector is free. |
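/// (The scalar value already lives in the low lane of an XMM register, so
/// extracting element 0 should need no shuffle or move.)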
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
| 1544 | EVT EltVT = VT.getScalarType(); |
| 1545 | return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0; |
| 1546 | } |
| 1547 | |
| 1548 | /// Overflow nodes should get combined/lowered to optimal instructions |
| 1549 | /// (they should allow eliminating explicit compares by getting flags from |
| 1550 | /// math ops). |
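/// For example, (uadd.with.overflow a, b) can lower to an ADD whose carry
/// is read directly from EFLAGS via JC/SETC, avoiding a separate CMP.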
| 1551 | bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
| 1552 | bool MathUsed) const override; |
| 1553 | |
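/// Return true when storing a vector constant is expected to be cheaper
/// than storing its elements as scalars. Illustrative x86 case: four scalar
/// zero stores can be replaced by one XORPS to materialize a zero register
/// plus a single 16-byte vector store.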
| 1554 | bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, |
| 1555 | unsigned AddrSpace) const override { |
| 1556 | // If we can replace more than 2 scalar stores, there will be a reduction |
| 1557 | // in instructions even after we add a vector constant load. |
| 1558 | return IsZero || NumElem > 2; |
| 1559 | } |
| 1560 | |
| 1561 | bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
| 1562 | const SelectionDAG &DAG, |
| 1563 | const MachineMemOperand &MMO) const override; |
| 1564 | |
| 1565 | Register getRegisterByName(const char* RegName, LLT VT, |
| 1566 | const MachineFunction &MF) const override; |
| 1567 | |
| 1568 | /// If a physical register, this returns the register that receives the |
| 1569 | /// exception address on entry to an EH pad. |
| 1570 | Register |
| 1571 | getExceptionPointerRegister(const Constant *PersonalityFn) const override; |
| 1572 | |
| 1573 | /// If a physical register, this returns the register that receives the |
| 1574 | /// exception typeid on entry to a landing pad. |
| 1575 | Register |
| 1576 | getExceptionSelectorRegister(const Constant *PersonalityFn) const override; |
| 1577 | |
| 1578 | bool needsFixedCatchObjects() const override; |
| 1579 | |
| 1580 | /// This method returns a target specific FastISel object, |
| 1581 | /// or null if the target does not support "fast" ISel. |
| 1582 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
| 1583 | const TargetLibraryInfo *libInfo) const override; |
| 1584 | |
| 1585 | /// If the target has a standard location for the stack protector cookie, |
| 1586 | /// returns the address of that location. Otherwise, returns nullptr. |
| 1587 | Value *getIRStackGuard(IRBuilderBase &IRB) const override; |
| 1588 | |
| 1589 | bool useLoadStackGuardNode(const Module &M) const override; |
| 1590 | bool useStackGuardXorFP() const override; |
| 1591 | void insertSSPDeclarations(Module &M) const override; |
| 1592 | Value *getSDagStackGuard(const Module &M) const override; |
| 1593 | Function *getSSPStackGuardCheck(const Module &M) const override; |
| 1594 | SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
| 1595 | const SDLoc &DL) const override; |
| 1596 | |
| 1597 | |
/// Return the address at which the target stores the SafeStack pointer: a
/// fixed offset in some non-standard address space, with the address space
/// and offset populated as appropriate.
| 1601 | Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; |
| 1602 | |
| 1603 | std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, |
| 1604 | SDValue Chain, SDValue Pointer, |
| 1605 | MachinePointerInfo PtrInfo, |
| 1606 | Align Alignment, |
| 1607 | SelectionDAG &DAG) const; |
| 1608 | |
| 1609 | /// Customize the preferred legalization strategy for certain types. |
| 1610 | LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; |
| 1611 | |
| 1612 | bool softPromoteHalfType() const override { return true; } |
| 1613 | |
| 1614 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, |
| 1615 | EVT VT) const override; |
| 1616 | |
| 1617 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
| 1618 | CallingConv::ID CC, |
| 1619 | EVT VT) const override; |
| 1620 | |
| 1621 | unsigned getVectorTypeBreakdownForCallingConv( |
| 1622 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
| 1623 | unsigned &NumIntermediates, MVT &RegisterVT) const override; |
| 1624 | |
| 1625 | bool functionArgumentNeedsConsecutiveRegisters( |
| 1626 | Type *Ty, CallingConv::ID CallConv, bool isVarArg, |
| 1627 | const DataLayout &DL) const override; |
| 1628 | |
| 1629 | bool isIntDivCheap(EVT VT, AttributeList Attr) const override; |
| 1630 | |
| 1631 | bool supportSwiftError() const override; |
| 1632 | |
| 1633 | bool supportKCFIBundles() const override { return true; } |
| 1634 | |
| 1635 | MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
| 1636 | MachineBasicBlock::instr_iterator &MBBI, |
| 1637 | const TargetInstrInfo *TII) const override; |
| 1638 | |
| 1639 | bool hasStackProbeSymbol(const MachineFunction &MF) const override; |
| 1640 | bool hasInlineStackProbe(const MachineFunction &MF) const override; |
| 1641 | StringRef getStackProbeSymbolName(const MachineFunction &MF) const override; |
| 1642 | |
| 1643 | unsigned getStackProbeSize(const MachineFunction &MF) const; |
| 1644 | |
| 1645 | bool hasVectorBlend() const override { return true; } |
| 1646 | |
| 1647 | unsigned getMaxSupportedInterleaveFactor() const override { return 4; } |
| 1648 | |
| 1649 | bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, |
| 1650 | unsigned OpNo) const override; |
| 1651 | |
| 1652 | SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, |
| 1653 | MachineMemOperand *MMO, SDValue &NewLoad, |
| 1654 | SDValue Ptr, SDValue PassThru, |
| 1655 | SDValue Mask) const override; |
| 1656 | SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, |
| 1657 | MachineMemOperand *MMO, SDValue Ptr, SDValue Val, |
| 1658 | SDValue Mask) const override; |
| 1659 | |
| 1660 | /// Lower interleaved load(s) into target specific |
| 1661 | /// instructions/intrinsics. |
| 1662 | bool lowerInterleavedLoad(LoadInst *LI, |
| 1663 | ArrayRef<ShuffleVectorInst *> Shuffles, |
| 1664 | ArrayRef<unsigned> Indices, |
| 1665 | unsigned Factor) const override; |
| 1666 | |
| 1667 | /// Lower interleaved store(s) into target specific |
| 1668 | /// instructions/intrinsics. |
| 1669 | bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
| 1670 | unsigned Factor) const override; |
| 1671 | |
| 1672 | SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, |
| 1673 | int JTI, SelectionDAG &DAG) const override; |
| 1674 | |
| 1675 | Align getPrefLoopAlignment(MachineLoop *ML) const override; |
| 1676 | |
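// Note: f80 is softened to i96, the smallest 32-bit-aligned integer type
// wide enough to hold all 80 bits of an x86_fp80; every other type uses the
// default transform.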
| 1677 | EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override { |
| 1678 | if (VT == MVT::f80) |
return EVT::getIntegerVT(Context, 96);
| 1680 | return TargetLoweringBase::getTypeToTransformTo(Context, VT); |
| 1681 | } |
| 1682 | |
| 1683 | protected: |
| 1684 | std::pair<const TargetRegisterClass *, uint8_t> |
| 1685 | findRepresentativeClass(const TargetRegisterInfo *TRI, |
| 1686 | MVT VT) const override; |
| 1687 | |
| 1688 | private: |
| 1689 | /// Keep a reference to the X86Subtarget around so that we can |
| 1690 | /// make the right decision when generating code for different targets. |
| 1691 | const X86Subtarget &Subtarget; |
| 1692 | |
| 1693 | /// A list of legal FP immediates. |
| 1694 | std::vector<APFloat> LegalFPImmediates; |
| 1695 | |
| 1696 | /// Indicate that this x86 target can instruction |
| 1697 | /// select the specified FP immediate natively. |
| 1698 | void addLegalFPImmediate(const APFloat& Imm) { |
LegalFPImmediates.push_back(Imm);
| 1700 | } |
| 1701 | |
| 1702 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
| 1703 | CallingConv::ID CallConv, bool isVarArg, |
| 1704 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 1705 | const SDLoc &dl, SelectionDAG &DAG, |
| 1706 | SmallVectorImpl<SDValue> &InVals, |
| 1707 | uint32_t *RegMask) const; |
| 1708 | SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, |
| 1709 | const SmallVectorImpl<ISD::InputArg> &ArgInfo, |
| 1710 | const SDLoc &dl, SelectionDAG &DAG, |
| 1711 | const CCValAssign &VA, MachineFrameInfo &MFI, |
| 1712 | unsigned i) const; |
| 1713 | SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, |
| 1714 | const SDLoc &dl, SelectionDAG &DAG, |
| 1715 | const CCValAssign &VA, |
| 1716 | ISD::ArgFlagsTy Flags, bool isByval) const; |
| 1717 | |
| 1718 | // Call lowering helpers. |
| 1719 | |
| 1720 | /// Check whether the call is eligible for tail call optimization. Targets |
| 1721 | /// that want to do tail call optimization should implement this function. |
| 1722 | bool IsEligibleForTailCallOptimization( |
| 1723 | TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo, |
| 1724 | SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const; |
| 1725 | SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, |
| 1726 | SDValue Chain, bool IsTailCall, |
| 1727 | bool Is64Bit, int FPDiff, |
| 1728 | const SDLoc &dl) const; |
| 1729 | |
| 1730 | unsigned GetAlignedArgumentStackSize(unsigned StackSize, |
| 1731 | SelectionDAG &DAG) const; |
| 1732 | |
| 1733 | unsigned getAddressSpace() const; |
| 1734 | |
| 1735 | SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, |
| 1736 | SDValue &Chain) const; |
| 1737 | SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const; |
| 1738 | |
| 1739 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
| 1740 | SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; |
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
| 1742 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
| 1743 | |
| 1744 | unsigned getGlobalWrapperKind(const GlobalValue *GV, |
| 1745 | const unsigned char OpFlags) const; |
| 1746 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
| 1747 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
| 1748 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
| 1749 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
| 1750 | SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; |
| 1751 | |
| 1752 | /// Creates target global address or external symbol nodes for calls or |
| 1753 | /// other uses. |
| 1754 | SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall, |
| 1755 | bool *IsImpCall) const; |
| 1756 | |
| 1757 | SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
| 1758 | SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
| 1759 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; |
| 1760 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; |
| 1761 | SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; |
| 1762 | SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const; |
| 1763 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
| 1764 | SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; |
| 1765 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
| 1766 | SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; |
| 1767 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
| 1768 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
| 1769 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
| 1770 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
| 1771 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
| 1772 | SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
| 1773 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
| 1774 | SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; |
| 1775 | SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; |
| 1776 | SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; |
| 1777 | SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; |
| 1778 | SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; |
| 1779 | SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
| 1780 | SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
| 1781 | SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
| 1782 | SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const; |
| 1783 | SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const; |
| 1784 | SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const; |
| 1785 | SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; |
| 1786 | SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG, |
| 1787 | SDValue &Chain) const; |
| 1788 | SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
| 1789 | SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const; |
| 1790 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
| 1791 | SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const; |
| 1792 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
| 1793 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
| 1794 | SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const; |
| 1795 | |
| 1796 | SDValue |
| 1797 | LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| 1798 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 1799 | const SDLoc &dl, SelectionDAG &DAG, |
| 1800 | SmallVectorImpl<SDValue> &InVals) const override; |
| 1801 | SDValue LowerCall(CallLoweringInfo &CLI, |
| 1802 | SmallVectorImpl<SDValue> &InVals) const override; |
| 1803 | |
| 1804 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| 1805 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 1806 | const SmallVectorImpl<SDValue> &OutVals, |
| 1807 | const SDLoc &dl, SelectionDAG &DAG) const override; |
| 1808 | |
| 1809 | bool supportSplitCSR(MachineFunction *MF) const override { |
| 1810 | return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
| 1812 | } |
| 1813 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
| 1814 | void insertCopiesSplitCSR( |
| 1815 | MachineBasicBlock *Entry, |
| 1816 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
| 1817 | |
| 1818 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
| 1819 | |
| 1820 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
| 1821 | |
| 1822 | EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, |
| 1823 | ISD::NodeType ExtendKind) const override; |
| 1824 | |
| 1825 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
| 1826 | bool isVarArg, |
| 1827 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 1828 | LLVMContext &Context, |
| 1829 | const Type *RetTy) const override; |
| 1830 | |
| 1831 | const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; |
| 1832 | ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; |
| 1833 | |
| 1834 | TargetLoweringBase::AtomicExpansionKind |
| 1835 | shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
| 1836 | TargetLoweringBase::AtomicExpansionKind |
| 1837 | shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
| 1838 | TargetLoweringBase::AtomicExpansionKind |
| 1839 | shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; |
| 1840 | TargetLoweringBase::AtomicExpansionKind |
| 1841 | shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; |
| 1842 | void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; |
| 1843 | void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; |
| 1844 | |
| 1845 | LoadInst * |
| 1846 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; |
| 1847 | |
| 1848 | bool needsCmpXchgNb(Type *MemType) const; |
| 1849 | |
| 1850 | void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, |
| 1851 | MachineBasicBlock *DispatchBB, int FI) const; |
| 1852 | |
| 1853 | // Utility function to emit the low-level va_arg code for X86-64. |
| 1854 | MachineBasicBlock * |
| 1855 | EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; |
| 1856 | |
/// Lower a cascaded pair of CMOV select pseudo-instructions, where the
/// second select consumes the result of the first.
| 1858 | MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1, |
| 1859 | MachineInstr &MI2, |
| 1860 | MachineBasicBlock *BB) const; |
| 1861 | |
| 1862 | MachineBasicBlock *EmitLoweredSelect(MachineInstr &I, |
| 1863 | MachineBasicBlock *BB) const; |
| 1864 | |
| 1865 | MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, |
| 1866 | MachineBasicBlock *BB) const; |
| 1867 | |
| 1868 | MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI, |
| 1869 | MachineBasicBlock *BB) const; |
| 1870 | |
| 1871 | MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI, |
| 1872 | MachineBasicBlock *BB) const; |
| 1873 | |
| 1874 | MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, |
| 1875 | MachineBasicBlock *BB) const; |
| 1876 | |
| 1877 | MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, |
| 1878 | MachineBasicBlock *BB) const; |
| 1879 | |
| 1880 | MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, |
| 1881 | MachineBasicBlock *MBB) const; |
| 1882 | |
| 1883 | void emitSetJmpShadowStackFix(MachineInstr &MI, |
| 1884 | MachineBasicBlock *MBB) const; |
| 1885 | |
| 1886 | MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, |
| 1887 | MachineBasicBlock *MBB) const; |
| 1888 | |
| 1889 | MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI, |
| 1890 | MachineBasicBlock *MBB) const; |
| 1891 | |
| 1892 | MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI, |
| 1893 | MachineBasicBlock *MBB) const; |
| 1894 | |
| 1895 | MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI, |
| 1896 | MachineBasicBlock *MBB) const; |
| 1897 | |
| 1898 | /// Emit flags for the given setcc condition and operands. Also returns the |
| 1899 | /// corresponding X86 condition code constant in X86CC. |
| 1900 | SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC, |
| 1901 | const SDLoc &dl, SelectionDAG &DAG, |
| 1902 | SDValue &X86CC) const; |
| 1903 | |
| 1904 | bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, |
| 1905 | SDValue IntPow2) const override; |
| 1906 | |
| 1907 | /// Check if replacement of SQRT with RSQRT should be disabled. |
| 1908 | bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override; |
| 1909 | |
| 1910 | /// Use rsqrt* to speed up sqrt calculations. |
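/// (Typically RSQRTSS/RSQRTPS provide the initial estimate, refined with
/// Newton-Raphson steps, e.g. r = r * (1.5 - 0.5 * x * r * r); sqrt(x) is
/// then approximated as x * r.)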
| 1911 | SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, |
| 1912 | int &RefinementSteps, bool &UseOneConstNR, |
| 1913 | bool Reciprocal) const override; |
| 1914 | |
| 1915 | /// Use rcp* to speed up fdiv calculations. |
| 1916 | SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled, |
| 1917 | int &RefinementSteps) const override; |
| 1918 | |
| 1919 | /// Reassociate floating point divisions into multiply by reciprocal. |
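/// Returns the minimum number of divisions by the same divisor required
/// before the rewrite is applied (zero disables it); e.g. a/b and c/b can
/// become t = 1.0/b; a*t; c*t under the appropriate fast-math flags.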
| 1920 | unsigned combineRepeatedFPDivisors() const override; |
| 1921 | |
| 1922 | SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, |
| 1923 | SmallVectorImpl<SDNode *> &Created) const override; |
| 1924 | |
| 1925 | SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, |
| 1926 | SDValue V2) const; |
| 1927 | }; |
| 1928 | |
| 1929 | namespace X86 { |
| 1930 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
| 1931 | const TargetLibraryInfo *libInfo); |
| 1932 | } // end namespace X86 |
| 1933 | |
| 1934 | // X86 specific Gather/Scatter nodes. |
| 1935 | // The class has the same order of operands as MaskedGatherScatterSDNode for |
| 1936 | // convenience. |
| 1937 | class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode { |
| 1938 | public: |
// This is intended as a utility and should never be directly created.
| 1940 | X86MaskedGatherScatterSDNode() = delete; |
| 1941 | ~X86MaskedGatherScatterSDNode() = delete; |
| 1942 | |
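// Operand layout (illustrative summary, mirroring MaskedGatherScatterSDNode):
//   0 - Chain
//   1 - PassThru (gather) / StoredVal (scatter)
//   2 - Mask
//   3 - BasePtr
//   4 - Index
//   5 - Scale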
const SDValue &getBasePtr() const { return getOperand(3); }
const SDValue &getIndex() const { return getOperand(4); }
const SDValue &getMask() const { return getOperand(2); }
const SDValue &getScale() const { return getOperand(5); }
| 1947 | |
| 1948 | static bool classof(const SDNode *N) { |
| 1949 | return N->getOpcode() == X86ISD::MGATHER || |
| 1950 | N->getOpcode() == X86ISD::MSCATTER; |
| 1951 | } |
| 1952 | }; |
| 1953 | |
| 1954 | class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode { |
| 1955 | public: |
const SDValue &getPassThru() const { return getOperand(1); }
| 1957 | |
| 1958 | static bool classof(const SDNode *N) { |
| 1959 | return N->getOpcode() == X86ISD::MGATHER; |
| 1960 | } |
| 1961 | }; |
| 1962 | |
| 1963 | class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode { |
| 1964 | public: |
const SDValue &getValue() const { return getOperand(1); }
| 1966 | |
| 1967 | static bool classof(const SDNode *N) { |
| 1968 | return N->getOpcode() == X86ISD::MSCATTER; |
| 1969 | } |
| 1970 | }; |
| 1971 | |
| 1972 | /// Generate unpacklo/unpackhi shuffle mask. |
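/// For example (unary form, v8i32, respecting 128-bit lanes):
///   Lo --> <0, 0, 1, 1, 4, 4, 5, 5>
///   Hi --> <2, 2, 3, 3, 6, 6, 7, 7>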
| 1973 | void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo, |
| 1974 | bool Unary); |
| 1975 | |
| 1976 | /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation |
| 1977 | /// imposed by AVX and specific to the unary pattern. Example: |
| 1978 | /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3> |
| 1979 | /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7> |
| 1980 | void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo); |
| 1981 | |
| 1982 | } // end namespace llvm |
| 1983 | |
| 1984 | #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
| 1985 | |