//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that PPC uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H

#include "PPCInstrInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include <optional>
#include <utility>

namespace llvm {

namespace PPCISD {

// When adding a NEW PPCISD node please add it to the correct position in
// the enum. The order of elements in this enum matters!
// Values that are added between FIRST_MEMORY_OPCODE and LAST_MEMORY_OPCODE
// are considered memory opcodes and are treated differently than other
// entries.
enum NodeType : unsigned {
  // Start the numbering where the builtin ops and target ops leave off.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  /// FSEL - Traditional three-operand fsel node.
  ///
  FSEL,

  /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
  XSMAXC,
  XSMINC,

  /// FCFID - The FCFID instruction, taking an f64 operand and producing
  /// an f64 value containing the FP representation of the integer that
  /// was temporarily in the f64 operand.
  FCFID,

  /// Newer FCFID[US] integer-to-floating-point conversion instructions for
  /// unsigned integers and single-precision outputs.
  FCFIDU,
  FCFIDS,
  FCFIDUS,

  /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
  /// operand, producing an f64 value containing the integer representation
  /// of that FP value.
  FCTIDZ,
  FCTIWZ,

  /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
  /// unsigned integers with round toward zero.
  FCTIDUZ,
  FCTIWUZ,

  /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
  /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
  VEXTS,

  /// Reciprocal estimate instructions (unary FP ops).
  FRE,
  FRSQRTE,

  /// Test instruction for software square root.
  FTSQRT,

  /// Square root instruction.
  FSQRT,

  /// VPERM - The PPC VPERM Instruction.
  ///
  VPERM,

  /// XXSPLT - The PPC VSX splat instructions
  ///
  XXSPLT,

  /// XXSPLTI_SP_TO_DP - The PPC VSX immediate-splat instruction that
  /// converts an immediate single-precision number to a double-precision
  /// vector or scalar.
  XXSPLTI_SP_TO_DP,

  /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
  ///
  XXSPLTI32DX,

  /// VECINSERT - The PPC vector insert instruction
  ///
  VECINSERT,

  /// VECSHL - The PPC vector shift left instruction
  ///
  VECSHL,

  /// XXPERMDI - The PPC XXPERMDI instruction
  ///
  XXPERMDI,
  XXPERM,

  /// The CMPB instruction (takes two operands of i32 or i64).
  CMPB,

  /// Hi/Lo - These represent the high and low 16-bit parts of a global
  /// address respectively. These nodes have two operands, the first of
  /// which must be a TargetGlobalAddress, and the second of which must be a
  /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
  /// though these are usually folded into other nodes.
  Hi,
  Lo,
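
  // For illustration, a typical folded selection of a global address G is
  // (a sketch, not the only possible selection):
  //   lis  r3, G@ha        # PPCISD::Hi
  //   addi r3, r3, G@l     # PPCISD::Lo folded into the add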

  /// The following two target-specific nodes are used for calls through
  /// function pointers in the 64-bit SVR4 ABI.

  /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
  /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
  /// compute an allocation on the stack.
  DYNALLOC,

  /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
  /// compute an offset from native SP to the address of the most recent
  /// dynamic alloca.
  DYNAREAOFFSET,

  /// To avoid stack clash, allocation is performed by block and each block is
  /// probed.
  PROBED_ALLOCA,

  /// The result of the mflr at function entry, used for PIC code.
  GlobalBaseReg,

  /// These nodes represent PPC shifts.
  ///
  /// For scalar types, only the last `n + 1` bits of the shift amounts
  /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
  /// for exact behaviors.
  ///
  /// For vector types, only the last n bits are used. See vsld.
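  ///
  /// For example, for an i32 shift n is 5, so slw reads the low 6 bits of
  /// the amount: a shift amount of 33 (bit 5 set) produces 0 rather than
  /// wrapping around to a shift by 1.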
  SRL,
  SRA,
  SHL,

  /// These nodes represent PPC arithmetic operations with carry.
  ADDC,
  ADDE,
  SUBC,
  SUBE,

  /// FNMSUB - Negated multiply-subtract instruction.
  FNMSUB,

  /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
  /// word and shift left immediate.
  EXTSWSLI,

  /// The combination of sra[wd]i and addze used to implement signed
  /// integer division by a power of 2. The first operand is the dividend,
  /// and the second is the constant shift amount (representing the
  /// divisor).
  SRA_ADDZE,

  /// CALL - A direct function call.
  /// CALL_NOP is a call with the special NOP which follows 64-bit
  /// SVR4 calls and 32-bit/64-bit AIX calls.
  /// CALL_NOTOC is a call in which the caller does not use the TOC.
  CALL,
  CALL_NOP,
  CALL_NOTOC,

  /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
  /// MTCTR instruction.
  MTCTR,

  /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
  /// BCTRL instruction.
  BCTRL,

  /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
  /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
  /// and 64-bit AIX.
  BCTRL_LOAD_TOC,

  /// The variants that implicitly define rounding mode for calls with
  /// strictfp semantics.
  CALL_RM,
  CALL_NOP_RM,
  CALL_NOTOC_RM,
  BCTRL_RM,
  BCTRL_LOAD_TOC_RM,

  /// Return with a glue operand, matched by 'blr'
  RET_GLUE,

  /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
  /// This copies the bits corresponding to the specified CRREG into the
  /// resultant GPR. Bits corresponding to other CR regs are undefined.
  MFOCRF,

  /// Direct move from a VSX register to a GPR
  MFVSR,

  /// Direct move from a GPR to a VSX register (algebraic)
  MTVSRA,

  /// Direct move from a GPR to a VSX register (zero)
  MTVSRZ,

  /// Direct move of 2 consecutive GPR to a VSX register.
  BUILD_FP128,

  /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
  /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
  /// unsupported for this target.
  /// Merge 2 GPRs to a single SPE register.
  BUILD_SPE64,

  /// Extract SPE register component, second argument is high or low.
  EXTRACT_SPE,

  /// Extract a subvector from signed integer vector and convert to FP.
  /// It is primarily used to convert a (widened) illegal integer vector
  /// type to a legal floating point vector type.
  /// For example v2i32 -> widened to v4i32 -> v2f64
  SINT_VEC_TO_FP,

  /// Extract a subvector from unsigned integer vector and convert to FP.
  /// As with SINT_VEC_TO_FP, used for converting illegal types.
  UINT_VEC_TO_FP,

  /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to
  /// place the value into the least significant element of the most
  /// significant doubleword in the vector. This is not element zero for
  /// anything smaller than a doubleword on either endianness. This node has
  /// the same semantics as SCALAR_TO_VECTOR except that the value remains in
  /// the aforementioned location in the vector register.
  SCALAR_TO_VECTOR_PERMUTED,

  // FIXME: Remove these once the ANDI glue bug is fixed:
  /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
  /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
  /// implement truncation of i32 or i64 to i1.
  ANDI_rec_1_EQ_BIT,
  ANDI_rec_1_GT_BIT,

  // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
  // target (returns (Lo, Hi)). It takes a chain operand.
  READ_TIME_BASE,

  // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
  EH_SJLJ_SETJMP,

  // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
  EH_SJLJ_LONGJMP,

  /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
  /// instructions. For lack of better number, we use the opcode number
  /// encoding for the OPC field to identify the compare. For example, 838
  /// is VCMPGTSH.
  VCMP,

  /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the
  /// altivec VCMP*_rec instructions. For lack of better number, we use the
  /// opcode number encoding for the OPC field to identify the compare. For
  /// example, 838 is VCMPGTSH.
  VCMP_rec,

  /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
  /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
  /// condition register to branch on, OPC is the branch opcode to use (e.g.
  /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
  /// an optional input flag argument.
  COND_BRANCH,

  /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
  /// loops.
  BDNZ,
  BDZ,

  /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
  /// towards zero. Used only as part of the long double-to-int
  /// conversion sequence.
  FADDRTZ,

  /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
  MFFS,

  /// TC_RETURN - A tail call return.
  ///   operand #0 chain
  ///   operand #1 callee (register or absolute)
  ///   operand #2 stack adjustment
  ///   operand #3 optional in flag
  TC_RETURN,

  /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
  CR6SET,
  CR6UNSET,

  /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
  /// for non-position independent code on PPC32.
  PPC32_GOT,

  /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
  /// local dynamic TLS and position independent code on PPC32.
  PPC32_PICGOT,

  /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
  /// TLS model, produces an ADDIS8 instruction that adds the GOT
  /// base to sym\@got\@tprel\@ha.
  ADDIS_GOT_TPREL_HA,

  /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
  /// TLS model, produces a LD instruction with base register G8RReg
  /// and offset sym\@got\@tprel\@l. This completes the addition that
  /// finds the offset of "sym" relative to the thread pointer.
  LD_GOT_TPREL_L,

  /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec
  /// and local-exec TLS models, produces an ADD instruction that adds
  /// the contents of G8RReg to the thread pointer. Symbol contains a
  /// relocation sym\@tls which is to be replaced by the thread pointer
  /// and identifies to the linker that the instruction is part of a
  /// TLS sequence.
  ADD_TLS,

  /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
  /// model, produces an ADDIS8 instruction that adds the GOT base
  /// register to sym\@got\@tlsgd\@ha.
  ADDIS_TLSGD_HA,

  /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
  /// model, produces an ADDI8 instruction that adds G8RReg to
  /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
  /// ADDI_TLSGD_L_ADDR until after register assignment.
  ADDI_TLSGD_L,

  /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
  /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
  /// ADDI_TLSGD_L_ADDR until after register assignment.
  GET_TLS_ADDR,

  /// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on
  /// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread
  /// pointer. At the end of the call, the thread pointer is found in R3.
  GET_TPOINTER,

  /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
  /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
  /// register assignment.
  ADDI_TLSGD_L_ADDR,

  /// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
  /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
  /// Op that combines two register copies of TOC entries
  /// (region handle into R3 and variable offset into R4) followed by a
  /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr.
  /// This node is used in 64-bit mode as well (in which case the result is
  /// G8RC and inputs are X3/X4).
  TLSGD_AIX,

  /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
  /// produces a call to .__tls_get_mod(_$TLSML\@ml).
  GET_TLS_MOD_AIX,

  /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle)
  /// Op that requires a single input of the module handle TOC entry in R3,
  /// and generates a GET_TLS_MOD_AIX node which will be expanded into a call
  /// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes.
  /// The only difference is the register class.
  TLSLD_AIX,

  /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
  /// model, produces an ADDIS8 instruction that adds the GOT base
  /// register to sym\@got\@tlsld\@ha.
  ADDIS_TLSLD_HA,

  /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
  /// model, produces an ADDI8 instruction that adds G8RReg to
  /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
  /// ADDI_TLSLD_L_ADDR until after register assignment.
  ADDI_TLSLD_L,

  /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
  /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
  /// ADDI_TLSLD_L_ADDR until after register assignment.
  GET_TLSLD_ADDR,

  /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
  /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
  /// following register assignment.
  ADDI_TLSLD_L_ADDR,

  /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
  /// model, produces an ADDIS8 instruction that adds X3 to
  /// sym\@dtprel\@ha.
  ADDIS_DTPREL_HA,

  /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
  /// model, produces an ADDI8 instruction that adds G8RReg to
  /// sym\@got\@dtprel\@l.
  ADDI_DTPREL_L,

  /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS
  /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel.
  PADDI_DTPREL,

  /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
  /// during instruction selection to optimize a BUILD_VECTOR into
  /// operations on splats. This is necessary to avoid losing these
  /// optimizations due to constant folding.
  VADD_SPLAT,

  /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
  /// operand identifies the operating system entry point.
  SC,

  /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
  CLRBHRB,

  /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
  /// history rolling buffer entry.
  MFBHRBE,

  /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
  RFEBB,

  /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
  /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
  /// or stxvd2x instruction. The chain is necessary because the
  /// sequence replaces a load and needs to provide the same number
  /// of outputs.
  XXSWAPD,
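
  // For illustration, on little-endian subtargets before ISA 3.0 a vector
  // load is typically selected as the pair (a sketch):
  //   lxvd2x  vs0, 0, r3   # PPCISD::LXVD2X
  //   xxswapd vs0, vs0     # PPCISD::XXSWAPD restores element order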

  /// An SDNode for swaps that are not associated with any loads/stores
  /// and thereby have no chain.
  SWAP_NO_CHAIN,

  /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
  /// lower (IDX=1) half of v4f32 to v2f64.
  FP_EXTEND_HALF,

  /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done
  /// either through an add like PADDI or through a PC Relative load like
  /// PLD.
  MAT_PCREL_ADDR,

  /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for
  /// TLS global address when using dynamic access models. This can be done
  /// through an add like PADDI.
  TLS_DYNAMIC_MAT_PCREL_ADDR,

  /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address
  /// when using local exec access models, and when prefixed instructions are
  /// available. This is used with ADD_TLS to produce an add like PADDI.
  TLS_LOCAL_EXEC_MAT_ADDR,

  /// ACC_BUILD = Build an accumulator register from 4 VSX registers.
  ACC_BUILD,

  /// PAIR_BUILD = Build a vector pair register from 2 VSX registers.
  PAIR_BUILD,

  /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of
  /// an accumulator or pair register. This node is needed because
  /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same
  /// element type.
  EXTRACT_VSX_REG,

  /// XXMFACC = This corresponds to the xxmfacc instruction.
  XXMFACC,

  // Constrained conversion from floating point to int
  FIRST_STRICTFP_OPCODE,
  STRICT_FCTIDZ = FIRST_STRICTFP_OPCODE,
  STRICT_FCTIWZ,
  STRICT_FCTIDUZ,
  STRICT_FCTIWUZ,

  /// Constrained integer-to-floating-point conversion instructions.
  STRICT_FCFID,
  STRICT_FCFIDU,
  STRICT_FCFIDS,
  STRICT_FCFIDUS,

  /// Constrained floating point add in round-to-zero mode.
  STRICT_FADDRTZ,
  LAST_STRICTFP_OPCODE = STRICT_FADDRTZ,

  /// SETBC - The ISA 3.1 (P10) SETBC instruction.
  SETBC,

  /// SETBCR - The ISA 3.1 (P10) SETBCR instruction.
  SETBCR,

  // NOTE: The nodes below may require PC-Rel specific patterns if the
  // address could be PC-Relative. When adding new nodes below, consider
  // whether or not the address can be PC-Relative and add the corresponding
  // PC-relative patterns and tests.

  /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
  /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
  /// the GPRC input, then stores it through Ptr. Type can be either i16 or
  /// i32.
  FIRST_MEMORY_OPCODE,
  STBRX = FIRST_MEMORY_OPCODE,

  /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
  /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
  /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
  /// or i32.
  LBRX,

  /// STFIWX - The STFIWX instruction. The first operand is an input token
  /// chain, then an f64 value to store, then an address to store it to.
  STFIWX,

  /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
  /// load which sign-extends from a 32-bit integer value into the
  /// destination 64-bit register.
  LFIWAX,

  /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
  /// load which zero-extends from a 32-bit integer value into the
  /// destination 64-bit register.
  LFIWZX,

  /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
  /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
  /// This can be used for converting loaded integers to floating point.
  LXSIZX,

  /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
  /// chain, then an f64 value to store, then an address to store it to,
  /// followed by a byte-width for the store.
  STXSIX,

  /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
  /// Maps directly to an lxvd2x instruction that will be followed by
  /// an xxswapd.
  LXVD2X,

  /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
  /// This node represents v1i128 BUILD_VECTOR of a zero extending load
  /// instruction from <byte, halfword, word, or doubleword> to i128.
  /// Allows utilization of the Load VSX Vector Rightmost Instructions.
  LXVRZX,

  /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
  /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
  /// the vector type to load vector in big-endian element order.
  LOAD_VEC_BE,

  /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
  /// v2f32 value into the lower half of a VSR register.
  LD_VSX_LH,

  /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
  /// instructions such as LXVDSX, LXVWSX.
  LD_SPLAT,

  /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
  /// that zero-extends.
  ZEXT_LD_SPLAT,

  /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
  /// that sign-extends.
  SEXT_LD_SPLAT,

  /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
  /// Maps directly to an stxvd2x instruction that will be preceded by
  /// an xxswapd.
  STXVD2X,

  /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
  /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
  /// the vector type to store vector in big-endian element order.
  STORE_VEC_BE,

  /// Store scalar integers from VSR.
  ST_VSR_SCAL_INT,

  /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
  /// except they ensure that the compare input is zero-extended for
  /// sub-word versions because the atomic loads zero-extend.
  ATOMIC_CMP_SWAP_8,
  ATOMIC_CMP_SWAP_16,

  /// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr
  /// The store conditional instruction ST[BHWD]CX that produces a glue
  /// result to attach it to a conditional branch.
  STORE_COND,

  /// GPRC = TOC_ENTRY GA, TOC
  /// Loads the entry for GA from the TOC, where the TOC base is given by
  /// the last operand.
  TOC_ENTRY,
  LAST_MEMORY_OPCODE = TOC_ENTRY,
};
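
// A sketch of how the FIRST/LAST markers above are meant to be used; the
// real checks live in the backend's .cpp files and this helper name is
// hypothetical:
//   static bool isPPCMemoryOpcode(unsigned Opc) {
//     return Opc >= PPCISD::FIRST_MEMORY_OPCODE &&
//            Opc <= PPCISD::LAST_MEMORY_OPCODE;
//   }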

} // end namespace PPCISD

/// Define some predicates that are used for node matching.
namespace PPC {

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                          SelectionDAG &DAG);

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                          SelectionDAG &DAG);

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction.
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                          SelectionDAG &DAG);

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                        unsigned ShuffleKind, SelectionDAG &DAG);

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                        unsigned ShuffleKind, SelectionDAG &DAG);

/// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGEW or VMRGOW instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                         unsigned ShuffleKind, SelectionDAG &DAG);

/// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXSLDWI instruction.
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          bool &Swap, bool IsLE);

/// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXBRH instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);

/// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXBRW instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);

/// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXBRD instruction.
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);

/// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXBRQ instruction.
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);

/// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXPERMDI instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                           bool &Swap, bool IsLE);

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                        SelectionDAG &DAG);

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);

/// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by
/// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any
/// shuffle of v4f32/v4i32 vectors that just inserts one element from one
/// vector into the other. This function will also set a couple of
/// output parameters for how much the source vector needs to be shifted and
/// what byte number needs to be specified for the instruction to put the
/// element in the desired location of the target vector.
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                     unsigned &InsertAtByte, bool &Swap, bool IsLE);

/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// appropriate for PPC mnemonics (which have a big endian bias - namely
/// elements are counted from the left of the vector register).
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                    SelectionDAG &DAG);

/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element
/// size, return the constant being splatted. The ByteSize field indicates
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
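
// For example, a v16i8 build_vector whose elements are all 5 can be formed
// with a single "vspltisb vD, 5"; get_VSPLTI_elt would return the constant
// 5 for ByteSize == 1.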

// Flags for computing the optimal addressing mode for loads and stores.
enum MemOpFlags {
  MOF_None = 0,

  // Extension mode for integer loads.
  MOF_SExt = 1,
  MOF_ZExt = 1 << 1,
  MOF_NoExt = 1 << 2,

  // Address computation flags.
  MOF_NotAddNorCst = 1 << 5,      // Not const. or sum of ptr and scalar.
  MOF_RPlusSImm16 = 1 << 6,       // Reg plus signed 16-bit constant.
  MOF_RPlusLo = 1 << 7,           // Reg plus signed 16-bit relocation
  MOF_RPlusSImm16Mult4 = 1 << 8,  // Reg plus 16-bit signed multiple of 4.
  MOF_RPlusSImm16Mult16 = 1 << 9, // Reg plus 16-bit signed multiple of 16.
  MOF_RPlusSImm34 = 1 << 10,      // Reg plus 34-bit signed constant.
  MOF_RPlusR = 1 << 11,           // Sum of two variables.
  MOF_PCRel = 1 << 12,            // PC-Relative relocation.
  MOF_AddrIsSImm32 = 1 << 13,     // A simple 32-bit constant.

  // The in-memory type.
  MOF_SubWordInt = 1 << 15,
  MOF_WordInt = 1 << 16,
  MOF_DoubleWordInt = 1 << 17,
  MOF_ScalarFloat = 1 << 18, // Scalar single or double precision.
  MOF_Vector = 1 << 19,      // Vector types and quad precision scalars.
  MOF_Vector256 = 1 << 20,

  // Subtarget features.
  MOF_SubtargetBeforeP9 = 1 << 22,
  MOF_SubtargetP9 = 1 << 23,
  MOF_SubtargetP10 = 1 << 24,
  MOF_SubtargetSPE = 1 << 25
};
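
// For example (a sketch; the real flag computation lives in
// PPCISelLowering.cpp), a sign-extending i32 load from a reg+simm16
// address on a Power9 subtarget would be summarized as:
//   unsigned Flags = MOF_SExt | MOF_RPlusSImm16 | MOF_WordInt |
//                    MOF_SubtargetP9;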

// The addressing modes for loads and stores.
enum AddrMode {
  AM_None,
  AM_DForm,
  AM_DSForm,
  AM_DQForm,
  AM_PrefixDForm,
  AM_XForm,
  AM_PCRel
};
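
// Representative instructions for each mode (illustrative): AM_DForm ->
// "lwz r3, 4(r4)"; AM_DSForm -> "ld r3, 8(r4)" (displacement a multiple of
// 4); AM_DQForm -> "lxv vs0, 16(r4)" (multiple of 16); AM_PrefixDForm ->
// "pld r3, disp(r4)" with a 34-bit displacement; AM_XForm ->
// "lwzx r3, r4, r5"; AM_PCRel -> "pld r3, sym@PCREL".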
} // end namespace PPC

class PPCTargetLowering : public TargetLowering {
  const PPCSubtarget &Subtarget;

public:
  explicit PPCTargetLowering(const PPCTargetMachine &TM,
                             const PPCSubtarget &STI);

  /// getTargetNodeName() - This method returns the name of a target specific
  /// DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  bool isSelectSupported(SelectSupportKind Kind) const override {
    // PowerPC does not support scalar condition selects on vectors.
    return (Kind != SelectSupportKind::ScalarCondVectorVal);
  }

  /// getPreferredVectorAction - The code we generate when vector types are
  /// legalized by promoting the integer element type is often much worse
  /// than code we generate if we widen the type for applicable vector types.
  /// The issue with promoting is that the vector is scalarized, individual
  /// elements promoted and then the vector is rebuilt. So say we load a pair
  /// of v4i8's and shuffle them. This will turn into a mess of 8 extending
  /// loads, moves back into VSRs (or memory ops if we don't have moves) and
  /// then the VPERM for the shuffle. All in all a very slow sequence.
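  ///
  /// For example, v4i8 is widened to v16i8 and shuffled with a single
  /// VPERM instead of being promoted to v4i32 via scalarization.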
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override {
    // Default handling for scalable and single-element vectors.
    if (VT.isScalableVector() || VT.getVectorNumElements() == 1)
      return TargetLoweringBase::getPreferredVectorAction(VT);

    // Split and promote vNi1 vectors so we don't produce v256i1/v512i1
    // types as those are only for MMA instructions.
    if (VT.getScalarSizeInBits() == 1 && VT.getSizeInBits() > 16)
      return TypeSplitVector;
    if (VT.getScalarSizeInBits() == 1)
      return TypePromoteInteger;

    // Widen vectors that have reasonably sized elements.
    if (VT.getScalarSizeInBits() % 8 == 0)
      return TypeWidenVector;
    return TargetLoweringBase::getPreferredVectorAction(VT);
  }

  bool useSoftFloat() const override;

  bool hasSPE() const;

  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
    return MVT::i32;
  }

  bool isCheapToSpeculateCttz(Type *Ty) const override { return true; }

  bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; }

  bool
  shallExtractConstSplatVectorElementToStore(Type *VectorTy,
                                             unsigned ElemSizeInBits,
                                             unsigned &Index) const override;

  bool isCtlzFast() const override { return true; }

  bool isEqualityCmpFoldedWithSignedCmp() const override { return false; }

  bool hasAndNotCompare(SDValue) const override { return true; }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }

  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
                               bool OptForSize, NegatibleCost &Cost,
                               unsigned Depth = 0) const override;

  /// getSetCCResultType - Return the ISD::SETCC ValueType
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  /// Return true if target always benefits from combining into FMA for a
  /// given value type. This must typically return false on targets where FMA
  /// takes more cycles to execute than FADD.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// getPreIndexedAddressParts - returns true by value, base pointer and
  /// offset pointer and addressing mode by reference if the node's address
  /// can be legally represented as pre-indexed load / store address.
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                 SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;

  /// SelectAddressEVXRegReg - Given the specified address, check to see if
  /// it can be more efficiently represented as [r+imm].
  bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
                              SelectionDAG &DAG) const;

  /// SelectAddressRegReg - Given the specified address, check to see if it
  /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
  /// is non-zero, only accept displacement which is not suitable for [r+imm].
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
                           SelectionDAG &DAG,
                           MaybeAlign EncodingAlignment = std::nullopt) const;

  /// SelectAddressRegImm - Returns true if the address N can be represented
  /// by a base register plus a signed 16-bit displacement [r+imm], and if it
  /// is not better represented as reg+reg. If \p EncodingAlignment is
  /// non-zero, only accept displacements suitable for instruction encoding
  /// requirement, i.e. multiples of 4 for DS form.
  bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                           SelectionDAG &DAG,
                           MaybeAlign EncodingAlignment) const;
  bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,
                             SelectionDAG &DAG) const;

  /// SelectAddressRegRegOnly - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation.
  bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
                               SelectionDAG &DAG) const;

  /// SelectAddressPCRel - Returns true if the address N can be represented
  /// as a pc-relative address, [pc+imm].
  bool SelectAddressPCRel(SDValue N, SDValue &Base) const;

  Sched::Preference getSchedulingPreference(SDNode *N) const override;

  /// LowerOperation - Provide custom lowering hooks for some operations.
  ///
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  /// ReplaceNodeResults - Replace the results of node with an illegal result
  /// type with new values built out of custom code.
  ///
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;

  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  Align getPrefLoopAlignment(MachineLoop *ML) const override;

  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
    return true;
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;

  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                AtomicOrdering Ord) const override;
  Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                 AtomicOrdering Ord) const override;

  bool shouldInlineQuadwordAtomics() const;

  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
                                      AtomicRMWInst *AI, Value *AlignedAddr,
                                      Value *Incr, Value *Mask,
                                      Value *ShiftAmt,
                                      AtomicOrdering Ord) const override;
  Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                          AtomicCmpXchgInst *CI,
                                          Value *AlignedAddr, Value *CmpVal,
                                          Value *NewVal, Value *Mask,
                                          AtomicOrdering Ord) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;
  MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
                                      MachineBasicBlock *MBB,
                                      unsigned AtomicSize,
                                      unsigned BinOpcode,
                                      unsigned CmpOpcode = 0,
                                      unsigned CmpPred = 0) const;
  MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,
                                              MachineBasicBlock *MBB,
                                              bool is8bit,
                                              unsigned Opcode,
                                              unsigned CmpOpcode = 0,
                                              unsigned CmpPred = 0) const;

  MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const;

  bool hasInlineStackProbe(const MachineFunction &MF) const override;

  unsigned getStackProbeSize(const MachineFunction &MF) const;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight getSingleConstraintMatchWeight(
      AsmOperandInfo &info, const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area.
  Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;

  /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
  /// vector. If it is invalid, don't add anything to Ops.
  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "es")
      return InlineAsm::ConstraintCode::es;
    else if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    else if (ConstraintCode == "Z")
      return InlineAsm::ConstraintCode::Z;
    else if (ConstraintCode == "Zy")
      return InlineAsm::ConstraintCode::Zy;
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }
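
  // For example, the "Z" constraint commonly appears in PPC inline asm
  // such as (illustrative user code, not part of this interface):
  //   __asm__("lxvd2x %x0,%y1" : "=wa"(Vec) : "Z"(*Ptr));
  // where the "%y" operand modifier prints the memory operand in the
  // "base, index" form expected by X-form instructions.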

  void CollectTargetIntrinsicOperands(const CallInst &I,
                                      SmallVectorImpl<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                             Type *Ty, unsigned AS,
                             Instruction *I = nullptr) const override;

  /// isLegalICmpImmediate - Return true if the specified immediate is legal
  /// icmp immediate, that is the target has icmp instructions which can
  /// compare a register against the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalICmpImmediate(int64_t Imm) const override;

  /// isLegalAddImmediate - Return true if the specified immediate is legal
  /// add immediate, that is the target has add instructions which can
  /// add a register and the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const override;

  /// isTruncateFree - Return true if it's free to truncate a value of
  /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate an i64 value in
  /// register X1 to i32 by referencing its sub-register R1.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  bool convertSelectOfConstantsToMath(EVT VT) const override {
    return true;
  }

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  bool isDesirableToTransformToIntegerOp(unsigned Opc,
                                         EVT VT) const override {
    // Only handle float load/store pair because float(fpr) load/store
    // instruction has more cycles than integer(gpr) load/store in PPC.
    if (Opc != ISD::LOAD && Opc != ISD::STORE)
      return false;
    if (VT != MVT::f32 && VT != MVT::f64)
      return false;

    return true;
  }

  // Returns true if the address of the global is stored in TOC entry.
  bool isAccessedAsGotIndirect(SDValue N) const;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                          const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  /// It returns EVT::Other if the type should be determined using generic
  /// target-independent logic.
  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  /// Is unaligned memory access allowed for the given type, and is it fast
  /// relative to software emulation.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;

  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
  /// expanded to FMAs when this method returns true, otherwise fmuladd is
  /// expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  /// isProfitableToHoist - Check if it is profitable to hoist instruction
  /// \p I to its dominator block.
  /// For example, it is not profitable if \p I and its only user can form a
  /// FMA instruction, because PowerPC prefers FMADD.
  bool isProfitableToHoist(Instruction *I) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  // Should we expand the build vector with shuffles?
  bool
  shouldExpandBuildVectorWithShuffles(EVT VT,
                                      unsigned DefinedValues) const override;

  // Keep the zero-extensions for arguments to libcalls.
  bool shouldKeepZExtForFP16Conv() const override { return true; }

  /// createFastISel - This method returns a target-specific FastISel object,
  /// or null if the target does not support "fast" instruction selection.
  FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo) const override;

  /// Returns true if an argument of type Ty needs to be passed in a
  /// contiguous block of registers in calling convention CallConv.
  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override {
    // We support any array type as "consecutive" block in the parameter
    // save area. The element type defines the alignment requirement and
    // whether the argument should go in GPRs, FPRs, or VRs if available.
    //
    // Note that clang uses this capability both to implement the ELFv2
    // homogeneous float/vector aggregate ABI, and to avoid having to use
    // "byval" when passing aggregates that might fully fit in registers.
    return Ty->isArrayTy();
  }

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  /// Override to support customized stack guard loading.
  bool useLoadStackGuardNode(const Module &M) const override;
  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  unsigned getJumpTableEncoding() const override;
  bool isJumpTableRelative() const override;
  SDValue getPICJumpTableRelocBase(SDValue Table,
                                   SelectionDAG &DAG) const override;
  const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const override;

  /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
  /// compute the address flags of the node, get the optimal address mode
  /// based on the flags, and set the Base and Disp based on the address mode.
  PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N,
                                      SDValue &Disp, SDValue &Base,
                                      SelectionDAG &DAG,
                                      MaybeAlign Align) const;
  /// SelectForceXFormMode - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation (an XForm instruction).
  PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
                                     SelectionDAG &DAG) const;

  bool splitValueIntoRegisterParts(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
      unsigned NumParts, MVT PartVT,
      std::optional<CallingConv::ID> CC) const override;

  /// Structure that collects some common arguments that get passed around
  /// between the functions for call lowering.
  struct CallFlags {
    const CallingConv::ID CallConv;
    const bool IsTailCall : 1;
    const bool IsVarArg : 1;
    const bool IsPatchPoint : 1;
    const bool IsIndirect : 1;
    const bool HasNest : 1;
    const bool NoMerge : 1;

    CallFlags(CallingConv::ID CC, bool IsTailCall, bool IsVarArg,
              bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge)
        : CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg),
          IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect),
          HasNest(HasNest), NoMerge(NoMerge) {}
  };
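
  // A hypothetical construction site, mirroring how the call-lowering code
  // bundles these bits (actual uses live in PPCISelLowering.cpp):
  //   CallFlags CFlags(CallConv, /*IsTailCall=*/true, /*IsVarArg=*/false,
  //                    /*IsPatchPoint=*/false, /*IsIndirect=*/true,
  //                    /*HasNest=*/false, /*NoMerge=*/false);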
| 1205 | |
| 1206 | CCAssignFn *ccAssignFnForCall(CallingConv::ID CC, bool Return, |
| 1207 | bool IsVarArg) const; |
| 1208 | bool supportsTailCallFor(const CallBase *CB) const; |
| 1209 | |
| 1210 | private: |
| 1211 | struct ReuseLoadInfo { |
| 1212 | SDValue Ptr; |
| 1213 | SDValue Chain; |
| 1214 | SDValue ResChain; |
| 1215 | MachinePointerInfo MPI; |
| 1216 | bool IsDereferenceable = false; |
| 1217 | bool IsInvariant = false; |
| 1218 | Align Alignment; |
| 1219 | AAMDNodes AAInfo; |
| 1220 | const MDNode *Ranges = nullptr; |
| 1221 | |
| 1222 | ReuseLoadInfo() = default; |
| 1223 | |
| 1224 | MachineMemOperand::Flags MMOFlags() const { |
| 1225 | MachineMemOperand::Flags F = MachineMemOperand::MONone; |
| 1226 | if (IsDereferenceable) |
| 1227 | F |= MachineMemOperand::MODereferenceable; |
| 1228 | if (IsInvariant) |
| 1229 | F |= MachineMemOperand::MOInvariant; |
| 1230 | return F; |
| 1231 | } |
| 1232 | }; |
| 1233 | |
| 1234 | // Map that relates a set of common address flags to PPC addressing modes. |
| 1235 | std::map<PPC::AddrMode, SmallVector<unsigned, 16>> AddrModesMap; |
| 1236 | void initializeAddrModeMap(); |
| 1237 | |
| 1238 | bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, |
| 1239 | SelectionDAG &DAG, |
| 1240 | ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; |
| 1241 | |
| 1242 | void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, |
| 1243 | SelectionDAG &DAG, const SDLoc &dl) const; |
| 1244 | SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, |
| 1245 | const SDLoc &dl) const; |
| 1246 | |
| 1247 | bool directMoveIsProfitable(const SDValue &Op) const; |
| 1248 | SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, |
| 1249 | const SDLoc &dl) const; |
| 1250 | |
| 1251 | SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, |
| 1252 | const SDLoc &dl) const; |
| 1253 | |
| 1254 | SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const; |
| 1255 | |
SDValue getFramePointerFrameIndex(SelectionDAG &DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG &DAG) const;
| 1258 | |
| 1259 | bool IsEligibleForTailCallOptimization( |
| 1260 | const GlobalValue *CalleeGV, CallingConv::ID CalleeCC, |
| 1261 | CallingConv::ID CallerCC, bool isVarArg, |
| 1262 | const SmallVectorImpl<ISD::InputArg> &Ins) const; |
| 1263 | |
| 1264 | bool IsEligibleForTailCallOptimization_64SVR4( |
| 1265 | const GlobalValue *CalleeGV, CallingConv::ID CalleeCC, |
| 1266 | CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg, |
| 1267 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 1268 | const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc, |
| 1269 | bool isCalleeExternalSymbol) const; |
| 1270 | |
| 1271 | bool isEligibleForTCO(const GlobalValue *CalleeGV, CallingConv::ID CalleeCC, |
| 1272 | CallingConv::ID CallerCC, const CallBase *CB, |
| 1273 | bool isVarArg, |
| 1274 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 1275 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 1276 | const Function *CallerFunc, |
| 1277 | bool isCalleeExternalSymbol) const; |
| 1278 | |
| 1279 | SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff, |
| 1280 | SDValue Chain, SDValue &LROpOut, |
| 1281 | SDValue &FPOpOut, |
| 1282 | const SDLoc &dl) const; |
| 1283 | |
| 1284 | SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const; |
| 1285 | |
| 1286 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
| 1287 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; |
| 1288 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
| 1289 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; |
| 1290 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; |
| 1291 | SDValue LowerGlobalTLSAddressAIX(SDValue Op, SelectionDAG &DAG) const; |
| 1292 | SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const; |
| 1293 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; |
| 1294 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; |
| 1295 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
| 1297 | SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
| 1298 | SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
| 1299 | SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; |
| 1300 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; |
| 1301 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; |
| 1302 | SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; |
| 1303 | SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; |
| 1304 | SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const; |
| 1305 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
| 1306 | SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; |
| 1307 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
| 1308 | SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
| 1309 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; |
| 1310 | SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; |
| 1311 | SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, |
| 1312 | const SDLoc &dl) const; |
| 1313 | SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
| 1314 | SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
| 1315 | SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
| 1316 | SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; |
| 1317 | SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; |
| 1318 | SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; |
| 1319 | SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const; |
| 1320 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
| 1321 | SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
| 1322 | SDValue LowerVPERM(SDValue Op, SelectionDAG &DAG, ArrayRef<int> PermMask, |
| 1323 | EVT VT, SDValue V1, SDValue V2) const; |
| 1324 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
| 1325 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
| 1326 | SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; |
| 1327 | SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; |
| 1328 | SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; |
| 1329 | SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; |
| 1330 | SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const; |
| 1331 | SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const; |
| 1332 | SDValue lowerToLibCall(const char *LibCallName, SDValue Op, |
| 1333 | SelectionDAG &DAG) const; |
| 1334 | SDValue lowerLibCallBasedOnType(const char *LibCallFloatName, |
| 1335 | const char *LibCallDoubleName, SDValue Op, |
| 1336 | SelectionDAG &DAG) const; |
| 1337 | bool isLowringToMASSFiniteSafe(SDValue Op) const; |
| 1338 | bool isLowringToMASSSafe(SDValue Op) const; |
| 1339 | bool isScalarMASSConversionEnabled() const; |
| 1340 | SDValue lowerLibCallBase(const char *LibCallDoubleName, |
| 1341 | const char *LibCallFloatName, |
| 1342 | const char *LibCallDoubleNameFinite, |
| 1343 | const char *LibCallFloatNameFinite, SDValue Op, |
| 1344 | SelectionDAG &DAG) const; |
| 1345 | SDValue lowerPow(SDValue Op, SelectionDAG &DAG) const; |
| 1346 | SDValue lowerSin(SDValue Op, SelectionDAG &DAG) const; |
| 1347 | SDValue lowerCos(SDValue Op, SelectionDAG &DAG) const; |
| 1348 | SDValue lowerLog(SDValue Op, SelectionDAG &DAG) const; |
| 1349 | SDValue lowerLog10(SDValue Op, SelectionDAG &DAG) const; |
| 1350 | SDValue lowerExp(SDValue Op, SelectionDAG &DAG) const; |
| 1351 | SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const; |
| 1352 | SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
| 1353 | SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; |
| 1354 | SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
| 1355 | SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
| 1356 | SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; |
| 1357 | |
| 1358 | SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; |
| 1359 | SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; |
| 1360 | SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const; |
| 1361 | SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const; |
| 1362 | |
| 1363 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
| 1364 | CallingConv::ID CallConv, bool isVarArg, |
| 1365 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 1366 | const SDLoc &dl, SelectionDAG &DAG, |
| 1367 | SmallVectorImpl<SDValue> &InVals) const; |
| 1368 | |
| 1369 | SDValue FinishCall(CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, |
| 1370 | SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, |
| 1371 | SDValue InGlue, SDValue Chain, SDValue CallSeqStart, |
| 1372 | SDValue &Callee, int SPDiff, unsigned NumBytes, |
| 1373 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 1374 | SmallVectorImpl<SDValue> &InVals, |
| 1375 | const CallBase *CB) const; |
| 1376 | |
| 1377 | SDValue |
| 1378 | LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| 1379 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 1380 | const SDLoc &dl, SelectionDAG &DAG, |
| 1381 | SmallVectorImpl<SDValue> &InVals) const override; |
| 1382 | |
| 1383 | SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, |
| 1384 | SmallVectorImpl<SDValue> &InVals) const override; |
| 1385 | |
| 1386 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
| 1387 | bool isVarArg, |
| 1388 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 1389 | LLVMContext &Context, const Type *RetTy) const override; |
| 1390 | |
| 1391 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| 1392 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 1393 | const SmallVectorImpl<SDValue> &OutVals, |
| 1394 | const SDLoc &dl, SelectionDAG &DAG) const override; |
| 1395 | |
| 1396 | SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, |
| 1397 | SelectionDAG &DAG, SDValue ArgVal, |
| 1398 | const SDLoc &dl) const; |
| 1399 | |
| 1400 | SDValue LowerFormalArguments_AIX( |
| 1401 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| 1402 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
| 1403 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; |
| 1404 | SDValue LowerFormalArguments_64SVR4( |
| 1405 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| 1406 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
| 1407 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; |
| 1408 | SDValue LowerFormalArguments_32SVR4( |
| 1409 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
| 1410 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
| 1411 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; |
| 1412 | |
| 1413 | SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, |
| 1414 | SDValue CallSeqStart, |
| 1415 | ISD::ArgFlagsTy Flags, SelectionDAG &DAG, |
| 1416 | const SDLoc &dl) const; |
| 1417 | |
| 1418 | SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags, |
| 1419 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 1420 | const SmallVectorImpl<SDValue> &OutVals, |
| 1421 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 1422 | const SDLoc &dl, SelectionDAG &DAG, |
| 1423 | SmallVectorImpl<SDValue> &InVals, |
| 1424 | const CallBase *CB) const; |
| 1425 | SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags, |
| 1426 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 1427 | const SmallVectorImpl<SDValue> &OutVals, |
| 1428 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 1429 | const SDLoc &dl, SelectionDAG &DAG, |
| 1430 | SmallVectorImpl<SDValue> &InVals, |
| 1431 | const CallBase *CB) const; |
| 1432 | SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, CallFlags CFlags, |
| 1433 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
| 1434 | const SmallVectorImpl<SDValue> &OutVals, |
| 1435 | const SmallVectorImpl<ISD::InputArg> &Ins, |
| 1436 | const SDLoc &dl, SelectionDAG &DAG, |
| 1437 | SmallVectorImpl<SDValue> &InVals, |
| 1438 | const CallBase *CB) const; |
| 1439 | |
| 1440 | SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; |
| 1441 | SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; |
| 1442 | SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; |
| 1443 | |
| 1444 | SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1445 | SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1446 | SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1447 | SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1448 | SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1449 | SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1450 | SDValue combineVectorShift(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1451 | SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1452 | SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1453 | SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1454 | SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1455 | SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1456 | SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1457 | SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; |
| 1458 | SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN, |
| 1459 | SelectionDAG &DAG) const; |
| 1460 | SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, |
| 1461 | DAGCombinerInfo &DCI) const; |
| 1462 | |
/// ConvertSETCCToSubtract - Looks at a SETCC that compares integers and
/// replaces it with an integer subtraction when (1) there is a legal way of
/// doing so and (2) keeping the comparison result in a GPR is profitable.
| 1466 | SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const; |
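
// Illustrative GPR identity (not necessarily the exact sequence this routine
// emits): for 32-bit operands, (x == y) can be materialized without a CR
// compare as cntlzw(x ^ y) >> 5, since x ^ y is zero exactly when x == y, in
// which case cntlzw returns 32.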
| 1467 | |
| 1468 | SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
| 1469 | int &RefinementSteps, bool &UseOneConstNR, |
| 1470 | bool Reciprocal) const override; |
| 1471 | SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
| 1472 | int &RefinementSteps) const override; |
| 1473 | SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, |
| 1474 | const DenormalMode &Mode) const override; |
| 1475 | SDValue getSqrtResultForDenormInput(SDValue Operand, |
| 1476 | SelectionDAG &DAG) const override; |
| 1477 | unsigned combineRepeatedFPDivisors() const override; |
| 1478 | |
| 1479 | SDValue |
| 1480 | combineElementTruncationToVectorTruncation(SDNode *N, |
| 1481 | DAGCombinerInfo &DCI) const; |
| 1482 | |
| 1483 | /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be |
| 1484 | /// handled by the VINSERTH instruction introduced in ISA 3.0. This is |
| 1485 | /// essentially any shuffle of v8i16 vectors that just inserts one element |
| 1486 | /// from one vector into the other. |
| 1487 | SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; |
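
// Example (illustrative): for v8i16 inputs V1 and V2, the shuffle mask
//   <0, 1, 2, 3, 8, 5, 6, 7>
// yields V1 with lane 4 replaced by element 0 of V2, which is exactly the
// single-element insert that VINSERTH performs.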
| 1488 | |
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTB instruction introduced in ISA 3.0. This is
/// essentially the v16i8 version of VINSERTH.
| 1492 | SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; |
| 1493 | |
| 1494 | /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be |
| 1495 | /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1. |
| 1496 | SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; |
| 1497 | |
// Return whether the call instruction can potentially be optimized into a
// tail call. This will cause the optimizers to attempt to move or duplicate
// return instructions to help enable tail call optimizations.
| 1501 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
| 1502 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
| 1503 | |
/// getAddrModeForFlags - Based on the set of address flags, select the
/// optimal instruction format to match.
| 1506 | PPC::AddrMode getAddrModeForFlags(unsigned Flags) const; |
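
// A hedged sketch of the lookup this performs over AddrModesMap (not the
// exact implementation):
//   for (const auto &[Mode, FlagSets] : AddrModesMap)
//     if (llvm::is_contained(FlagSets, Flags))
//       return Mode;
//   return PPC::AM_None;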
| 1507 | |
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
/// The address flags are stored in a map, which is then searched to
/// determine the optimal load/store instruction format.
| 1512 | unsigned computeMOFlags(const SDNode *Parent, SDValue N, |
| 1513 | SelectionDAG &DAG) const; |
| 1514 | }; // end class PPCTargetLowering |
| 1515 | |
| 1516 | namespace PPC { |
| 1517 | |
| 1518 | FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, |
| 1519 | const TargetLibraryInfo *LibInfo); |
| 1520 | |
| 1521 | } // end namespace PPC |
| 1522 | |
| 1523 | bool isIntS16Immediate(SDNode *N, int16_t &Imm); |
| 1524 | bool isIntS16Immediate(SDValue Op, int16_t &Imm); |
| 1525 | bool isIntS34Immediate(SDNode *N, int64_t &Imm); |
| 1526 | bool isIntS34Immediate(SDValue Op, int64_t &Imm); |
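
// Usage sketch (hypothetical): fold a constant operand into a signed 16-bit
// displacement when it fits, e.g.
//   int16_t Imm;
//   if (isIntS16Immediate(N->getOperand(1), Imm))
//     ; // Imm is usable as a D-Form displacement.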
| 1527 | |
| 1528 | bool convertToNonDenormSingle(APInt &ArgAPInt); |
| 1529 | bool convertToNonDenormSingle(APFloat &ArgAPFloat); |
| 1530 | bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat); |
| 1531 | |
| 1532 | } // end namespace llvm |
| 1533 | |
| 1534 | #endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H |
| 1535 | |