//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that PPC uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H

#include "PPCInstrInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include <optional>
#include <utility>

namespace llvm {

  namespace PPCISD {

    // When adding a NEW PPCISD node please add it to the correct position in
    // the enum. The order of elements in this enum matters!
    // Values that are added after this entry:
    //   STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
    // are considered memory opcodes and are treated differently than entries
    // that come before it. For example, ADD or MUL should be placed before
    // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
    // after it.
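    //
    // For instance, a hypothetical non-memory node FOO would be added
    // somewhere before STBRX below, while a hypothetical load-like node LFOO
    // must be added after STBRX so that its value lands past
    // ISD::FIRST_TARGET_MEMORY_OPCODE.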
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops and target ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// FSEL - Traditional three-operand fsel node.
      ///
      FSEL,

      /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
      XSMAXC,
      XSMINC,

      /// FCFID - The FCFID instruction, taking an f64 operand and producing
      /// an f64 value containing the FP representation of the integer that
      /// was temporarily in the f64 operand.
      FCFID,

      /// Newer FCFID[US] integer-to-floating-point conversion instructions for
      /// unsigned integers and single-precision outputs.
      FCFIDU,
      FCFIDS,
      FCFIDUS,

      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
      /// operand, producing an f64 value containing the integer representation
      /// of that FP value.
      FCTIDZ,
      FCTIWZ,

      /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions
      /// for unsigned integers with round toward zero.
      FCTIDUZ,
      FCTIWUZ,
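      // As an illustration (not an exhaustive description of the lowering):
      // an fptosi of f64 to i32 is typically lowered to FCTIWZ feeding an
      // STFIWX store, with the i32 then reloaded from memory; the reverse
      // sint_to_fp direction moves the integer into an FPR and uses FCFID.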

      /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
      /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
      VEXTS,

      /// Reciprocal estimate instructions (unary FP ops).
      FRE,
      FRSQRTE,

      /// Test instruction for software square root.
      FTSQRT,

      /// Square root instruction.
      FSQRT,

      /// VPERM - The PPC VPERM Instruction.
      ///
      VPERM,

      /// XXSPLT - The PPC VSX splat instructions
      ///
      XXSPLT,

      /// XXSPLTI_SP_TO_DP - The PPC VSX splat instruction for immediates,
      /// converting an immediate single-precision number to a
      /// double-precision vector or scalar.
      XXSPLTI_SP_TO_DP,

      /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
      ///
      XXSPLTI32DX,

      /// VECINSERT - The PPC vector insert instruction
      ///
      VECINSERT,

      /// VECSHL - The PPC vector shift left instruction
      ///
      VECSHL,

      /// XXPERMDI - The PPC XXPERMDI instruction
      ///
      XXPERMDI,
      XXPERM,

      /// The CMPB instruction (takes two operands of i32 or i64).
      CMPB,

      /// Hi/Lo - These represent the high and low 16-bit parts of a global
      /// address respectively. These nodes have two operands, the first of
      /// which must be a TargetGlobalAddress, and the second of which must be a
      /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
      /// though these are usually folded into other nodes.
      Hi,
      Lo,

      /// The following two target-specific nodes are used for calls through
      /// function pointers in the 64-bit SVR4 ABI.

      /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
      /// compute an allocation on the stack.
      DYNALLOC,

      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
      /// compute an offset from native SP to the address of the most recent
      /// dynamic alloca.
      DYNAREAOFFSET,

      /// To avoid stack clash, allocation is performed block by block and
      /// each block is probed.
      PROBED_ALLOCA,

      /// The result of the mflr at function entry, used for PIC code.
      GlobalBaseReg,

      /// These nodes represent PPC shifts.
      ///
      /// For scalar types, only the last `n + 1` bits of the shift amounts
      /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
      /// for exact behaviors.
      ///
      /// For vector types, only the last n bits are used. See vsld.
      SRL,
      SRA,
      SHL,
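      // A worked example of the rule above, for illustration: for an i64
      // scalar shift, n = log2(64) = 6, so the low 7 bits of the shift amount
      // are used; sld with a shift amount of 64 therefore produces 0, whereas
      // ISD::SHL leaves that case undefined.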

      /// FNMSUB - Negated multiply-subtract instruction.
      FNMSUB,

      /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
      /// word and shift left immediate.
      EXTSWSLI,

      /// The combination of sra[wd]i and addze used to implement signed
      /// integer division by a power of 2. The first operand is the dividend,
      /// and the second is the constant shift amount (representing the
      /// divisor).
      SRA_ADDZE,

      /// CALL - A direct function call.
      /// CALL_NOP is a call with the special NOP which follows 64-bit
      /// SVR4 calls and 32-bit/64-bit AIX calls.
      /// CALL_NOTOC is a call for which the caller does not use the TOC.
      CALL,
      CALL_NOP,
      CALL_NOTOC,

      /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
      /// MTCTR instruction.
      MTCTR,

      /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
      /// BCTRL instruction.
      BCTRL,

      /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
      /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
      /// and 64-bit AIX.
      BCTRL_LOAD_TOC,

      /// The variants that implicitly define rounding mode for calls with
      /// strictfp semantics.
      CALL_RM,
      CALL_NOP_RM,
      CALL_NOTOC_RM,
      BCTRL_RM,
      BCTRL_LOAD_TOC_RM,

      /// Return with a glue operand, matched by 'blr'.
      RET_GLUE,

      /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
      /// This copies the bits corresponding to the specified CRREG into the
      /// resultant GPR. Bits corresponding to other CR regs are undefined.
      MFOCRF,

      /// Direct move from a VSX register to a GPR
      MFVSR,

      /// Direct move from a GPR to a VSX register (algebraic)
      MTVSRA,

      /// Direct move from a GPR to a VSX register (zero)
      MTVSRZ,

      /// Direct move of 2 consecutive GPRs to a VSX register.
      BUILD_FP128,

      /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
      /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
      /// unsupported for this target.
      /// Merge 2 GPRs to a single SPE register.
      BUILD_SPE64,

      /// Extract SPE register component, second argument is high or low.
      EXTRACT_SPE,

      /// Extract a subvector from signed integer vector and convert to FP.
      /// It is primarily used to convert a (widened) illegal integer vector
      /// type to a legal floating point vector type.
      /// For example v2i32 -> widened to v4i32 -> v2f64
      SINT_VEC_TO_FP,

      /// Extract a subvector from unsigned integer vector and convert to FP.
      /// As with SINT_VEC_TO_FP, used for converting illegal types.
      UINT_VEC_TO_FP,

      /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to
      /// place the value into the least significant element of the most
      /// significant doubleword in the vector. This is not element zero for
      /// anything smaller than a doubleword on either endianness. This node has
      /// the same semantics as SCALAR_TO_VECTOR except that the value remains in
      /// the aforementioned location in the vector register.
      SCALAR_TO_VECTOR_PERMUTED,

      // FIXME: Remove these once the ANDI glue bug is fixed:
      /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of
      /// the eq or gt bit of CR0 after executing andi. x, 1. This is used to
      /// implement truncation of i32 or i64 to i1.
      ANDI_rec_1_EQ_BIT,
      ANDI_rec_1_GT_BIT,

      // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
      // target (returns (Lo, Hi)). It takes a chain operand.
      READ_TIME_BASE,

      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
      /// instructions. For lack of a better number, we use the opcode number
      /// encoding for the OPC field to identify the compare. For example, 838
      /// is VCMPGTSH.
      VCMP,

      /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the
      /// altivec VCMP*_rec instructions. For lack of a better number, we use
      /// the opcode number encoding for the OPC field to identify the compare.
      /// For example, 838 is VCMPGTSH.
      VCMP_rec,

      /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
      /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
      /// condition register to branch on, OPC is the branch opcode to use (e.g.
      /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
      /// an optional input flag argument.
      COND_BRANCH,

      /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
      /// loops.
      BDNZ,
      BDZ,

      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
      /// towards zero. Used only as part of the long double-to-int
      /// conversion sequence.
      FADDRTZ,

      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
      MFFS,

      /// TC_RETURN - A tail call return.
      /// operand #0 chain
      /// operand #1 callee (register or absolute)
      /// operand #2 stack adjustment
      /// operand #3 optional in flag
      TC_RETURN,

      /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
      CR6SET,
      CR6UNSET,

      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
      /// for non-position independent code on PPC32.
      PPC32_GOT,

      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
      /// local dynamic TLS and position independent code on PPC32.
      PPC32_PICGOT,

      /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
      /// TLS model, produces an ADDIS8 instruction that adds the GOT
      /// base to sym\@got\@tprel\@ha.
      ADDIS_GOT_TPREL_HA,

      /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
      /// TLS model, produces a LD instruction with base register G8RReg
      /// and offset sym\@got\@tprel\@l. This completes the addition that
      /// finds the offset of "sym" relative to the thread pointer.
      LD_GOT_TPREL_L,

      /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec
      /// and local-exec TLS models, produces an ADD instruction that adds
      /// the contents of G8RReg to the thread pointer. Symbol contains a
      /// relocation sym\@tls which is to be replaced by the thread pointer
      /// and identifies to the linker that the instruction is part of a
      /// TLS sequence.
      ADD_TLS,

      /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds the GOT base
      /// register to sym\@got\@tlsgd\@ha.
      ADDIS_TLSGD_HA,

      /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
      /// ADDIS_TLSGD_L_ADDR until after register assignment.
      ADDI_TLSGD_L,

      /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
      /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
      /// ADDIS_TLSGD_L_ADDR until after register assignment.
      GET_TLS_ADDR,

      /// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on
      /// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread
      /// pointer. At the end of the call, the thread pointer is found in R3.
      GET_TPOINTER,

      /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
      /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
      /// register assignment.
      ADDI_TLSGD_L_ADDR,

      /// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
      /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
      /// Op that combines two register copies of TOC entries
      /// (region handle into R3 and variable offset into R4) followed by a
      /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr.
      /// This node is used in 64-bit mode as well (in which case the result is
      /// G8RC and inputs are X3/X4).
      TLSGD_AIX,

      /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
      /// produces a call to .__tls_get_mod(_$TLSML\@ml).
      GET_TLS_MOD_AIX,

      /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle)
      /// Op that requires a single input of the module handle TOC entry in R3,
      /// and generates a GET_TLS_MOD_AIX node which will be expanded into a call
      /// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes.
      /// The only difference is the register class.
      TLSLD_AIX,

      /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds the GOT base
      /// register to sym\@got\@tlsld\@ha.
      ADDIS_TLSLD_HA,

      /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
      /// ADDIS_TLSLD_L_ADDR until after register assignment.
      ADDI_TLSLD_L,

      /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
      /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
      /// ADDIS_TLSLD_L_ADDR until after register assignment.
      GET_TLSLD_ADDR,

      /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
      /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
      /// following register assignment.
      ADDI_TLSLD_L_ADDR,

      /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds X3 to
      /// sym\@dtprel\@ha.
      ADDIS_DTPREL_HA,

      /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@dtprel\@l.
      ADDI_DTPREL_L,

      /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic
      /// TLS model, produces a PADDI8 instruction that adds X3 to sym\@dtprel.
      PADDI_DTPREL,

      /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
      /// during instruction selection to optimize a BUILD_VECTOR into
      /// operations on splats. This is necessary to avoid losing these
      /// optimizations due to constant folding.
      VADD_SPLAT,

      /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
      /// operand identifies the operating system entry point.
      SC,

      /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
      CLRBHRB,

      /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
      /// history rolling buffer entry.
      MFBHRBE,

      /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
      RFEBB,

      /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
      /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
      /// or stxvd2x instruction. The chain is necessary because the
      /// sequence replaces a load and needs to provide the same number
      /// of outputs.
      XXSWAPD,

      /// An SDNode for swaps that are not associated with any loads/stores
      /// and thereby have no chain.
      SWAP_NO_CHAIN,

      /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
      /// lower (IDX=1) half of v4f32 to v2f64.
      FP_EXTEND_HALF,

      /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done
      /// either through an add like PADDI or through a PC Relative load like
      /// PLD.
      MAT_PCREL_ADDR,

      /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for
      /// TLS global address when using dynamic access models. This can be done
      /// through an add like PADDI.
      TLS_DYNAMIC_MAT_PCREL_ADDR,

      /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global
      /// address when using local exec access models, and when prefixed
      /// instructions are available. This is used with ADD_TLS to produce an
      /// add like PADDI.
      TLS_LOCAL_EXEC_MAT_ADDR,

      /// ACC_BUILD = Build an accumulator register from 4 VSX registers.
      ACC_BUILD,

      /// PAIR_BUILD = Build a vector pair register from 2 VSX registers.
      PAIR_BUILD,

      /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of
      /// an accumulator or pair register. This node is needed because
      /// EXTRACT_SUBVECTOR expects the input and output vectors to have the
      /// same element type.
      EXTRACT_VSX_REG,

      /// XXMFACC = This corresponds to the xxmfacc instruction.
      XXMFACC,

      // Constrained conversion from floating point to int
      STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
      STRICT_FCTIWZ,
      STRICT_FCTIDUZ,
      STRICT_FCTIWUZ,

      /// Constrained integer-to-floating-point conversion instructions.
      STRICT_FCFID,
      STRICT_FCFIDU,
      STRICT_FCFIDS,
      STRICT_FCFIDUS,

      /// Constrained floating point add in round-to-zero mode.
      STRICT_FADDRTZ,

      // NOTE: The nodes below may require PC-Rel specific patterns if the
      // address could be PC-Relative. When adding new nodes below, consider
      // whether or not the address can be PC-Relative and add the corresponding
      // PC-relative patterns and tests.

      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
      /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
      /// the GPRC input, then stores it through Ptr. Type can be either i16 or
      /// i32.
      STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,

      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
      /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
      /// then puts it in the bottom bits of the GPRC. Type can be either i16
      /// or i32.
      LBRX,

      /// STFIWX - The STFIWX instruction. The first operand is an input token
      /// chain, then an f64 value to store, then an address to store it to.
      STFIWX,

      /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
      /// load which sign-extends from a 32-bit integer value into the
      /// destination 64-bit register.
      LFIWAX,

      /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
      /// load which zero-extends from a 32-bit integer value into the
      /// destination 64-bit register.
      LFIWZX,

      /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
      /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
      /// This can be used for converting loaded integers to floating point.
      LXSIZX,

      /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
      /// chain, then an f64 value to store, then an address to store it to,
      /// followed by a byte-width for the store.
      STXSIX,

      /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
      /// Maps directly to an lxvd2x instruction that will be followed by
      /// an xxswapd.
      LXVD2X,

      /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
      /// This node represents v1i128 BUILD_VECTOR of a zero extending load
      /// instruction from <byte, halfword, word, or doubleword> to i128.
      /// Allows utilization of the Load VSX Vector Rightmost Instructions.
      LXVRZX,

      /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
      /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
      /// the vector type to load vector in big-endian element order.
      LOAD_VEC_BE,

      /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of
      /// a v2f32 value into the lower half of a VSR register.
      LD_VSX_LH,

      /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
      /// instruction such as LXVDSX or LXVWSX.
      LD_SPLAT,

      /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
      /// that zero-extends.
      ZEXT_LD_SPLAT,

      /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
      /// that sign-extends.
      SEXT_LD_SPLAT,

      /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
      /// Maps directly to an stxvd2x instruction that will be preceded by
      /// an xxswapd.
      STXVD2X,

      /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
      /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
      /// the vector type to store vector in big-endian element order.
      STORE_VEC_BE,

      /// Store scalar integers from VSR.
      ST_VSR_SCAL_INT,

      /// ATOMIC_CMP_SWAP - the same as the target-independent nodes
      /// except they ensure that the compare input is zero-extended for
      /// sub-word versions because the atomic loads zero-extend.
      ATOMIC_CMP_SWAP_8,
      ATOMIC_CMP_SWAP_16,

      /// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr
      /// The store conditional instruction ST[BHWD]CX that produces a glue
      /// result to attach it to a conditional branch.
      STORE_COND,

      /// GPRC = TOC_ENTRY GA, TOC
      /// Loads the entry for GA from the TOC, where the TOC base is given by
      /// the last operand.
      TOC_ENTRY
    };

  } // end namespace PPCISD

  /// Define some predicates that are used for node matching.
  namespace PPC {

    /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUHUM instruction.
    bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUWUM instruction.
    bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUDUM instruction.
    bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
    /// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
    bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
    /// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
    bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable
    /// for a VMRGEW or VMRGOW instruction.
    bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                             unsigned ShuffleKind, SelectionDAG &DAG);

    /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXSLDWI instruction.
    bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                              bool &Swap, bool IsLE);

    /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRH instruction.
    bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);

    /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRW instruction.
    bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);

    /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRD instruction.
    bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);

    /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRQ instruction.
    bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);

    /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXPERMDI instruction.
    bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE);

    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
    /// shift amount, otherwise return -1.
    int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                            SelectionDAG &DAG);

    /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
    /// specifies a splat of a single element that is suitable for input to
    /// VSPLTB/VSPLTH/VSPLTW.
    bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
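    // For instance, a v16i8 shuffle whose mask repeats the single index 3 in
    // every position is a splat of byte 3 and would satisfy
    // isSplatShuffleMask(N, /*EltSize=*/1).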

    /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by
    /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any
    /// shuffle of v4f32/v4i32 vectors that just inserts one element from one
    /// vector into the other. This function will also set a couple of
    /// output parameters for how much the source vector needs to be shifted and
    /// what byte number needs to be specified for the instruction to put the
    /// element in the desired location of the target vector.
    bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                         unsigned &InsertAtByte, bool &Swap, bool IsLE);

    /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
    /// appropriate for PPC mnemonics (which have a big endian bias - namely
    /// elements are counted from the left of the vector register).
    unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                        SelectionDAG &DAG);
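    // As an illustration: on a little-endian target, a splat of element 0 of
    // a v4i32 corresponds to mnemonic index 3, since vspltw numbers words
    // from the most significant (leftmost) end of the register.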

    /// get_VSPLTI_elt - If this is a build_vector of constants which can be
    /// formed by using a vspltis[bhw] instruction of the specified element
    /// size, return the constant being splatted. The ByteSize field indicates
    /// the number of bytes of each element [124] -> [bhw].
    SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
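    // For example, a v16i8 build_vector whose elements are all -1 can be
    // materialized with 'vspltisb -1' (the immediate is a signed 5-bit
    // value), so get_VSPLTI_elt(N, /*ByteSize=*/1, DAG) would return that
    // constant; a splat of 100 is out of range and would yield SDValue().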

    // Flags for computing the optimal addressing mode for loads and stores.
    enum MemOpFlags {
      MOF_None = 0,

      // Extension mode for integer loads.
      MOF_SExt = 1,
      MOF_ZExt = 1 << 1,
      MOF_NoExt = 1 << 2,

      // Address computation flags.
      MOF_NotAddNorCst = 1 << 5,      // Not const. or sum of ptr and scalar.
      MOF_RPlusSImm16 = 1 << 6,       // Reg plus signed 16-bit constant.
      MOF_RPlusLo = 1 << 7,           // Reg plus signed 16-bit relocation.
      MOF_RPlusSImm16Mult4 = 1 << 8,  // Reg plus 16-bit signed multiple of 4.
      MOF_RPlusSImm16Mult16 = 1 << 9, // Reg plus 16-bit signed multiple of 16.
      MOF_RPlusSImm34 = 1 << 10,      // Reg plus 34-bit signed constant.
      MOF_RPlusR = 1 << 11,           // Sum of two variables.
      MOF_PCRel = 1 << 12,            // PC-Relative relocation.
      MOF_AddrIsSImm32 = 1 << 13,     // A simple 32-bit constant.

      // The in-memory type.
      MOF_SubWordInt = 1 << 15,
      MOF_WordInt = 1 << 16,
      MOF_DoubleWordInt = 1 << 17,
      MOF_ScalarFloat = 1 << 18, // Scalar single or double precision.
      MOF_Vector = 1 << 19,      // Vector types and quad precision scalars.
      MOF_Vector256 = 1 << 20,

      // Subtarget features.
      MOF_SubtargetBeforeP9 = 1 << 22,
      MOF_SubtargetP9 = 1 << 23,
      MOF_SubtargetP10 = 1 << 24,
      MOF_SubtargetSPE = 1 << 25
    };

    // The addressing modes for loads and stores.
    enum AddrMode {
      AM_None,
      AM_DForm,
      AM_DSForm,
      AM_DQForm,
      AM_PrefixDForm,
      AM_XForm,
      AM_PCRel
    };
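
    // An illustrative sketch of how these two enums interact (the actual
    // mapping lives in PPCTargetLowering::initializeAddrModeMap): a 4-byte
    // integer load of the form [reg + 16-bit imm] on a Power9 subtarget
    // carries flags like
    //   unsigned Flags = MOF_RPlusSImm16 | MOF_WordInt | MOF_SubtargetP9;
    // and getAddrModeForFlags(Flags) would then select AM_DForm (a lwz-style
    // base + displacement encoding).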
  } // end namespace PPC

  class PPCTargetLowering : public TargetLowering {
    const PPCSubtarget &Subtarget;

  public:
    explicit PPCTargetLowering(const PPCTargetMachine &TM,
                               const PPCSubtarget &STI);

    /// getTargetNodeName() - This method returns the name of a target specific
    /// DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool isSelectSupported(SelectSupportKind Kind) const override {
      // PowerPC does not support scalar condition selects on vectors.
      return (Kind != SelectSupportKind::ScalarCondVectorVal);
    }

    /// getPreferredVectorAction - The code we generate when vector types are
    /// legalized by promoting the integer element type is often much worse
    /// than code we generate if we widen the type for applicable vector types.
    /// The issue with promoting is that the vector is scalarized, the
    /// individual elements promoted and then the vector is rebuilt. So say we
    /// load a pair of v4i8's and shuffle them. This will turn into a mess of
    /// 8 extending loads, moves back into VSRs (or memory ops if we don't have
    /// moves) and then the VPERM for the shuffle. All in all a very slow
    /// sequence.
    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
        const override {
      // Default handling for scalable and single-element vectors.
      if (VT.isScalableVector() || VT.getVectorNumElements() == 1)
        return TargetLoweringBase::getPreferredVectorAction(VT);

      // Split and promote vNi1 vectors so we don't produce v256i1/v512i1
      // types as those are only for MMA instructions.
      if (VT.getScalarSizeInBits() == 1 && VT.getSizeInBits() > 16)
        return TypeSplitVector;
      if (VT.getScalarSizeInBits() == 1)
        return TypePromoteInteger;

      // Widen vectors that have reasonably sized elements.
      if (VT.getScalarSizeInBits() % 8 == 0)
        return TypeWidenVector;
      return TargetLoweringBase::getPreferredVectorAction(VT);
    }
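    // Under this policy, for example, v4i8 is widened (e.g. to v16i8) rather
    // than promoted, v32i1 is split so that the MMA-only v256i1/v512i1 types
    // are never produced, and a small mask type like v8i1 is promoted to an
    // integer element type.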

    bool useSoftFloat() const override;

    bool hasSPE() const;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
      return MVT::i32;
    }

    bool isCheapToSpeculateCttz(Type *Ty) const override {
      return true;
    }

    bool isCheapToSpeculateCtlz(Type *Ty) const override {
      return true;
    }

    bool
    shallExtractConstSplatVectorElementToStore(Type *VectorTy,
                                               unsigned ElemSizeInBits,
                                               unsigned &Index) const override;

    bool isCtlzFast() const override {
      return true;
    }

    bool isEqualityCmpFoldedWithSignedCmp() const override {
      return false;
    }

    bool hasAndNotCompare(SDValue) const override {
      return true;
    }

    bool preferIncOfAddToSubOfNot(EVT VT) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
                                 bool OptForSize, NegatibleCost &Cost,
                                 unsigned Depth = 0) const override;

    /// getSetCCResultType - Return the ISD::SETCC ValueType.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    /// Return true if target always benefits from combining into FMA for a
    /// given value type. This must typically return false on targets where FMA
    /// takes more cycles to execute than FADD.
    bool enableAggressiveFMAFusion(EVT VT) const override;

    /// getPreIndexedAddressParts - returns true by value, base pointer and
    /// offset pointer and addressing mode by reference if the node's address
    /// can be legally represented as a pre-indexed load / store address.
    bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                   SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG) const override;

    /// SelectAddressEVXRegReg - Given the specified address, check to see if
    /// it can be more efficiently represented as [r+imm].
    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
                                SelectionDAG &DAG) const;

    /// SelectAddressRegReg - Given the specified address, check to see if it
    /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
    /// is non-zero, only accept displacements that are not suitable for
    /// [r+imm]. Returns false if it can be represented by [r+imm], which are
    /// preferred.
    bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
                             SelectionDAG &DAG,
                             MaybeAlign EncodingAlignment = std::nullopt) const;

    /// SelectAddressRegImm - Returns true if the address N can be represented
    /// by a base register plus a signed 16-bit displacement [r+imm], and if it
    /// is not better represented as reg+reg. If \p EncodingAlignment is
    /// non-zero, only accept displacements suitable for the instruction
    /// encoding requirement, i.e. multiples of 4 for DS form.
    bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                             SelectionDAG &DAG,
                             MaybeAlign EncodingAlignment) const;
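    // For example (illustrative): an i64 load from [r3 + 12] must use the
    // DS instruction form, whose displacement is a multiple of 4, so it is
    // checked with SelectAddressRegImm(N, Disp, Base, DAG, Align(4)); a
    // displacement of 13 would be rejected and the address typically
    // selected as [r+r] instead.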
    bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,
                               SelectionDAG &DAG) const;

    /// SelectAddressRegRegOnly - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
                                 SelectionDAG &DAG) const;

    /// SelectAddressPCRel - Returns true if the address N can be represented
    /// as a pc-relative address [pc+imm].
    bool SelectAddressPCRel(SDValue N, SDValue &Base) const;

    Sched::Preference getSchedulingPreference(SDNode *N) const override;

    /// LowerOperation - Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// ReplaceNodeResults - Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    Register getRegisterByName(const char *RegName, LLT VT,
                               const MachineFunction &MF) const override;

    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    Align getPrefLoopAlignment(MachineLoop *ML) const override;

    bool shouldInsertFencesForAtomic(const Instruction *I) const override {
      return true;
    }

    Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                  AtomicOrdering Ord) const override;
    Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                   AtomicOrdering Ord) const override;

    bool shouldInlineQuadwordAtomics() const;

    TargetLowering::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    TargetLowering::AtomicExpansionKind
    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

    Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
                                        AtomicRMWInst *AI, Value *AlignedAddr,
                                        Value *Incr, Value *Mask,
                                        Value *ShiftAmt,
                                        AtomicOrdering Ord) const override;
    Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                            AtomicCmpXchgInst *CI,
                                            Value *AlignedAddr, Value *CmpVal,
                                            Value *NewVal, Value *Mask,
                                            AtomicOrdering Ord) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;
    MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
                                        MachineBasicBlock *MBB,
                                        unsigned AtomicSize,
                                        unsigned BinOpcode,
                                        unsigned CmpOpcode = 0,
                                        unsigned CmpPred = 0) const;
    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,
                                                MachineBasicBlock *MBB,
                                                bool is8bit,
                                                unsigned Opcode,
                                                unsigned CmpOpcode = 0,
                                                unsigned CmpPred = 0) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    bool hasInlineStackProbe(const MachineFunction &MF) const override;

    unsigned getStackProbeSize(const MachineFunction &MF) const;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight getSingleConstraintMatchWeight(
        AsmOperandInfo &info, const char *constraint) const override;

    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. This is the actual
    /// alignment, not its logarithm.
    uint64_t getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
    /// vector. If it is invalid, don't add anything to Ops.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "es")
        return InlineAsm::ConstraintCode::es;
      else if (ConstraintCode == "Q")
        return InlineAsm::ConstraintCode::Q;
      else if (ConstraintCode == "Z")
        return InlineAsm::ConstraintCode::Z;
      else if (ConstraintCode == "Zy")
        return InlineAsm::ConstraintCode::Zy;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    void CollectTargetIntrinsicOperands(const CallInst &I,
                                        SmallVectorImpl<SDValue> &Ops,
                                        SelectionDAG &DAG) const override;

    /// isLegalAddressingMode - Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// isLegalICmpImmediate - Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// isLegalAddImmediate - Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// isTruncateFree - Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate an i64 value in
    /// register X1 to i32 by referencing its sub-register R1.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override {
      return true;
    }

    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                SDValue C) const override;

    bool isDesirableToTransformToIntegerOp(unsigned Opc,
                                           EVT VT) const override {
      // Only handle float load/store pairs because a float (FPR) load/store
      // instruction takes more cycles than an integer (GPR) load/store on PPC.
      if (Opc != ISD::LOAD && Opc != ISD::STORE)
        return false;
      if (VT != MVT::f32 && VT != MVT::f64)
        return false;

      return true;
    }

    // Returns true if the address of the global is stored in a TOC entry.
    bool isAccessedAsGotIndirect(SDValue N) const;

    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                            const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    /// Is unaligned memory access allowed for the given type, and is it fast
    /// relative to software emulation.
    bool allowsMisalignedMemoryAccesses(
        EVT VT, unsigned AddrSpace, Align Alignment = Align(1),
        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
        unsigned *Fast = nullptr) const override;

    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
    /// expanded to FMAs when this method returns true, otherwise fmuladd is
    /// expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

    /// isProfitableToHoist - Check if it is profitable to hoist instruction
    /// \p I to its dominator block.
    /// For example, it is not profitable if \p I and its only user can form a
    /// FMA instruction, because PowerPC prefers FMADD.
    bool isProfitableToHoist(Instruction *I) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    // Should we expand the build vector with shuffles?
    bool
    shouldExpandBuildVectorWithShuffles(EVT VT,
                                        unsigned DefinedValues) const override;

    // Keep the zero-extensions for arguments to libcalls.
    bool shouldKeepZExtForFP16Conv() const override { return true; }

    /// createFastISel - This method returns a target-specific FastISel object,
    /// or null if the target does not support "fast" instruction selection.
    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                             const TargetLibraryInfo *LibInfo) const override;

    /// Returns true if an argument of type Ty needs to be passed in a
    /// contiguous block of registers in calling convention CallConv.
    bool functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
        const DataLayout &DL) const override {
      // We support any array type as a "consecutive" block in the parameter
      // save area. The element type defines the alignment requirement and
      // whether the argument should go in GPRs, FPRs, or VRs if available.
      //
      // Note that clang uses this capability both to implement the ELFv2
      // homogeneous float/vector aggregate ABI, and to avoid having to use
      // "byval" when passing aggregates that might fully fit in registers.
      return Ty->isArrayTy();
    }
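    // For example, under the ELFv2 ABI an argument of IR type [4 x float]
    // (a homogeneous float aggregate) is passed in consecutive FPRs (or in
    // the parameter save area) rather than byval, per the comment above.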

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    /// Override to support customized stack guard loading.
    bool useLoadStackGuardNode() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;

    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    unsigned getJumpTableEncoding() const override;
    bool isJumpTableRelative() const override;
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                               unsigned JTI,
                                               MCContext &Ctx) const override;

    /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
    /// compute the address flags of the node, get the optimal address mode
    /// based on the flags, and set the Base and Disp based on the address mode.
    PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N,
                                        SDValue &Disp, SDValue &Base,
                                        SelectionDAG &DAG,
                                        MaybeAlign Align) const;

    /// SelectForceXFormMode - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation (an XForm instruction).
    PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
                                       SelectionDAG &DAG) const;

    bool splitValueIntoRegisterParts(
        SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
        unsigned NumParts, MVT PartVT,
        std::optional<CallingConv::ID> CC) const override;

    /// Structure that collects some common arguments that get passed around
    /// between the functions for call lowering.
    struct CallFlags {
      const CallingConv::ID CallConv;
      const bool IsTailCall : 1;
      const bool IsVarArg : 1;
      const bool IsPatchPoint : 1;
      const bool IsIndirect : 1;
      const bool HasNest : 1;
      const bool NoMerge : 1;

      CallFlags(CallingConv::ID CC, bool IsTailCall, bool IsVarArg,
                bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge)
          : CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg),
            IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect),
            HasNest(HasNest), NoMerge(NoMerge) {}
    };
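    // A minimal usage sketch (values illustrative): flags for a direct,
    // non-variadic C call with no special properties.
    //
    //   CallFlags CFlags(CallingConv::C, /*IsTailCall=*/false,
    //                    /*IsVarArg=*/false, /*IsPatchPoint=*/false,
    //                    /*IsIndirect=*/false, /*HasNest=*/false,
    //                    /*NoMerge=*/false);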

    CCAssignFn *ccAssignFnForCall(CallingConv::ID CC, bool Return,
                                  bool IsVarArg) const;
    bool supportsTailCallFor(const CallBase *CB) const;

  private:
    struct ReuseLoadInfo {
      SDValue Ptr;
      SDValue Chain;
      SDValue ResChain;
      MachinePointerInfo MPI;
      bool IsDereferenceable = false;
      bool IsInvariant = false;
      Align Alignment;
      AAMDNodes AAInfo;
      const MDNode *Ranges = nullptr;

      ReuseLoadInfo() = default;

      MachineMemOperand::Flags MMOFlags() const {
        MachineMemOperand::Flags F = MachineMemOperand::MONone;
        if (IsDereferenceable)
          F |= MachineMemOperand::MODereferenceable;
        if (IsInvariant)
          F |= MachineMemOperand::MOInvariant;
        return F;
      }
    };

    // Map that relates a set of common address flags to PPC addressing modes.
    std::map<PPC::AddrMode, SmallVector<unsigned, 16>> AddrModesMap;
    void initializeAddrModeMap();

    bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
                             SelectionDAG &DAG,
                             ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
    void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
                         SelectionDAG &DAG) const;

    void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                SelectionDAG &DAG, const SDLoc &dl) const;
    SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
                                     const SDLoc &dl) const;

    bool directMoveIsProfitable(const SDValue &Op) const;
    SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
                                     const SDLoc &dl) const;

    SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                 const SDLoc &dl) const;

    SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;

    SDValue getFramePointerFrameIndex(SelectionDAG &DAG) const;
    SDValue getReturnAddrFrameIndex(SelectionDAG &DAG) const;

    bool IsEligibleForTailCallOptimization(
        const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
        CallingConv::ID CallerCC, bool isVarArg,
        const SmallVectorImpl<ISD::InputArg> &Ins) const;

    bool IsEligibleForTailCallOptimization_64SVR4(
        const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
        CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
        const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
        bool isCalleeExternalSymbol) const;

    bool isEligibleForTCO(const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
                          CallingConv::ID CallerCC, const CallBase *CB,
                          bool isVarArg,
                          const SmallVectorImpl<ISD::OutputArg> &Outs,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const Function *CallerFunc,
                          bool isCalleeExternalSymbol) const;

    SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
                                         SDValue Chain, SDValue &LROpOut,
                                         SDValue &FPOpOut,
                                         const SDLoc &dl) const;

    SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const;

    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddressAIX(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                           const SDLoc &dl) const;
    SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVPERM(SDValue Op, SelectionDAG &DAG, ArrayRef<int> PermMask,
                       EVT VT, SDValue V1, SDValue V2) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerToLibCall(const char *LibCallName, SDValue Op,
                           SelectionDAG &DAG) const;
    SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,
                                    const char *LibCallDoubleName, SDValue Op,
                                    SelectionDAG &DAG) const;
    bool isLowringToMASSFiniteSafe(SDValue Op) const;
    bool isLowringToMASSSafe(SDValue Op) const;
    bool isScalarMASSConversionEnabled() const;
    SDValue lowerLibCallBase(const char *LibCallDoubleName,
                             const char *LibCallFloatName,
                             const char *LibCallDoubleNameFinite,
                             const char *LibCallFloatNameFinite, SDValue Op,
                             SelectionDAG &DAG) const;
    SDValue lowerPow(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerSin(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerCos(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerLog(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerLog10(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerExp(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;

    SDValue FinishCall(CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
                       SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                       SDValue InGlue, SDValue Chain, SDValue CallSeqStart,
                       SDValue &Callee, int SPDiff, unsigned NumBytes,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       SmallVectorImpl<SDValue> &InVals,
                       const CallBase *CB) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
                              SelectionDAG &DAG, SDValue ArgVal,
                              const SDLoc &dl) const;

    SDValue LowerFormalArguments_AIX(
        SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
        const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
        SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
    SDValue LowerFormalArguments_64SVR4(
        SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
        const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
        SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
    SDValue LowerFormalArguments_32SVR4(
        SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
        const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
        SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;

    SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                       SDValue CallSeqStart,
                                       ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                                       const SDLoc &dl) const;

    SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals,
                             const CallBase *CB) const;
    SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals,
                             const CallBase *CB) const;
    SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, CallFlags CFlags,
                          const SmallVectorImpl<ISD::OutputArg> &Outs,
                          const SmallVectorImpl<SDValue> &OutVals,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &dl, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals,
                          const CallBase *CB) const;

    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;

    SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                 SelectionDAG &DAG) const;
    SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
                                 DAGCombinerInfo &DCI) const;

    /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
    /// SETCC with integer subtraction when (1) there is a legal way of doing it
    /// and (2) keeping the result of the comparison in a GPR has a performance
    /// benefit.
    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;

    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;
    SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                             const DenormalMode &Mode) const override;
    SDValue getSqrtResultForDenormInput(SDValue Operand,
                                        SelectionDAG &DAG) const override;
    unsigned combineRepeatedFPDivisors() const override;

    SDValue
    combineElementTruncationToVectorTruncation(SDNode *N,
                                               DAGCombinerInfo &DCI) const;

    /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
    /// handled by the VINSERTH instruction introduced in ISA 3.0. This is
    /// essentially any shuffle of v8i16 vectors that just inserts one element
    /// from one vector into the other.
    SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;

    /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
    /// handled by the VINSERTB instruction introduced in ISA 3.0. This is
    /// essentially the v16i8 vector version of VINSERTH.
    SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;

    /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
    /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1.
    SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;

    // Return whether the call instruction can potentially be optimized to a
    // tail call. This will cause the optimizers to attempt to move, or
    // duplicate return instructions to help enable tail call optimizations.
    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    /// getAddrModeForFlags - Based on the set of address flags, select the
    /// optimal instruction format to match.
    PPC::AddrMode getAddrModeForFlags(unsigned Flags) const;

    /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
    /// the address flags of the load/store instruction that is to be matched.
    /// The address flags are stored in a map, which is then searched
    /// through to determine the optimal load/store instruction format.
    unsigned computeMOFlags(const SDNode *Parent, SDValue N,
                            SelectionDAG &DAG) const;
  }; // end class PPCTargetLowering

  namespace PPC {

    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                             const TargetLibraryInfo *LibInfo);

  } // end namespace PPC

  bool isIntS16Immediate(SDNode *N, int16_t &Imm);
  bool isIntS16Immediate(SDValue Op, int16_t &Imm);
  bool isIntS34Immediate(SDNode *N, int64_t &Imm);
  bool isIntS34Immediate(SDValue Op, int64_t &Imm);

  bool convertToNonDenormSingle(APInt &ArgAPInt);
  bool convertToNonDenormSingle(APFloat &ArgAPFloat);
  bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H