//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that PPC uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H

#include "PPCInstrInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include <optional>
#include <utility>

namespace llvm {

  namespace PPCISD {

    // When adding a NEW PPCISD node please add it to the correct position in
    // the enum. The order of elements in this enum matters!
    // Values that are added after this entry:
    //     STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
    // are considered memory opcodes and are treated differently than entries
    // that come before it. For example, ADD or MUL should be placed before
    // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
    // after it.
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops and target ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// FSEL - Traditional three-operand fsel node.
      ///
      FSEL,

      /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
      XSMAXC,
      XSMINC,

      /// FCFID - The FCFID instruction, taking an f64 operand and producing
      /// an f64 value containing the FP representation of the integer that
      /// was temporarily in the f64 operand.
      FCFID,

      /// Newer FCFID[US] integer-to-floating-point conversion instructions
      /// for unsigned integers and single-precision outputs.
      FCFIDU,
      FCFIDS,
      FCFIDUS,

      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or
      /// f64 operand, producing an f64 value containing the integer
      /// representation of that FP value.
      FCTIDZ,
      FCTIWZ,

      /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions
      /// for unsigned integers with round toward zero.
      FCTIDUZ,
      FCTIWUZ,

      /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
      /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
      VEXTS,

      /// Reciprocal estimate instructions (unary FP ops).
      FRE,
      FRSQRTE,

      /// Test instruction for software square root.
      FTSQRT,

      /// Square root instruction.
      FSQRT,

      /// VPERM - The PPC VPERM Instruction.
      ///
      VPERM,

      /// XXSPLT - The PPC VSX splat instructions
      ///
      XXSPLT,

      /// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for
      /// converting immediate single precision numbers to double precision
      /// vector or scalar.
      XXSPLTI_SP_TO_DP,

      /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
      ///
      XXSPLTI32DX,

      /// VECINSERT - The PPC vector insert instruction
      ///
      VECINSERT,

      /// VECSHL - The PPC vector shift left instruction
      ///
      VECSHL,

      /// XXPERMDI - The PPC XXPERMDI instruction
      ///
      XXPERMDI,
      XXPERM,

      /// The CMPB instruction (takes two operands of i32 or i64).
      CMPB,

      /// Hi/Lo - These represent the high and low 16-bit parts of a global
      /// address respectively. These nodes have two operands, the first of
      /// which must be a TargetGlobalAddress, and the second of which must be
      /// a Constant. Selected naively, these turn into 'lis G+C' and
      /// 'li G+C', though these are usually folded into other nodes.
      Hi,
      Lo,
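      // Illustrative selection (a sketch, not authoritative output): a global
      // address G is typically materialized as
      //   lis  r3, G@ha        ; from PPCISD::Hi
      //   addi r3, r3, G@l     ; PPCISD::Lo folded into the addi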

      /// The following two target-specific nodes are used for calls through
      /// function pointers in the 64-bit SVR4 ABI.

      /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex
      /// to compute an allocation on the stack.
      DYNALLOC,

      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex
      /// to compute an offset from native SP to the address of the most
      /// recent dynamic alloca.
      DYNAREAOFFSET,

      /// To avoid stack clash, allocation is performed by block and each
      /// block is probed.
      PROBED_ALLOCA,

      /// The result of the mflr at function entry, used for PIC code.
      GlobalBaseReg,

      /// These nodes represent PPC shifts.
      ///
      /// For scalar types, only the last `n + 1` bits of the shift amounts
      /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
      /// for exact behaviors.
      ///
      /// For vector types, only the last n bits are used. See vsld.
      SRL,
      SRA,
      SHL,
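      // A sketch of the masking rule above for 32-bit slw: n = log2(32) = 5,
      // so the low 6 bits of the shift amount are used and a shift by 32
      // yields 0 rather than being undefined:
      //   slw r3, r4, r5    ; r5 = 32 -> r3 = 0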

      /// FNMSUB - Negated multiply-subtract instruction.
      FNMSUB,

      /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
      /// word and shift left immediate.
      EXTSWSLI,

      /// The combination of sra[wd]i and addze used to implement signed
      /// integer division by a power of 2. The first operand is the dividend,
      /// and the second is the constant shift amount (representing the
      /// divisor).
      SRA_ADDZE,
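      // Illustrative lowering (assuming a 32-bit sdiv by 4):
      //   srawi r3, r3, 2   ; shift; CA is set if the dividend was negative
      //                     ; and 1 bits were shifted out
      //   addze r3, r3      ; add CA to round the quotient toward zero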

      /// CALL - A direct function call.
      /// CALL_NOP is a call with the special NOP which follows 64-bit
      /// SVR4 calls and 32-bit/64-bit AIX calls.
      /// CALL_NOTOC is a call in which the caller does not use the TOC.
      CALL,
      CALL_NOP,
      CALL_NOTOC,

      /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
      /// MTCTR instruction.
      MTCTR,

      /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
      /// BCTRL instruction.
      BCTRL,

      /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
      /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
      /// and 64-bit AIX.
      BCTRL_LOAD_TOC,

      /// The variants that implicitly define rounding mode for calls with
      /// strictfp semantics.
      CALL_RM,
      CALL_NOP_RM,
      CALL_NOTOC_RM,
      BCTRL_RM,
      BCTRL_LOAD_TOC_RM,

      /// Return with a glue operand, matched by 'blr'
      RET_GLUE,

      /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
      /// This copies the bits corresponding to the specified CRREG into the
      /// resultant GPR. Bits corresponding to other CR regs are undefined.
      MFOCRF,

      /// Direct move from a VSX register to a GPR
      MFVSR,

      /// Direct move from a GPR to a VSX register (algebraic)
      MTVSRA,

      /// Direct move from a GPR to a VSX register (zero)
      MTVSRZ,

      /// Direct move of 2 consecutive GPRs to a VSX register.
      BUILD_FP128,

      /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
      /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
      /// unsupported for this target.
      /// Merge 2 GPRs into a single SPE register.
      BUILD_SPE64,

      /// Extract an SPE register component; the second argument selects the
      /// high or low half.
      EXTRACT_SPE,

      /// Extract a subvector from a signed integer vector and convert to FP.
      /// It is primarily used to convert a (widened) illegal integer vector
      /// type to a legal floating point vector type.
      /// For example v2i32 -> widened to v4i32 -> v2f64
      SINT_VEC_TO_FP,

      /// Extract a subvector from an unsigned integer vector and convert to
      /// FP. As with SINT_VEC_TO_FP, used for converting illegal types.
      UINT_VEC_TO_FP,

      /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to
      /// place the value into the least significant element of the most
      /// significant doubleword in the vector. This is not element zero for
      /// anything smaller than a doubleword on either endianness. This node
      /// has the same semantics as SCALAR_TO_VECTOR except that the value
      /// remains in the aforementioned location in the vector register.
      SCALAR_TO_VECTOR_PERMUTED,
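      // For example (illustrative): with v4i32, an mtvsrwz-style move leaves
      // the value in the least significant word of the most significant
      // doubleword, which is element 1 in big-endian numbering and element 2
      // in little-endian numbering, not element 0.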

      // FIXME: Remove these once the ANDI glue bug is fixed:
      /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of
      /// the eq or gt bit of CR0 after executing andi. x, 1. This is used to
      /// implement truncation of i32 or i64 to i1.
      ANDI_rec_1_EQ_BIT,
      ANDI_rec_1_GT_BIT,

      // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
      // target (returns (Lo, Hi)). It takes a chain operand.
      READ_TIME_BASE,

      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
      /// instructions. For lack of a better number, we use the opcode number
      /// encoding for the OPC field to identify the compare. For example, 838
      /// is VCMPGTSH.
      VCMP,

      /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the
      /// altivec VCMP*_rec instructions. For lack of a better number, we use
      /// the opcode number encoding for the OPC field to identify the
      /// compare. For example, 838 is VCMPGTSH.
      VCMP_rec,

      /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
      /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
      /// condition register to branch on, OPC is the branch opcode to use
      /// (e.g. PPC::BLE), DESTBB is the destination block to branch to, and
      /// INFLAG is an optional input flag argument.
      COND_BRANCH,

      /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
      /// loops.
      BDNZ,
      BDZ,

      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
      /// towards zero. Used only as part of the long double-to-int
      /// conversion sequence.
      FADDRTZ,

      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
      MFFS,

      /// TC_RETURN - A tail call return.
      ///   operand #0 chain
      ///   operand #1 callee (register or absolute)
      ///   operand #2 stack adjustment
      ///   operand #3 optional in flag
      TC_RETURN,

      /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg
      /// calls
      CR6SET,
      CR6UNSET,

      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
      /// for non-position independent code on PPC32.
      PPC32_GOT,

      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
      /// local dynamic TLS and position independent code on PPC32.
      PPC32_PICGOT,

      /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
      /// TLS model, produces an ADDIS8 instruction that adds the GOT
      /// base to sym\@got\@tprel\@ha.
      ADDIS_GOT_TPREL_HA,

      /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
      /// TLS model, produces a LD instruction with base register G8RReg
      /// and offset sym\@got\@tprel\@l. This completes the addition that
      /// finds the offset of "sym" relative to the thread pointer.
      LD_GOT_TPREL_L,

      /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec
      /// and local-exec TLS models, produces an ADD instruction that adds
      /// the contents of G8RReg to the thread pointer. Symbol contains a
      /// relocation sym\@tls which is to be replaced by the thread pointer
      /// and identifies to the linker that the instruction is part of a
      /// TLS sequence.
      ADD_TLS,

      /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds the GOT base
      /// register to sym\@got\@tlsgd\@ha.
      ADDIS_TLSGD_HA,

      /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
      /// ADDIS_TLSGD_L_ADDR until after register assignment.
      ADDI_TLSGD_L,

      /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
      /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
      /// ADDIS_TLSGD_L_ADDR until after register assignment.
      GET_TLS_ADDR,

      /// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model
      /// on 32-bit AIX, produces a call to .__get_tpointer to retrieve the
      /// thread pointer. At the end of the call, the thread pointer is found
      /// in R3.
      GET_TPOINTER,

      /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
      /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
      /// register assignment.
      ADDI_TLSGD_L_ADDR,

      /// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
      /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
      /// Op that combines two register copies of TOC entries
      /// (region handle into R3 and variable offset into R4) followed by a
      /// GET_TLS_ADDR node which will be expanded to a call to
      /// .__tls_get_addr. This node is used in 64-bit mode as well (in which
      /// case the result is G8RC and inputs are X3/X4).
      TLSGD_AIX,

      /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
      /// produces a call to .__tls_get_mod(_$TLSML\@ml).
      GET_TLS_MOD_AIX,

      /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle)
      /// Op that requires a single input of the module handle TOC entry in
      /// R3, and generates a GET_TLS_MOD_AIX node which will be expanded into
      /// a call to .__tls_get_mod. This node is used in both 32-bit and
      /// 64-bit modes. The only difference is the register class.
      TLSLD_AIX,

      /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds the GOT base
      /// register to sym\@got\@tlsld\@ha.
      ADDIS_TLSLD_HA,

      /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
      /// ADDIS_TLSLD_L_ADDR until after register assignment.
      ADDI_TLSLD_L,

      /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
      /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
      /// ADDIS_TLSLD_L_ADDR until after register assignment.
      GET_TLSLD_ADDR,

      /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
      /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
      /// following register assignment.
      ADDI_TLSLD_L_ADDR,

      /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
      /// model, produces an ADDIS8 instruction that adds X3 to
      /// sym\@dtprel\@ha.
      ADDIS_DTPREL_HA,

      /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
      /// model, produces an ADDI8 instruction that adds G8RReg to
      /// sym\@got\@dtprel\@l.
      ADDI_DTPREL_L,

      /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based
      /// local-dynamic TLS model, produces a PADDI8 instruction that adds X3
      /// to sym\@dtprel.
      PADDI_DTPREL,

      /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
      /// during instruction selection to optimize a BUILD_VECTOR into
      /// operations on splats. This is necessary to avoid losing these
      /// optimizations due to constant folding.
      VADD_SPLAT,
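      // Illustrative expansion (a sketch): a v16i8 BUILD_VECTOR of all 16s is
      // out of range for a single vspltisb, but can be built from splats as
      //   vspltisb v2, 8
      //   vaddubm  v2, v2, v2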

      /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
      /// operand identifies the operating system entry point.
      SC,

      /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
      CLRBHRB,

      /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
      /// history rolling buffer entry.
      MFBHRBE,

      /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
      RFEBB,

      /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
      /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
      /// or stxvd2x instruction. The chain is necessary because the
      /// sequence replaces a load and needs to provide the same number
      /// of outputs.
      XXSWAPD,

      /// An SDNode for swaps that are not associated with any loads/stores
      /// and thereby have no chain.
      SWAP_NO_CHAIN,

      /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
      /// lower (IDX=1) half of v4f32 to v2f64.
      FP_EXTEND_HALF,

      /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done
      /// either through an add like PADDI or through a PC Relative load like
      /// PLD.
      MAT_PCREL_ADDR,

      /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for
      /// TLS global address when using dynamic access models. This can be
      /// done through an add like PADDI.
      TLS_DYNAMIC_MAT_PCREL_ADDR,

      /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global
      /// address when using local exec access models, and when prefixed
      /// instructions are available. This is used with ADD_TLS to produce an
      /// add like PADDI.
      TLS_LOCAL_EXEC_MAT_ADDR,

      /// ACC_BUILD = Build an accumulator register from 4 VSX registers.
      ACC_BUILD,

      /// PAIR_BUILD = Build a vector pair register from 2 VSX registers.
      PAIR_BUILD,

      /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of
      /// an accumulator or pair register. This node is needed because
      /// EXTRACT_SUBVECTOR expects the input and output vectors to have the
      /// same element type.
      EXTRACT_VSX_REG,

      /// XXMFACC = This corresponds to the xxmfacc instruction.
      XXMFACC,

      // Constrained conversion from floating point to int
      STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
      STRICT_FCTIWZ,
      STRICT_FCTIDUZ,
      STRICT_FCTIWUZ,

      /// Constrained integer-to-floating-point conversion instructions.
      STRICT_FCFID,
      STRICT_FCFIDU,
      STRICT_FCFIDS,
      STRICT_FCFIDUS,

      /// Constrained floating point add in round-to-zero mode.
      STRICT_FADDRTZ,

      // NOTE: The nodes below may require PC-Rel specific patterns if the
      // address could be PC-Relative. When adding new nodes below, consider
      // whether or not the address can be PC-Relative and add the
      // corresponding PC-relative patterns and tests.

      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
      /// byte-swapping store instruction. It byte-swaps the low "Type" bits
      /// of the GPRC input, then stores it through Ptr. Type can be either
      /// i16 or i32.
      STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,

      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
      /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
      /// then puts it in the bottom bits of the GPRC. Type can be either i16
      /// or i32.
      LBRX,

      /// STFIWX - The STFIWX instruction. The first operand is an input token
      /// chain, then an f64 value to store, then an address to store it to.
      STFIWX,

      /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
      /// load which sign-extends from a 32-bit integer value into the
      /// destination 64-bit register.
      LFIWAX,

      /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
      /// load which zero-extends from a 32-bit integer value into the
      /// destination 64-bit register.
      LFIWZX,

      /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
      /// integer smaller than 64 bits into a VSR. The integer is
      /// zero-extended. This can be used for converting loaded integers to
      /// floating point.
      LXSIZX,

      /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
      /// chain, then an f64 value to store, then an address to store it to,
      /// followed by a byte-width for the store.
      STXSIX,

      /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
      /// Maps directly to an lxvd2x instruction that will be followed by
      /// an xxswapd.
      LXVD2X,

      /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
      /// This node represents v1i128 BUILD_VECTOR of a zero extending load
      /// instruction from <byte, halfword, word, or doubleword> to i128.
      /// Allows utilization of the Load VSX Vector Rightmost Instructions.
      LXVRZX,

      /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little
      /// endian. Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x
      /// depending on the vector type to load the vector in big-endian
      /// element order.
      LOAD_VEC_BE,

      /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load
      /// of a v2f32 value into the lower half of a VSR register.
      LD_VSX_LH,

      /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
      /// instruction such as LXVDSX or LXVWSX.
      LD_SPLAT,

      /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
      /// that zero-extends.
      ZEXT_LD_SPLAT,

      /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
      /// that sign-extends.
      SEXT_LD_SPLAT,

      /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
      /// Maps directly to an stxvd2x instruction that will be preceded by
      /// an xxswapd.
      STXVD2X,

      /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little
      /// endian. Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x
      /// depending on the vector type to store the vector in big-endian
      /// element order.
      STORE_VEC_BE,

      /// Store scalar integers from VSR.
      ST_VSR_SCAL_INT,

      /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
      /// except they ensure that the compare input is zero-extended for
      /// sub-word versions because the atomic loads zero-extend.
      ATOMIC_CMP_SWAP_8,
      ATOMIC_CMP_SWAP_16,

      /// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr
      /// The store conditional instruction ST[BHWD]CX that produces a glue
      /// result to attach it to a conditional branch.
      STORE_COND,

      /// GPRC = TOC_ENTRY GA, TOC
      /// Loads the entry for GA from the TOC, where the TOC base is given by
      /// the last operand.
      TOC_ENTRY
    };

  } // end namespace PPCISD

  /// Define some predicates that are used for node matching.
  namespace PPC {

    /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUHUM instruction.
    bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUWUM instruction.
    bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUDUM instruction.
    bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable
    /// for a VMRGL* instruction with the specified unit size (1, 2 or 4
    /// bytes).
    bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable
    /// for a VMRGH* instruction with the specified unit size (1, 2 or 4
    /// bytes).
    bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable
    /// for a VMRGEW or VMRGOW instruction.
    bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                             unsigned ShuffleKind, SelectionDAG &DAG);

    /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXSLDWI instruction.
    bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                              bool &Swap, bool IsLE);

    /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRH instruction.
    bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);

    /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRW instruction.
    bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);

    /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRD instruction.
    bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);

    /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRQ instruction.
    bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);

    /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXPERMDI instruction.
    bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE);

    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
    /// shift amount, otherwise return -1.
    int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                            SelectionDAG &DAG);

    /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE
    /// operand specifies a splat of a single element that is suitable for
    /// input to VSPLTB/VSPLTH/VSPLTW.
    bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);

    /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by
    /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially
    /// any shuffle of v4f32/v4i32 vectors that just inserts one element from
    /// one vector into the other. This function will also set a couple of
    /// output parameters for how much the source vector needs to be shifted
    /// and what byte number needs to be specified for the instruction to put
    /// the element in the desired location of the target vector.
    bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                         unsigned &InsertAtByte, bool &Swap, bool IsLE);

    /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
    /// appropriate for PPC mnemonics (which have a big endian bias - namely
    /// elements are counted from the left of the vector register).
    unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                        SelectionDAG &DAG);

    /// get_VSPLTI_elt - If this is a build_vector of constants which can be
    /// formed by using a vspltis[bhw] instruction of the specified element
    /// size, return the constant being splatted. The ByteSize field indicates
    /// the number of bytes of each element [124] -> [bhw].
    SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
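    // For instance (illustrative): a v16i8 build_vector whose elements are
    // all 5 can be matched with ByteSize = 1 and selected as
    //   vspltisb v2, 5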

    // Flags for computing the optimal addressing mode for loads and stores.
    enum MemOpFlags {
      MOF_None = 0,

      // Extension mode for integer loads.
      MOF_SExt = 1,
      MOF_ZExt = 1 << 1,
      MOF_NoExt = 1 << 2,

      // Address computation flags.
      MOF_NotAddNorCst = 1 << 5,      // Not const. or sum of ptr and scalar.
      MOF_RPlusSImm16 = 1 << 6,       // Reg plus signed 16-bit constant.
      MOF_RPlusLo = 1 << 7,           // Reg plus signed 16-bit relocation
      MOF_RPlusSImm16Mult4 = 1 << 8,  // Reg plus 16-bit signed multiple of 4.
      MOF_RPlusSImm16Mult16 = 1 << 9, // Reg plus 16-bit signed multiple of 16.
      MOF_RPlusSImm34 = 1 << 10,      // Reg plus 34-bit signed constant.
      MOF_RPlusR = 1 << 11,           // Sum of two variables.
      MOF_PCRel = 1 << 12,            // PC-Relative relocation.
      MOF_AddrIsSImm32 = 1 << 13,     // A simple 32-bit constant.

      // The in-memory type.
      MOF_SubWordInt = 1 << 15,
      MOF_WordInt = 1 << 16,
      MOF_DoubleWordInt = 1 << 17,
      MOF_ScalarFloat = 1 << 18, // Scalar single or double precision.
      MOF_Vector = 1 << 19,      // Vector types and quad precision scalars.
      MOF_Vector256 = 1 << 20,

      // Subtarget features.
      MOF_SubtargetBeforeP9 = 1 << 22,
      MOF_SubtargetP9 = 1 << 23,
      MOF_SubtargetP10 = 1 << 24,
      MOF_SubtargetSPE = 1 << 25
    };
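    // A sketch of how these flags compose: a zero-extending i32 load of the
    // form (zextload (add %reg, 8)) on a Power9 subtarget would carry flags
    // along the lines of
    //   MOF_ZExt | MOF_RPlusSImm16 | MOF_WordInt | MOF_SubtargetP9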

    // The addressing modes for loads and stores.
    enum AddrMode {
      AM_None,
      AM_DForm,
      AM_DSForm,
      AM_DQForm,
      AM_PrefixDForm,
      AM_XForm,
      AM_PCRel
    };
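    // Illustrative instruction shapes for each mode (not exhaustive):
    //   AM_DForm        lwz r3, 16(r4)
    //   AM_DSForm       ld r3, 16(r4)        (displacement a multiple of 4)
    //   AM_DQForm       lxv vs34, 16(r4)     (displacement a multiple of 16)
    //   AM_PrefixDForm  plwz r3, 100000(r4)  (34-bit displacement)
    //   AM_XForm        lwzx r3, r4, r5
    //   AM_PCRel        plwz r3, sym@pcrel(0), 1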
  } // end namespace PPC

  class PPCTargetLowering : public TargetLowering {
    const PPCSubtarget &Subtarget;

  public:
    explicit PPCTargetLowering(const PPCTargetMachine &TM,
                               const PPCSubtarget &STI);

    /// getTargetNodeName() - This method returns the name of a target
    /// specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool isSelectSupported(SelectSupportKind Kind) const override {
      // PowerPC does not support scalar condition selects on vectors.
      return (Kind != SelectSupportKind::ScalarCondVectorVal);
    }

    /// getPreferredVectorAction - The code we generate when vector types are
    /// legalized by promoting the integer element type is often much worse
    /// than code we generate if we widen the type for applicable vector
    /// types. The issue with promoting is that the vector is scalarized,
    /// individual elements promoted and then the vector is rebuilt. So say we
    /// load a pair of v4i8's and shuffle them. This will turn into a mess of
    /// 8 extending loads, moves back into VSRs (or memory ops if we don't
    /// have moves) and then the VPERM for the shuffle. All in all a very slow
    /// sequence.
    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
        const override {
      // Default handling for scalable and single-element vectors.
      if (VT.isScalableVector() || VT.getVectorNumElements() == 1)
        return TargetLoweringBase::getPreferredVectorAction(VT);

      // Split and promote vNi1 vectors so we don't produce v256i1/v512i1
      // types as those are only for MMA instructions.
      if (VT.getScalarSizeInBits() == 1 && VT.getSizeInBits() > 16)
        return TypeSplitVector;
      if (VT.getScalarSizeInBits() == 1)
        return TypePromoteInteger;

      // Widen vectors that have reasonably sized elements.
      if (VT.getScalarSizeInBits() % 8 == 0)
        return TypeWidenVector;
      return TargetLoweringBase::getPreferredVectorAction(VT);
    }
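    // For example (illustrative): under this policy a v2i32 is widened to
    // v4i32 rather than promoted to v2i64, a v16i1 is promoted, and wider
    // vNi1 types are split so v256i1/v512i1 stay reserved for MMA.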

    bool useSoftFloat() const override;

    bool hasSPE() const;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
      return MVT::i32;
    }

    bool isCheapToSpeculateCttz(Type *Ty) const override {
      return true;
    }

    bool isCheapToSpeculateCtlz(Type *Ty) const override {
      return true;
    }

    bool
    shallExtractConstSplatVectorElementToStore(Type *VectorTy,
                                               unsigned ElemSizeInBits,
                                               unsigned &Index) const override;

    bool isCtlzFast() const override {
      return true;
    }

    bool isEqualityCmpFoldedWithSignedCmp() const override {
      return false;
    }

    bool hasAndNotCompare(SDValue) const override {
      return true;
    }

    bool preferIncOfAddToSubOfNot(EVT VT) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
                                 bool OptForSize, NegatibleCost &Cost,
                                 unsigned Depth = 0) const override;

    /// getSetCCResultType - Return the ISD::SETCC ValueType
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    /// Return true if target always benefits from combining into FMA for a
    /// given value type. This must typically return false on targets where
    /// FMA takes more cycles to execute than FADD.
    bool enableAggressiveFMAFusion(EVT VT) const override;

    /// getPreIndexedAddressParts - returns true by value, base pointer and
    /// offset pointer and addressing mode by reference if the node's address
    /// can be legally represented as pre-indexed load / store address.
    bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                   SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG) const override;

    /// SelectAddressEVXRegReg - Given the specified address, check to see if
    /// it can be more efficiently represented as [r+imm].
    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
                                SelectionDAG &DAG) const;

    /// SelectAddressRegReg - Given the specified address, check to see if it
    /// can be more efficiently represented as [r+imm]. If \p
    /// EncodingAlignment is non-zero, only accept displacements which are not
    /// suitable for [r+imm]. Returns false if it can be represented by
    /// [r+imm], which are preferred.
    bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
                             SelectionDAG &DAG,
                             MaybeAlign EncodingAlignment = std::nullopt) const;

    /// SelectAddressRegImm - Returns true if the address N can be represented
    /// by a base register plus a signed 16-bit displacement [r+imm], and if
    /// it is not better represented as reg+reg. If \p EncodingAlignment is
    /// non-zero, only accept displacements suitable for instruction encoding
    /// requirements, i.e. multiples of 4 for DS form.
    bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                             SelectionDAG &DAG,
                             MaybeAlign EncodingAlignment) const;
    bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,
                               SelectionDAG &DAG) const;

    /// SelectAddressRegRegOnly - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
                                 SelectionDAG &DAG) const;

    /// SelectAddressPCRel - Returns true if the specified address can be
    /// represented as PC-relative, i.e. [pc+imm].
    bool SelectAddressPCRel(SDValue N, SDValue &Base) const;

    Sched::Preference getSchedulingPreference(SDNode *N) const override;

    /// LowerOperation - Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// ReplaceNodeResults - Replace the results of node with an illegal
    /// result type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    Register getRegisterByName(const char *RegName, LLT VT,
                               const MachineFunction &MF) const override;

    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    Align getPrefLoopAlignment(MachineLoop *ML) const override;

    bool shouldInsertFencesForAtomic(const Instruction *I) const override {
      return true;
    }

    Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                  AtomicOrdering Ord) const override;
    Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                   AtomicOrdering Ord) const override;

    bool shouldInlineQuadwordAtomics() const;

    TargetLowering::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    TargetLowering::AtomicExpansionKind
    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

    Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
                                        AtomicRMWInst *AI, Value *AlignedAddr,
                                        Value *Incr, Value *Mask,
                                        Value *ShiftAmt,
                                        AtomicOrdering Ord) const override;
    Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                            AtomicCmpXchgInst *CI,
                                            Value *AlignedAddr, Value *CmpVal,
                                            Value *NewVal, Value *Mask,
                                            AtomicOrdering Ord) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;
    MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
                                        MachineBasicBlock *MBB,
                                        unsigned AtomicSize,
                                        unsigned BinOpcode,
                                        unsigned CmpOpcode = 0,
                                        unsigned CmpPred = 0) const;
    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,
                                                MachineBasicBlock *MBB,
                                                bool is8bit,
                                                unsigned Opcode,
                                                unsigned CmpOpcode = 0,
                                                unsigned CmpPred = 0) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    bool hasInlineStackProbe(const MachineFunction &MF) const override;

    unsigned getStackProbeSize(const MachineFunction &MF) const;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight
    /// value. The operand object must already have been set up with the
    /// operand type.
    ConstraintWeight getSingleConstraintMatchWeight(
        AsmOperandInfo &info, const char *constraint) const override;

    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// getByValTypeAlignment - Return the desired alignment for ByVal
    /// aggregate function arguments in the caller parameter area. This is the
    /// actual alignment, not its logarithm.
    uint64_t getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    /// LowerAsmOperandForConstraint - Lower the specified operand into the
    /// Ops vector. If it is invalid, don't add anything to Ops.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "es")
        return InlineAsm::ConstraintCode::es;
      else if (ConstraintCode == "Q")
        return InlineAsm::ConstraintCode::Q;
      else if (ConstraintCode == "Z")
        return InlineAsm::ConstraintCode::Z;
      else if (ConstraintCode == "Zy")
        return InlineAsm::ConstraintCode::Zy;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    void CollectTargetIntrinsicOperands(const CallInst &I,
                                        SmallVectorImpl<SDValue> &Ops,
                                        SelectionDAG &DAG) const override;

    /// isLegalAddressingMode - Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified
    /// type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// isLegalICmpImmediate - Return true if the specified immediate is a
    /// legal icmp immediate, that is the target has icmp instructions which
    /// can compare a register against the immediate without having to
    /// materialize the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// isLegalAddImmediate - Return true if the specified immediate is a
    /// legal add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// isTruncateFree - Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate an i64 value
    /// in register X1 to i32 by referencing its sub-register R1.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override {
      return true;
    }

    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                SDValue C) const override;

    bool isDesirableToTransformToIntegerOp(unsigned Opc,
                                           EVT VT) const override {
      // Only handle float load/store pairs because float (FPR) load/store
      // instructions take more cycles than integer (GPR) load/store on PPC.
      if (Opc != ISD::LOAD && Opc != ISD::STORE)
        return false;
      if (VT != MVT::f32 && VT != MVT::f64)
        return false;

      return true;
    }
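    // For example (a sketch): with this hook, a float copied through memory
    // can be moved with integer lwz/stw through a GPR instead of taking the
    // slower lfs/stfs path through an FPR.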

    // Returns true if the address of the global is stored in TOC entry.
    bool isAccessedAsGotIndirect(SDValue N) const;

    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                            const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    /// Is unaligned memory access allowed for the given type, and is it fast
    /// relative to software emulation.
    bool allowsMisalignedMemoryAccesses(
        EVT VT, unsigned AddrSpace, Align Alignment = Align(1),
        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
        unsigned *Fast = nullptr) const override;

    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
    /// expanded to FMAs when this method returns true, otherwise fmuladd is
    /// expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

    /// isProfitableToHoist - Check if it is profitable to hoist instruction
    /// \p I to its dominator block.
    /// For example, it is not profitable if \p I and its only user can form
    /// an FMA instruction, because PowerPC prefers FMADD.
    bool isProfitableToHoist(Instruction *I) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    // Should we expand the build vector with shuffles?
    bool
    shouldExpandBuildVectorWithShuffles(EVT VT,
                                        unsigned DefinedValues) const override;

    // Keep the zero-extensions for arguments to libcalls.
    bool shouldKeepZExtForFP16Conv() const override { return true; }

    /// createFastISel - This method returns a target-specific FastISel
    /// object, or null if the target does not support "fast" instruction
    /// selection.
    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                             const TargetLibraryInfo *LibInfo) const override;

    /// Returns true if an argument of type Ty needs to be passed in a
    /// contiguous block of registers in calling convention CallConv.
    bool functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
        const DataLayout &DL) const override {
      // We support any array type as a "consecutive" block in the parameter
      // save area. The element type defines the alignment requirement and
      // whether the argument should go in GPRs, FPRs, or VRs if available.
      //
      // Note that clang uses this capability both to implement the ELFv2
      // homogeneous float/vector aggregate ABI, and to avoid having to use
      // "byval" when passing aggregates that might fully fit in registers.
      return Ty->isArrayTy();
    }
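    // For instance (illustrative): under the ELFv2 ABI, clang lowers a
    // homogeneous aggregate such as "struct { double x, y; }" to a
    // [2 x double] argument, which this hook keeps in consecutive FPRs.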

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    /// Override to support customized stack guard loading.
    bool useLoadStackGuardNode() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;

    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    unsigned getJumpTableEncoding() const override;
    bool isJumpTableRelative() const override;
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                               unsigned JTI,
                                               MCContext &Ctx) const override;

    /// SelectOptimalAddrMode - Based on a node N and its Parent (a
    /// MemSDNode), compute the address flags of the node, get the optimal
    /// address mode based on the flags, and set the Base and Disp based on
    /// the address mode.
    PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N,
                                        SDValue &Disp, SDValue &Base,
                                        SelectionDAG &DAG,
                                        MaybeAlign Align) const;

    /// SelectForceXFormMode - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation (an XForm instruction).
    PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
                                       SelectionDAG &DAG) const;

    bool splitValueIntoRegisterParts(
        SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
        unsigned NumParts, MVT PartVT,
        std::optional<CallingConv::ID> CC) const override;

    /// Structure that collects some common arguments that get passed around
    /// between the functions for call lowering.
    struct CallFlags {
      const CallingConv::ID CallConv;
      const bool IsTailCall : 1;
      const bool IsVarArg : 1;
      const bool IsPatchPoint : 1;
      const bool IsIndirect : 1;
      const bool HasNest : 1;
      const bool NoMerge : 1;

      CallFlags(CallingConv::ID CC, bool IsTailCall, bool IsVarArg,
                bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge)
          : CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg),
            IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect),
            HasNest(HasNest), NoMerge(NoMerge) {}
    };

    CCAssignFn *ccAssignFnForCall(CallingConv::ID CC, bool Return,
                                  bool IsVarArg) const;
    bool supportsTailCallFor(const CallBase *CB) const;

  private:
    struct ReuseLoadInfo {
      SDValue Ptr;
      SDValue Chain;
      SDValue ResChain;
      MachinePointerInfo MPI;
      bool IsDereferenceable = false;
      bool IsInvariant = false;
      Align Alignment;
      AAMDNodes AAInfo;
      const MDNode *Ranges = nullptr;

      ReuseLoadInfo() = default;

      MachineMemOperand::Flags MMOFlags() const {
        MachineMemOperand::Flags F = MachineMemOperand::MONone;
        if (IsDereferenceable)
          F |= MachineMemOperand::MODereferenceable;
        if (IsInvariant)
          F |= MachineMemOperand::MOInvariant;
        return F;
      }
    };

    // Map that relates a set of common address flags to PPC addressing modes.
    std::map<PPC::AddrMode, SmallVector<unsigned, 16>> AddrModesMap;
    void initializeAddrModeMap();

    bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
                             SelectionDAG &DAG,
                             ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
    void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
                         SelectionDAG &DAG) const;

    void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                SelectionDAG &DAG, const SDLoc &dl) const;
    SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
                                     const SDLoc &dl) const;

    bool directMoveIsProfitable(const SDValue &Op) const;
    SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
                                     const SDLoc &dl) const;

    SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                 const SDLoc &dl) const;

    SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;

    SDValue getFramePointerFrameIndex(SelectionDAG &DAG) const;
    SDValue getReturnAddrFrameIndex(SelectionDAG &DAG) const;

    bool IsEligibleForTailCallOptimization(
        const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
        CallingConv::ID CallerCC, bool isVarArg,
        const SmallVectorImpl<ISD::InputArg> &Ins) const;

    bool IsEligibleForTailCallOptimization_64SVR4(
        const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
        CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
        const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
        bool isCalleeExternalSymbol) const;

    bool isEligibleForTCO(const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
                          CallingConv::ID CallerCC, const CallBase *CB,
                          bool isVarArg,
                          const SmallVectorImpl<ISD::OutputArg> &Outs,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const Function *CallerFunc,
                          bool isCalleeExternalSymbol) const;

    SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
                                         SDValue Chain, SDValue &LROpOut,
                                         SDValue &FPOpOut,
                                         const SDLoc &dl) const;

    SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const;

    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddressAIX(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                           const SDLoc &dl) const;
    SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVPERM(SDValue Op, SelectionDAG &DAG, ArrayRef<int> PermMask,
                       EVT VT, SDValue V1, SDValue V2) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerToLibCall(const char *LibCallName, SDValue Op,
                           SelectionDAG &DAG) const;
    SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,
                                    const char *LibCallDoubleName, SDValue Op,
                                    SelectionDAG &DAG) const;
    bool isLowringToMASSFiniteSafe(SDValue Op) const;
    bool isLowringToMASSSafe(SDValue Op) const;
    bool isScalarMASSConversionEnabled() const;
    SDValue lowerLibCallBase(const char *LibCallDoubleName,
                             const char *LibCallFloatName,
                             const char *LibCallDoubleNameFinite,
                             const char *LibCallFloatNameFinite, SDValue Op,
                             SelectionDAG &DAG) const;
    SDValue lowerPow(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerSin(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerCos(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerLog(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerLog10(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerExp(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;

    SDValue FinishCall(CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
                       SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
1350 | SDValue InGlue, SDValue Chain, SDValue CallSeqStart, |
1351 | SDValue &Callee, int SPDiff, unsigned NumBytes, |
1352 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1353 | SmallVectorImpl<SDValue> &InVals, |
1354 | const CallBase *CB) const; |
1355 | |
1356 | SDValue |
1357 | LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1358 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1359 | const SDLoc &dl, SelectionDAG &DAG, |
1360 | SmallVectorImpl<SDValue> &InVals) const override; |
1361 | |
1362 | SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, |
1363 | SmallVectorImpl<SDValue> &InVals) const override; |
1364 | |
1365 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
1366 | bool isVarArg, |
1367 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1368 | LLVMContext &Context) const override; |
1369 | |
1370 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1371 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1372 | const SmallVectorImpl<SDValue> &OutVals, |
1373 | const SDLoc &dl, SelectionDAG &DAG) const override; |
1374 | |
1375 | SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, |
1376 | SelectionDAG &DAG, SDValue ArgVal, |
1377 | const SDLoc &dl) const; |
1378 | |
1379 | SDValue LowerFormalArguments_AIX( |
1380 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1381 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
1382 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; |
1383 | SDValue LowerFormalArguments_64SVR4( |
1384 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1385 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
1386 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; |
1387 | SDValue LowerFormalArguments_32SVR4( |
1388 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
1389 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
1390 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; |
1391 | |
1392 | SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, |
1393 | SDValue CallSeqStart, |
1394 | ISD::ArgFlagsTy Flags, SelectionDAG &DAG, |
1395 | const SDLoc &dl) const; |
1396 | |
1397 | SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags, |
1398 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1399 | const SmallVectorImpl<SDValue> &OutVals, |
1400 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1401 | const SDLoc &dl, SelectionDAG &DAG, |
1402 | SmallVectorImpl<SDValue> &InVals, |
1403 | const CallBase *CB) const; |
1404 | SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags, |
1405 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1406 | const SmallVectorImpl<SDValue> &OutVals, |
1407 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1408 | const SDLoc &dl, SelectionDAG &DAG, |
1409 | SmallVectorImpl<SDValue> &InVals, |
1410 | const CallBase *CB) const; |
1411 | SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, CallFlags CFlags, |
1412 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
1413 | const SmallVectorImpl<SDValue> &OutVals, |
1414 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1415 | const SDLoc &dl, SelectionDAG &DAG, |
1416 | SmallVectorImpl<SDValue> &InVals, |
1417 | const CallBase *CB) const; |
1418 | |
1419 | SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; |
1420 | SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; |
1421 | SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; |
1422 | |
1423 | SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; |
1424 | SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; |
1425 | SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; |
1426 | SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const; |
1427 | SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; |
1428 | SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const; |
1429 | SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; |
1430 | SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; |
1431 | SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const; |
1432 | SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const; |
1433 | SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const; |
1434 | SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; |
1435 | SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; |
1436 | SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN, |
1437 | SelectionDAG &DAG) const; |
1438 | SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, |
1439 | DAGCombinerInfo &DCI) const; |
1440 | |
/// ConvertSETCCToSubtract - Examines a SETCC node that compares integers and
/// replaces it with an integer subtraction when (1) there is a legal way of
/// doing so and (2) keeping the result of the comparison in a GPR has a
/// performance benefit.
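/// For example (one illustrative case): with i32 operands sign-extended to
/// i64, the subtraction cannot overflow, so the sign bit of the difference
/// is exactly the comparison result:
///   (zext (setlt %a, %b)) -> (srl (sub (sext %a), (sext %b)), 63)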
1444 | SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const; |
1445 | |
1446 | SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
1447 | int &RefinementSteps, bool &UseOneConstNR, |
1448 | bool Reciprocal) const override; |
1449 | SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, |
1450 | int &RefinementSteps) const override; |
1451 | SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, |
1452 | const DenormalMode &Mode) const override; |
1453 | SDValue getSqrtResultForDenormInput(SDValue Operand, |
1454 | SelectionDAG &DAG) const override; |
1455 | unsigned combineRepeatedFPDivisors() const override; |
1456 | |
1457 | SDValue |
1458 | combineElementTruncationToVectorTruncation(SDNode *N, |
1459 | DAGCombinerInfo &DCI) const; |
1460 | |
1461 | /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be |
1462 | /// handled by the VINSERTH instruction introduced in ISA 3.0. This is |
1463 | /// essentially any shuffle of v8i16 vectors that just inserts one element |
1464 | /// from one vector into the other. |
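/// For example, the v8i16 mask <0,1,2,9,4,5,6,7> keeps V1 intact except for
/// lane 3, which is replaced with element 1 of V2 (indices 8-15 select from
/// the second vector).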
1465 | SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; |
1466 | |
1467 | /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be |
1468 | /// handled by the VINSERTB instruction introduced in ISA 3.0. This is |
/// essentially the v16i8 version of VINSERTH.
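/// For example, a v16i8 mask that is the identity <0,...,15> except for a
/// single lane holding an index in the 16-31 range (an element of the
/// second vector) fits this pattern.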
1470 | SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; |
1471 | |
1472 | /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be |
1473 | /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1. |
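/// (XXSPLTI32DX writes a 32-bit immediate into either word 0 or word 1 of
/// each doubleword of its target register, leaving the other word
/// unchanged.)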
1474 | SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; |
1475 | |
// Return whether the call instruction can potentially be optimized into a
// tail call. Returning true causes the optimizers to attempt to move or
// duplicate return instructions in order to enable tail call optimization.
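// For example, a call in tail position such as:
//   int callee(int);
//   int caller(int X) { return callee(X + 1); }
// may be emitted as a direct branch to callee instead of a call followed by
// a return.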
1479 | bool mayBeEmittedAsTailCall(const CallInst *CI) const override; |
1480 | bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; |
1481 | |
/// getAddrModeForFlags - Based on the set of address flags, select the
/// optimal instruction format to match against.
1484 | PPC::AddrMode getAddrModeForFlags(unsigned Flags) const; |
1485 | |
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
/// The address flags are stored in a map, which is then searched to
/// determine the optimal load/store instruction format.
1490 | unsigned computeMOFlags(const SDNode *Parent, SDValue N, |
1491 | SelectionDAG &DAG) const; |
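/// An illustrative pairing of the two helpers above during load/store
/// selection:
///   unsigned Flags = computeMOFlags(Parent, N, DAG);
///   PPC::AddrMode AM = getAddrModeForFlags(Flags);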
1492 | }; // end class PPCTargetLowering |
1493 | |
1494 | namespace PPC { |
1495 | |
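/// createFastISel - Create the PPC-specific FastISel implementation for the
/// given function lowering state and library info.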
1496 | FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, |
1497 | const TargetLibraryInfo *LibInfo); |
1498 | |
1499 | } // end namespace PPC |
1500 | |
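/// isIntS16Immediate - Returns true if the node or value is a constant whose
/// value can be accurately represented as a sign extension from 16 bits; on
/// success the immediate is returned in Imm. The S34 variants test for a
/// sign extension from 34 bits, the immediate width of ISA 3.1 prefixed
/// instructions.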
1501 | bool isIntS16Immediate(SDNode *N, int16_t &Imm); |
1502 | bool isIntS16Immediate(SDValue Op, int16_t &Imm); |
1503 | bool isIntS34Immediate(SDNode *N, int64_t &Imm); |
1504 | bool isIntS34Immediate(SDValue Op, int64_t &Imm); |
1505 | |
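/// Helpers used when materializing floating-point constants: they attempt to
/// convert a denormal single-precision value into a form that can be handled
/// by splat-immediate materialization (rewriting the argument in place on
/// success); checkConvertToNonDenormSingle merely tests whether such a
/// conversion is possible.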
1506 | bool convertToNonDenormSingle(APInt &ArgAPInt); |
1507 | bool convertToNonDenormSingle(APFloat &ArgAPFloat); |
1508 | bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat); |
1509 | |
1510 | } // end namespace llvm |
1511 | |
1512 | #endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H |
1513 | |