//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H

#include "NVPTX.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
namespace NVPTXISD {
enum NodeType : unsigned {
  // Start the numbering from where ISD NodeType finishes.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  Wrapper,
  CALL,
  RET_GLUE,
  LOAD_PARAM,
  DeclareParam,
  DeclareScalarParam,
  DeclareRetParam,
  DeclareRet,
  DeclareScalarRet,
  PrintCall,
  PrintConvergentCall,
  PrintCallUni,
  PrintConvergentCallUni,
  CallArgBegin,
  CallArg,
  LastCallArg,
  CallArgEnd,
  CallVoid,
  CallVal,
  CallSymbol,
  Prototype,
  MoveParam,
  PseudoUseParam,
  RETURN,
  CallSeqBegin,
  CallSeqEnd,
  CallPrototype,
  ProxyReg,
  FUN_SHFL_CLAMP,
  FUN_SHFR_CLAMP,
  MUL_WIDE_SIGNED,
  MUL_WIDE_UNSIGNED,
  IMAD,
  SETP_F16X2,
  SETP_BF16X2,
  BFE,
  BFI,
  PRMT,
  DYNAMIC_STACKALLOC,
  Dummy,

  LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LoadV4,
  LDGV2, // LDG.v2
  LDGV4, // LDG.v4
  LDUV2, // LDU.v2
  LDUV4, // LDU.v4
  StoreV2,
  StoreV4,
  LoadParam,
  LoadParamV2,
  LoadParamV4,
  StoreParam,
  StoreParamV2,
  StoreParamV4,
  StoreParamS32, // to sext and store a <32-bit value, not used currently
  StoreParamU32, // to zext and store a <32-bit value, not used currently
  StoreRetval,
  StoreRetvalV2,
  StoreRetvalV4,

  // Texture intrinsics
  Tex1DFloatS32,
  Tex1DFloatFloat,
  Tex1DFloatFloatLevel,
  Tex1DFloatFloatGrad,
  Tex1DS32S32,
  Tex1DS32Float,
  Tex1DS32FloatLevel,
  Tex1DS32FloatGrad,
  Tex1DU32S32,
  Tex1DU32Float,
  Tex1DU32FloatLevel,
  Tex1DU32FloatGrad,
  Tex1DArrayFloatS32,
  Tex1DArrayFloatFloat,
  Tex1DArrayFloatFloatLevel,
  Tex1DArrayFloatFloatGrad,
  Tex1DArrayS32S32,
  Tex1DArrayS32Float,
  Tex1DArrayS32FloatLevel,
  Tex1DArrayS32FloatGrad,
  Tex1DArrayU32S32,
  Tex1DArrayU32Float,
  Tex1DArrayU32FloatLevel,
  Tex1DArrayU32FloatGrad,
  Tex2DFloatS32,
  Tex2DFloatFloat,
  Tex2DFloatFloatLevel,
  Tex2DFloatFloatGrad,
  Tex2DS32S32,
  Tex2DS32Float,
  Tex2DS32FloatLevel,
  Tex2DS32FloatGrad,
  Tex2DU32S32,
  Tex2DU32Float,
  Tex2DU32FloatLevel,
  Tex2DU32FloatGrad,
  Tex2DArrayFloatS32,
  Tex2DArrayFloatFloat,
  Tex2DArrayFloatFloatLevel,
  Tex2DArrayFloatFloatGrad,
  Tex2DArrayS32S32,
  Tex2DArrayS32Float,
  Tex2DArrayS32FloatLevel,
  Tex2DArrayS32FloatGrad,
  Tex2DArrayU32S32,
  Tex2DArrayU32Float,
  Tex2DArrayU32FloatLevel,
  Tex2DArrayU32FloatGrad,
  Tex3DFloatS32,
  Tex3DFloatFloat,
  Tex3DFloatFloatLevel,
  Tex3DFloatFloatGrad,
  Tex3DS32S32,
  Tex3DS32Float,
  Tex3DS32FloatLevel,
  Tex3DS32FloatGrad,
  Tex3DU32S32,
  Tex3DU32Float,
  Tex3DU32FloatLevel,
  Tex3DU32FloatGrad,
  TexCubeFloatFloat,
  TexCubeFloatFloatLevel,
  TexCubeS32Float,
  TexCubeS32FloatLevel,
  TexCubeU32Float,
  TexCubeU32FloatLevel,
  TexCubeArrayFloatFloat,
  TexCubeArrayFloatFloatLevel,
  TexCubeArrayS32Float,
  TexCubeArrayS32FloatLevel,
  TexCubeArrayU32Float,
  TexCubeArrayU32FloatLevel,
  Tld4R2DFloatFloat,
  Tld4G2DFloatFloat,
  Tld4B2DFloatFloat,
  Tld4A2DFloatFloat,
  Tld4R2DS64Float,
  Tld4G2DS64Float,
  Tld4B2DS64Float,
  Tld4A2DS64Float,
  Tld4R2DU64Float,
  Tld4G2DU64Float,
  Tld4B2DU64Float,
  Tld4A2DU64Float,
  TexUnified1DFloatS32,
  TexUnified1DFloatFloat,
  TexUnified1DFloatFloatLevel,
  TexUnified1DFloatFloatGrad,
  TexUnified1DS32S32,
  TexUnified1DS32Float,
  TexUnified1DS32FloatLevel,
  TexUnified1DS32FloatGrad,
  TexUnified1DU32S32,
  TexUnified1DU32Float,
  TexUnified1DU32FloatLevel,
  TexUnified1DU32FloatGrad,
  TexUnified1DArrayFloatS32,
  TexUnified1DArrayFloatFloat,
  TexUnified1DArrayFloatFloatLevel,
  TexUnified1DArrayFloatFloatGrad,
  TexUnified1DArrayS32S32,
  TexUnified1DArrayS32Float,
  TexUnified1DArrayS32FloatLevel,
  TexUnified1DArrayS32FloatGrad,
  TexUnified1DArrayU32S32,
  TexUnified1DArrayU32Float,
  TexUnified1DArrayU32FloatLevel,
  TexUnified1DArrayU32FloatGrad,
  TexUnified2DFloatS32,
  TexUnified2DFloatFloat,
  TexUnified2DFloatFloatLevel,
  TexUnified2DFloatFloatGrad,
  TexUnified2DS32S32,
  TexUnified2DS32Float,
  TexUnified2DS32FloatLevel,
  TexUnified2DS32FloatGrad,
  TexUnified2DU32S32,
  TexUnified2DU32Float,
  TexUnified2DU32FloatLevel,
  TexUnified2DU32FloatGrad,
  TexUnified2DArrayFloatS32,
  TexUnified2DArrayFloatFloat,
  TexUnified2DArrayFloatFloatLevel,
  TexUnified2DArrayFloatFloatGrad,
  TexUnified2DArrayS32S32,
  TexUnified2DArrayS32Float,
  TexUnified2DArrayS32FloatLevel,
  TexUnified2DArrayS32FloatGrad,
  TexUnified2DArrayU32S32,
  TexUnified2DArrayU32Float,
  TexUnified2DArrayU32FloatLevel,
  TexUnified2DArrayU32FloatGrad,
  TexUnified3DFloatS32,
  TexUnified3DFloatFloat,
  TexUnified3DFloatFloatLevel,
  TexUnified3DFloatFloatGrad,
  TexUnified3DS32S32,
  TexUnified3DS32Float,
  TexUnified3DS32FloatLevel,
  TexUnified3DS32FloatGrad,
  TexUnified3DU32S32,
  TexUnified3DU32Float,
  TexUnified3DU32FloatLevel,
  TexUnified3DU32FloatGrad,
  TexUnifiedCubeFloatFloat,
  TexUnifiedCubeFloatFloatLevel,
  TexUnifiedCubeS32Float,
  TexUnifiedCubeS32FloatLevel,
  TexUnifiedCubeU32Float,
  TexUnifiedCubeU32FloatLevel,
  TexUnifiedCubeArrayFloatFloat,
  TexUnifiedCubeArrayFloatFloatLevel,
  TexUnifiedCubeArrayS32Float,
  TexUnifiedCubeArrayS32FloatLevel,
  TexUnifiedCubeArrayU32Float,
  TexUnifiedCubeArrayU32FloatLevel,
  TexUnifiedCubeFloatFloatGrad,
  TexUnifiedCubeS32FloatGrad,
  TexUnifiedCubeU32FloatGrad,
  TexUnifiedCubeArrayFloatFloatGrad,
  TexUnifiedCubeArrayS32FloatGrad,
  TexUnifiedCubeArrayU32FloatGrad,
  Tld4UnifiedR2DFloatFloat,
  Tld4UnifiedG2DFloatFloat,
  Tld4UnifiedB2DFloatFloat,
  Tld4UnifiedA2DFloatFloat,
  Tld4UnifiedR2DS64Float,
  Tld4UnifiedG2DS64Float,
  Tld4UnifiedB2DS64Float,
  Tld4UnifiedA2DS64Float,
  Tld4UnifiedR2DU64Float,
  Tld4UnifiedG2DU64Float,
  Tld4UnifiedB2DU64Float,
  Tld4UnifiedA2DU64Float,

  // Surface intrinsics
  Suld1DI8Clamp,
  Suld1DI16Clamp,
  Suld1DI32Clamp,
  Suld1DI64Clamp,
  Suld1DV2I8Clamp,
  Suld1DV2I16Clamp,
  Suld1DV2I32Clamp,
  Suld1DV2I64Clamp,
  Suld1DV4I8Clamp,
  Suld1DV4I16Clamp,
  Suld1DV4I32Clamp,

  Suld1DArrayI8Clamp,
  Suld1DArrayI16Clamp,
  Suld1DArrayI32Clamp,
  Suld1DArrayI64Clamp,
  Suld1DArrayV2I8Clamp,
  Suld1DArrayV2I16Clamp,
  Suld1DArrayV2I32Clamp,
  Suld1DArrayV2I64Clamp,
  Suld1DArrayV4I8Clamp,
  Suld1DArrayV4I16Clamp,
  Suld1DArrayV4I32Clamp,

  Suld2DI8Clamp,
  Suld2DI16Clamp,
  Suld2DI32Clamp,
  Suld2DI64Clamp,
  Suld2DV2I8Clamp,
  Suld2DV2I16Clamp,
  Suld2DV2I32Clamp,
  Suld2DV2I64Clamp,
  Suld2DV4I8Clamp,
  Suld2DV4I16Clamp,
  Suld2DV4I32Clamp,

  Suld2DArrayI8Clamp,
  Suld2DArrayI16Clamp,
  Suld2DArrayI32Clamp,
  Suld2DArrayI64Clamp,
  Suld2DArrayV2I8Clamp,
  Suld2DArrayV2I16Clamp,
  Suld2DArrayV2I32Clamp,
  Suld2DArrayV2I64Clamp,
  Suld2DArrayV4I8Clamp,
  Suld2DArrayV4I16Clamp,
  Suld2DArrayV4I32Clamp,

  Suld3DI8Clamp,
  Suld3DI16Clamp,
  Suld3DI32Clamp,
  Suld3DI64Clamp,
  Suld3DV2I8Clamp,
  Suld3DV2I16Clamp,
  Suld3DV2I32Clamp,
  Suld3DV2I64Clamp,
  Suld3DV4I8Clamp,
  Suld3DV4I16Clamp,
  Suld3DV4I32Clamp,

  Suld1DI8Trap,
  Suld1DI16Trap,
  Suld1DI32Trap,
  Suld1DI64Trap,
  Suld1DV2I8Trap,
  Suld1DV2I16Trap,
  Suld1DV2I32Trap,
  Suld1DV2I64Trap,
  Suld1DV4I8Trap,
  Suld1DV4I16Trap,
  Suld1DV4I32Trap,

  Suld1DArrayI8Trap,
  Suld1DArrayI16Trap,
  Suld1DArrayI32Trap,
  Suld1DArrayI64Trap,
  Suld1DArrayV2I8Trap,
  Suld1DArrayV2I16Trap,
  Suld1DArrayV2I32Trap,
  Suld1DArrayV2I64Trap,
  Suld1DArrayV4I8Trap,
  Suld1DArrayV4I16Trap,
  Suld1DArrayV4I32Trap,

  Suld2DI8Trap,
  Suld2DI16Trap,
  Suld2DI32Trap,
  Suld2DI64Trap,
  Suld2DV2I8Trap,
  Suld2DV2I16Trap,
  Suld2DV2I32Trap,
  Suld2DV2I64Trap,
  Suld2DV4I8Trap,
  Suld2DV4I16Trap,
  Suld2DV4I32Trap,

  Suld2DArrayI8Trap,
  Suld2DArrayI16Trap,
  Suld2DArrayI32Trap,
  Suld2DArrayI64Trap,
  Suld2DArrayV2I8Trap,
  Suld2DArrayV2I16Trap,
  Suld2DArrayV2I32Trap,
  Suld2DArrayV2I64Trap,
  Suld2DArrayV4I8Trap,
  Suld2DArrayV4I16Trap,
  Suld2DArrayV4I32Trap,

  Suld3DI8Trap,
  Suld3DI16Trap,
  Suld3DI32Trap,
  Suld3DI64Trap,
  Suld3DV2I8Trap,
  Suld3DV2I16Trap,
  Suld3DV2I32Trap,
  Suld3DV2I64Trap,
  Suld3DV4I8Trap,
  Suld3DV4I16Trap,
  Suld3DV4I32Trap,

  Suld1DI8Zero,
  Suld1DI16Zero,
  Suld1DI32Zero,
  Suld1DI64Zero,
  Suld1DV2I8Zero,
  Suld1DV2I16Zero,
  Suld1DV2I32Zero,
  Suld1DV2I64Zero,
  Suld1DV4I8Zero,
  Suld1DV4I16Zero,
  Suld1DV4I32Zero,

  Suld1DArrayI8Zero,
  Suld1DArrayI16Zero,
  Suld1DArrayI32Zero,
  Suld1DArrayI64Zero,
  Suld1DArrayV2I8Zero,
  Suld1DArrayV2I16Zero,
  Suld1DArrayV2I32Zero,
  Suld1DArrayV2I64Zero,
  Suld1DArrayV4I8Zero,
  Suld1DArrayV4I16Zero,
  Suld1DArrayV4I32Zero,

  Suld2DI8Zero,
  Suld2DI16Zero,
  Suld2DI32Zero,
  Suld2DI64Zero,
  Suld2DV2I8Zero,
  Suld2DV2I16Zero,
  Suld2DV2I32Zero,
  Suld2DV2I64Zero,
  Suld2DV4I8Zero,
  Suld2DV4I16Zero,
  Suld2DV4I32Zero,

  Suld2DArrayI8Zero,
  Suld2DArrayI16Zero,
  Suld2DArrayI32Zero,
  Suld2DArrayI64Zero,
  Suld2DArrayV2I8Zero,
  Suld2DArrayV2I16Zero,
  Suld2DArrayV2I32Zero,
  Suld2DArrayV2I64Zero,
  Suld2DArrayV4I8Zero,
  Suld2DArrayV4I16Zero,
  Suld2DArrayV4I32Zero,

  Suld3DI8Zero,
  Suld3DI16Zero,
  Suld3DI32Zero,
  Suld3DI64Zero,
  Suld3DV2I8Zero,
  Suld3DV2I16Zero,
  Suld3DV2I32Zero,
  Suld3DV2I64Zero,
  Suld3DV4I8Zero,
  Suld3DV4I16Zero,
  Suld3DV4I32Zero
};
} // namespace NVPTXISD

class NVPTXSubtarget;

//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
class NVPTXTargetLowering : public TargetLowering {
public:
  explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                               const NVPTXSubtarget &STI);
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;

  const char *getTargetNodeName(unsigned Opcode) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx,
                                     const DataLayout &DL) const;

  /// getFunctionParamOptimizedAlign - since function arguments are passed via
  /// .param space, we may want to increase their alignment in a way that
  /// ensures that we can effectively vectorize their loads & stores. We can
  /// increase alignment only if the function has internal or private linkage,
  /// since for other linkage types callers may already rely on the default
  /// alignment. To allow using 128-bit vectorized loads/stores, this function
  /// ensures that alignment is 16 or greater.
  Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy,
                                       const DataLayout &DL) const;
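  //
  // As an illustration of the intent (a sketch, not the exact lowering): for
  // a byval aggregate like
  //
  //   struct S { float a, b, c, d; };   // 16 bytes, natural alignment 4
  //
  // passed to an internal-linkage device function, the .param alignment may
  // be raised to 16 so the four floats can be read with one 128-bit
  // vectorized load instead of four scalar loads.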

  /// Helper for computing alignment of a device function byval parameter.
  Align getFunctionByValParamAlign(const Function *F, Type *ArgTy,
                                   Align InitialAlign,
                                   const DataLayout &DL) const;

  // Helper for getting a function parameter name. The name is composed from
  // its index and the function name. A negative index corresponds to the
  // special parameter (an unsized array) used for passing variable arguments.
  std::string getParamName(const Function *F, int Idx) const;
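  //
  // A sketch of the expected shape (illustrative, not a normative format):
  // for a function 'foo', parameter 0 is typically named "foo_param_0" in the
  // emitted PTX, while the vararg buffer gets its own reserved name derived
  // from the negative index.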

  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  /// Used to guide target-specific optimizations, like loop strength
  /// reduction (LoopStrengthReduce.cpp) and memory optimization for
  /// address mode (CodeGenPrepare.cpp).
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
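  //
  // PTX memory operands are essentially [symbol], [reg], and [reg+imm], so
  // (as a rough sketch of what this hook is asked) a base register plus a
  // constant offset is the kind of AddrMode one would expect to be legal,
  // while a scaled index register (AM.Scale != 0) has no direct PTX encoding.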

  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
    // Truncating 64-bit to 32-bit is free in SASS.
    if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
      return false;
    return SrcTy->getPrimitiveSizeInBits() == 64 &&
           DstTy->getPrimitiveSizeInBits() == 32;
  }
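  //
  // For example, "%lo = trunc i64 %x to i32" costs nothing: the 64-bit value
  // already lives in a pair of 32-bit registers in SASS, and the truncation
  // just uses the low half.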

  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
                         EVT VT) const override {
    if (VT.isVector())
      return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
    return MVT::i1;
  }
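  //
  // For example, a compare of two v2f16 operands yields v2i1 here, while a
  // scalar compare yields i1; how those bits are materialized is decided
  // later in lowering (e.g. via SETP_F16X2 for packed f16 compares).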

  ConstraintType getConstraintType(StringRef Constraint) const override;
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  std::string
  getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
               const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment,
               std::optional<std::pair<unsigned, const APInt &>> VAInfo,
               const CallBase &CB, unsigned UniqueCallSite) const;
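  //
  // The string built by getPrototype is the .callprototype declaration PTX
  // needs for indirect calls. A hedged sketch of its general shape (the exact
  // parameter types and alignments depend on the call site):
  //
  //   prototype_0 : .callprototype (.param .b32 _) _
  //                 (.param .b64 _, .param .b32 _);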

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
                      SelectionDAG &DAG) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  const NVPTXTargetMachine *nvTM;

  // PTX always uses 32-bit shift amounts
  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
    return MVT::i32;
  }
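  //
  // For example, a shift of an i64 value still takes its amount as an i32
  // node, matching the PTX shl.b64/shr.b64 instructions, whose shift-amount
  // operand is a 32-bit register.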

  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  // Get the degree of precision we want from 32-bit floating point division
  // operations.
  //
  //  0 - Use ptx div.approx
  //  1 - Use ptx div.full (approximate, but less so than div.approx)
  //  2 - Use IEEE-compliant div instructions, if available.
  int getDivF32Level() const;
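  //
  // In PTX terms this roughly selects between div.approx.f32 (level 0),
  // div.full.f32 (level 1), and the IEEE-compliant div.rn.f32 (level 2).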

  // Get whether we should use a precise or approximate 32-bit floating point
  // sqrt instruction.
  bool usePrecSqrtF32() const;

  // Get whether we should use instructions that flush floating-point denormals
  // to sign-preserving zero.
  bool useF32FTZ(const MachineFunction &MF) const;

  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &, bool &UseOneConst,
                          bool Reciprocal) const override;

  unsigned combineRepeatedFPDivisors() const override { return 2; }
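  //
  // Returning 2 means DAGCombiner may, when the fast-math flags allow it,
  // rewrite two or more divisions by the same value, e.g.
  //
  //   a/d and b/d  ==>  t = 1.0/d;  a*t and b*t
  //
  // so the expensive division is performed only once.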

  bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
  bool allowUnsafeFPMath(MachineFunction &MF) const;

  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT) const override {
    return true;
  }

  bool enableAggressiveFMAFusion(EVT VT) const override { return true; }

  // The default is to transform llvm.ctlz(x, false) (where false indicates
  // that x == 0 is not undefined behavior) into a branch that checks whether
  // x is 0 and avoids calling ctlz in that case. We have a dedicated ctlz
  // instruction, so we say that ctlz is cheap to speculate.
  bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; }
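  //
  // For example, since PTX provides clz.b32/clz.b64, llvm.ctlz.i32(%x, false)
  // can be lowered directly to a clz instruction, so the generic
  // "branch around the zero case" expansion would only add overhead.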

  AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
    // There's rarely any point in packing something into a vector type if we
    // already have the source data.
    return true;
  }

private:
  const NVPTXSubtarget &STI; // cache the subtarget here
  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;

  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerCopyToReg_128(SDValue Op, SelectionDAG &DAG) const;
  unsigned getNumRegisters(LLVMContext &Context, EVT VT,
                           std::optional<MVT> RegisterVT) const override;
  bool
  splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                              SDValue *Parts, unsigned NumParts, MVT PartVT,
                              std::optional<CallingConv::ID> CC) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  Align getArgumentAlignment(const CallBase *CB, Type *Ty, unsigned Idx,
                             const DataLayout &DL) const;
};

} // namespace llvm

#endif