//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition of the TargetLowering class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class AMDGPUMachineFunction;
class AMDGPUSubtarget;
struct ArgDescriptor;

class AMDGPUTargetLowering : public TargetLowering {
private:
  const AMDGPUSubtarget *Subtarget;

  /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
  /// legalized from a smaller type VT. Need to match pre-legalized type because
  /// the generic legalization inserts the add/sub between the select and
  /// compare.
  SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL,
                      unsigned Opc) const;

public:
  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// an unsigned integer. Truncating to this size and then zero-extending to
  /// the original size will not change the value.
  static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);

  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// a signed integer. Truncating to this size and then sign-extending to the
  /// original size will not change the value.
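  /// For example (illustrative, not from the original header): an i32 value
  /// known to lie in [-128, 127] needs only 8 bits, since truncating it to i8
  /// and sign-extending back to i32 preserves the value.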
  static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);

protected:
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;

  static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags);
  static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src,
                                     SDNodeFlags Flags);
  SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op,
                                SDNodeFlags Flags) const;
  SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const;
  std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG,
                                                const SDLoc SL, SDValue Op,
                                                SDNodeFlags Flags) const;

  SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          bool IsLog10, SDNodeFlags Flags) const;
  SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          SDNodeFlags Flags) const;
  SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                            SDNodeFlags Flags) const;
  SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerCTLZResults(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;

protected:
  bool shouldCombineMemoryType(EVT VT) const;
  SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const;

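  /// Descriptive note (inferred from the signature, not in the original
  /// header): splits a 64-bit bitwise operation with a constant operand into
  /// two 32-bit operations on the low and high halves, applying \p ValLo and
  /// \p ValHi as the per-half constants.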
  SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
                                       unsigned Opc, SDValue LHS,
                                       uint32_t ValLo, uint32_t ValHi) const;
  SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
                                  SDValue RHS, DAGCombinerInfo &DCI) const;

  SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
                               SDValue N) const;
  SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  TargetLowering::NegatibleCost
  getConstantNegateCost(const ConstantFPSDNode *C) const;

  bool isConstantCostlierToNegate(SDValue N) const;
  bool isConstantCheaperToNegate(SDValue N) const;
  SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);

  virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                                     SelectionDAG &DAG) const;

  /// Return the 64-bit value \p Op as two 32-bit integers.
  std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
  SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;

  /// Split a vector type into two parts. The first part is a power of two
  /// vector. The second part is whatever is left over, and is a scalar if it
  /// would otherwise be a 1-vector.
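  /// For example (illustrative): v3i32 splits into {v2i32, i32}; the single
  /// leftover element becomes a scalar rather than a 1-element vector.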
  std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const;

  /// Split a vector value into two parts of types LoVT and HiVT. HiVT may be
  /// a scalar.
  std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL,
                                          const EVT &LoVT, const EVT &HighVT,
                                          SelectionDAG &DAG) const;

  /// Split a vector load into 2 loads of half the vector.
  SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Widen a suitably aligned v3 load. For all other cases, split the input
  /// vector load.
  SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Split a vector store into 2 stores of half the vector.
  SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
  void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
                      SmallVectorImpl<SDValue> &Results) const;

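  /// Descriptive note (not in the original header): used for the AMDGPU
  /// kernel (compute) calling conventions, where arguments are passed via the
  /// kernarg segment rather than in registers.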
  void analyzeFormalArgumentsCompute(
      CCState &State, const SmallVectorImpl<ISD::InputArg> &Ins) const;

public:
  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);

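  /// Descriptive note (assumed semantics): \returns true if the sign of a
  /// floating-point zero can be ignored for \p Op, e.g. under no-signed-zeros
  /// fast-math flags.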
  bool mayIgnoreSignedZero(SDValue Op) const;

  static inline SDValue stripBitcast(SDValue Val) {
    return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
  }

  static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc);
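  /// Descriptive note (assumed semantics): \returns true if every user of
  /// \p N can fold a source modifier (neg/abs) for free, so applying the
  /// modifier to \p N itself costs nothing; \p CostThreshold bounds the
  /// heuristic's search.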
  static bool allUsesHaveSourceMods(const SDNode *N,
                                    unsigned CostThreshold = 4);
  bool isFAbsFree(EVT VT) const override;
  bool isFNegFree(EVT VT) const override;
  bool isTruncateFree(EVT Src, EVT Dest) const override;
  bool isTruncateFree(Type *Src, Type *Dest) const override;

  bool isZExtFree(Type *Src, Type *Dest) const override;
  bool isZExtFree(EVT Src, EVT Dest) const override;

  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                          ISD::NodeType ExtendKind) const override;

  MVT getVectorIdxTy(const DataLayout &) const override;
  bool isSelectSupported(SelectSupportKind) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool ShouldShrinkFPConstant(EVT VT) const override;
  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType,
                             EVT ExtVT) const override;

  bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const final;

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
                                    unsigned NumElem,
                                    unsigned AS) const override;
  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;

  bool isSDNodeAlwaysUniform(const SDNode *N) const override;

  // FIXME: This hook should not exist
  AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *) const override {
    return AtomicExpansionKind::None;
  }

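  /// Selects the correct CCAssignFn for a given CallingConvention.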
  static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
  static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  SDValue lowerUnhandledCall(CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals,
                             StringRef Reason) const;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   SDValue CC, DAGCombinerInfo &DCI) const;

  SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
                               SDValue RHS, SDValue True, SDValue False,
                               SDValue CC, DAGCombinerInfo &DCI) const;

  const char *getTargetNodeName(unsigned Opcode) const override;

  // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for
  // AMDGPU. Commit r319036
  // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6)
  // turned on MergeConsecutiveStores() before Instruction Selection for all
  // targets. Enough AMDGPU compiles go into an infinite loop (
  // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
  // MergeConsecutiveStores() re-merges, etc.) to warrant turning it off for
  // now.
  bool mergeStoresAfterLegalization(EVT) const override { return false; }

  bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
    return true;
  }
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &RefinementSteps, bool &UseOneConstNR,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &RefinementSteps) const override;

  virtual SDNode *PostISelFolding(MachineSDNode *N,
                                  SelectionDAG &DAG) const = 0;

  /// Determine which of the bits specified in \p Mask are known to be
  /// either zero or one and return them in the \p KnownZero and \p KnownOne
  /// bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth = 0) const override;

  unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
                                            Register R,
                                            const APInt &DemandedElts,
                                            const MachineRegisterInfo &MRI,
                                            unsigned Depth = 0) const override;

  bool isKnownNeverNaNForTargetNode(SDValue Op,
                                    const SelectionDAG &DAG,
                                    bool SNaN = false,
                                    unsigned Depth = 0) const override;

  bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
                           Register N1) const override;

  /// Helper function that adds Reg to the LiveIn list of the DAG's
  /// MachineFunction.
  ///
  /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise
  /// a copy from the register.
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT,
                               const SDLoc &SL,
                               bool RawReg = false) const;
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()));
  }

  // Returns the raw live in register rather than a copy from it.
  SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG,
                                  const TargetRegisterClass *RC,
                                  Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()),
                                true);
  }

  /// Similar to CreateLiveInRegister, except the value may be loaded from a
  /// stack slot rather than passed in a register.
  SDValue loadStackInputValue(SelectionDAG &DAG,
                              EVT VT,
                              const SDLoc &SL,
                              int64_t Offset) const;

  SDValue storeStackInputValue(SelectionDAG &DAG,
                               const SDLoc &SL,
                               SDValue Chain,
                               SDValue ArgVal,
                               int64_t Offset) const;

  SDValue loadInputValue(SelectionDAG &DAG,
                         const TargetRegisterClass *RC,
                         EVT VT, const SDLoc &SL,
                         const ArgDescriptor &Arg) const;

  enum ImplicitParameter {
    FIRST_IMPLICIT,
    PRIVATE_BASE,
    SHARED_BASE,
    QUEUE_PTR,
  };

  /// Helper function that returns the byte offset of the given
  /// type of implicit parameter.
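  /// Descriptive note (not in the original header): implicit parameters are
  /// laid out in the kernarg segment after the explicit kernel arguments.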
  uint32_t getImplicitParameterOffset(const MachineFunction &MF,
                                      const ImplicitParameter Param) const;
  uint32_t getImplicitParameterOffset(const uint64_t ExplicitKernArgSize,
                                      const ImplicitParameter Param) const;

  MVT getFenceOperandTy(const DataLayout &DL) const override {
    return MVT::i32;
  }

  AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
};

namespace AMDGPUISD {

enum NodeType : unsigned {
  // AMDIL ISD Opcodes
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  UMUL, // 32-bit unsigned multiplication.
  BRANCH_COND,
  // End AMDIL ISD Opcodes

  // Function call.
  CALL,
  TC_RETURN,
  TC_RETURN_GFX,
  TC_RETURN_CHAIN,
  TRAP,

  // Masked control flow nodes.
  IF,
  ELSE,
  LOOP,

  // A uniform kernel return that terminates the wavefront.
  ENDPGM,

  // s_endpgm, but we may want to insert it in the middle of the block.
  ENDPGM_TRAP,

  // "s_trap 2" equivalent on hardware that does not support it.
  SIMULATED_TRAP,

  // Return to a shader part's epilog code.
  RETURN_TO_EPILOG,

  // Return with values from a non-entry function.
  RET_GLUE,

  // Convert an unswizzled wave-uniform stack address to an address compatible
  // with a vector offset for use in stack access.
  WAVE_ADDRESS,

  DWORDADDR,
  FRACT,

  /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following the clamp
  /// output modifier behavior with dx10_enable.
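  /// For example (illustrative): CLAMP(1.5) == 1.0, CLAMP(-0.25) == 0.0, and
  /// CLAMP(NaN) == 0.0 per the dx10_enable behavior described above.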
  CLAMP,

  // This is SETCC with the full mask result which is used for a compare with a
  // result bit per item in the wavefront.
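  // Illustrative (assumed): on a wave64 subtarget the result is an i64 mask
  // with one bit per lane; on wave32 it is an i32 mask.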
  SETCC,
  SETREG,

  DENORM_MODE,

  // FP ops with input and output chain.
  FMA_W_CHAIN,
  FMUL_W_CHAIN,

  // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
  // Denormals handled on some parts.
  COS_HW,
  SIN_HW,
  FMAX_LEGACY,
  FMIN_LEGACY,

  FMAX3,
  SMAX3,
  UMAX3,
  FMIN3,
  SMIN3,
  UMIN3,
  FMED3,
  SMED3,
  UMED3,
  FMAXIMUM3,
  FMINIMUM3,
  FDOT2,
  URECIP,
  DIV_SCALE,
  DIV_FMAS,
  DIV_FIXUP,
  // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
  // treated as an illegal operation.
  FMAD_FTZ,

  // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
  // For f64, max error 2^29 ULP, handles denormals.
  RCP,
  RSQ,
  RCP_LEGACY,
  RCP_IFLAG,

  // log2, no denormal handling for f32.
  LOG,

  // exp2, no denormal handling for f32.
  EXP,

  FMUL_LEGACY,
  RSQ_CLAMP,
  FP_CLASS,
  DOT4,
  CARRY,
  BORROW,
  BFE_U32, // Extract a range of bits with zero extension to 32 bits.
  BFE_I32, // Extract a range of bits with sign extension to 32 bits.
  BFI,     // (src0 & src1) | (~src0 & src2)
  BFM,     // Insert a range of bits into a 32-bit word.
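  // Illustrative bit-level semantics of the nodes above (assumed):
  //   BFE_U32(x, off, w) == (x >> off) & ((1u << w) - 1)
  //   BFM(w, off)        == ((1u << w) - 1) << off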
  FFBH_U32, // ctlz with -1 if input is zero.
  FFBH_I32,
  FFBL_B32, // cttz with -1 if input is zero.
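  // For example (illustrative): FFBH_U32(0x00800000) == 8 (eight leading
  // zeros), and FFBH_U32(0) == 0xffffffff per the comment above.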
  MUL_U24,
  MUL_I24,
  MULHI_U24,
  MULHI_I24,
  MAD_U24,
  MAD_I24,
  MAD_U64_U32,
  MAD_I64_I32,
  PERM,
  TEXTURE_FETCH,
  R600_EXPORT,
  CONST_ADDRESS,
  REGISTER_LOAD,
  REGISTER_STORE,
  SAMPLE,
  SAMPLEB,
  SAMPLED,
  SAMPLEL,

  // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
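  // For example (illustrative): CVT_F32_UBYTE1(x) converts bits [15:8] of x,
  // interpreted as an unsigned byte, to f32.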
  CVT_F32_UBYTE0,
  CVT_F32_UBYTE1,
  CVT_F32_UBYTE2,
  CVT_F32_UBYTE3,

  // Convert two f32 values into a single register holding two packed f16
  // values, rounding to zero.
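  // Illustrative layout (assumed): the first operand lands in bits [15:0] and
  // the second in bits [31:16] of the packed result.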
  CVT_PKRTZ_F16_F32,
  CVT_PKNORM_I16_F32,
  CVT_PKNORM_U16_F32,
  CVT_PK_I16_I32,
  CVT_PK_U16_U32,

  // Same as the standard node, except the high bits of the resulting integer
  // are known 0.
  FP_TO_FP16,

  /// This node is for VLIW targets and it is used to represent a vector
  /// that is stored in consecutive registers with the same channel.
  /// For example:
  ///   |X |Y|Z|W|
  /// T0|v.x| | | |
  /// T1|v.y| | | |
  /// T2|v.z| | | |
  /// T3|v.w| | | |
  BUILD_VERTICAL_VECTOR,
  /// Pointer to the start of the shader's constant data.
  CONST_DATA_PTR,
  PC_ADD_REL_OFFSET,
  LDS,
  FPTRUNC_ROUND_UPWARD,
  FPTRUNC_ROUND_DOWNWARD,

  DUMMY_CHAIN,
  FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LOAD_D16_HI,
  LOAD_D16_LO,
  LOAD_D16_HI_I8,
  LOAD_D16_HI_U8,
  LOAD_D16_LO_I8,
  LOAD_D16_LO_U8,

  STORE_MSKOR,
  LOAD_CONSTANT,
  TBUFFER_STORE_FORMAT,
  TBUFFER_STORE_FORMAT_D16,
  TBUFFER_LOAD_FORMAT,
  TBUFFER_LOAD_FORMAT_D16,
  DS_ORDERED_COUNT,
  ATOMIC_CMP_SWAP,
  BUFFER_LOAD,
  BUFFER_LOAD_UBYTE,
  BUFFER_LOAD_USHORT,
  BUFFER_LOAD_BYTE,
  BUFFER_LOAD_SHORT,
  BUFFER_LOAD_TFE,
  BUFFER_LOAD_UBYTE_TFE,
  BUFFER_LOAD_USHORT_TFE,
  BUFFER_LOAD_BYTE_TFE,
  BUFFER_LOAD_SHORT_TFE,
  BUFFER_LOAD_FORMAT,
  BUFFER_LOAD_FORMAT_TFE,
  BUFFER_LOAD_FORMAT_D16,
  SBUFFER_LOAD,
  SBUFFER_LOAD_BYTE,
  SBUFFER_LOAD_UBYTE,
  SBUFFER_LOAD_SHORT,
  SBUFFER_LOAD_USHORT,
  BUFFER_STORE,
  BUFFER_STORE_BYTE,
  BUFFER_STORE_SHORT,
  BUFFER_STORE_FORMAT,
  BUFFER_STORE_FORMAT_D16,
  BUFFER_ATOMIC_SWAP,
  BUFFER_ATOMIC_ADD,
  BUFFER_ATOMIC_SUB,
  BUFFER_ATOMIC_SMIN,
  BUFFER_ATOMIC_UMIN,
  BUFFER_ATOMIC_SMAX,
  BUFFER_ATOMIC_UMAX,
  BUFFER_ATOMIC_AND,
  BUFFER_ATOMIC_OR,
  BUFFER_ATOMIC_XOR,
  BUFFER_ATOMIC_INC,
  BUFFER_ATOMIC_DEC,
  BUFFER_ATOMIC_CMPSWAP,
  BUFFER_ATOMIC_CSUB,
  BUFFER_ATOMIC_FADD,
  BUFFER_ATOMIC_FMIN,
  BUFFER_ATOMIC_FMAX,
  BUFFER_ATOMIC_COND_SUB_U32,

  LAST_AMDGPU_ISD_NUMBER
};

} // End namespace AMDGPUISD

} // End namespace llvm

#endif