1//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the entry points for global functions defined in
10// the LLVM NVPTX back-end.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
15#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
16
17#include "llvm/IR/PassManager.h"
18#include "llvm/Pass.h"
19#include "llvm/Support/AtomicOrdering.h"
20#include "llvm/Support/CodeGen.h"
21#include "llvm/Support/NVPTXAddrSpace.h"
22#include "llvm/Target/TargetMachine.h"
23
24namespace llvm {
25class FunctionPass;
26class MachineFunctionPass;
27class NVPTXTargetMachine;
28class PassRegistry;
29
namespace NVPTXCC {
/// Condition codes, numbered consecutively from EQ (0) through GE (5).
enum CondCodes {
  EQ = 0,
  NE = 1,
  LT = 2,
  LE = 3,
  GT = 4,
  GE = 5
};
} // namespace NVPTXCC
40
41FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
42 llvm::CodeGenOptLevel OptLevel);
43ModulePass *createNVPTXAssignValidGlobalNamesPass();
44ModulePass *createGenericToNVVMLegacyPass();
45ModulePass *createNVPTXCtorDtorLoweringLegacyPass();
46FunctionPass *createNVVMIntrRangePass();
47ModulePass *createNVVMReflectPass(unsigned int SmVersion);
48MachineFunctionPass *createNVPTXPrologEpilogPass();
49MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
50FunctionPass *createNVPTXImageOptimizerPass();
51ModulePass *createNVPTXLowerArgsPass();
52FunctionPass *createNVPTXSetByValParamAlignPass();
53FunctionPass *createNVPTXLowerAllocaPass();
54FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable,
55 bool NoTrapAfterNoreturn);
56FunctionPass *createNVPTXMarkKernelPtrsGlobalPass();
57FunctionPass *createNVPTXTagInvariantLoadsPass();
58FunctionPass *createNVPTXIRPeepholePass();
59MachineFunctionPass *createNVPTXPeephole();
60MachineFunctionPass *createNVPTXProxyRegErasurePass();
61MachineFunctionPass *createNVPTXForwardParamsPass();
62MachineFunctionPass *createNVPTXAddressFolderPass();
63
64void initializeNVVMReflectLegacyPassPass(PassRegistry &);
65void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
66void initializeNVPTXAllocaHoistingPass(PassRegistry &);
67void initializeNVPTXAsmPrinterPass(PassRegistry &);
68void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry &);
69void initializeNVPTXAtomicLowerPass(PassRegistry &);
70void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
71void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
72void initializeNVPTXLowerAllocaPass(PassRegistry &);
73void initializeNVPTXLowerUnreachablePass(PassRegistry &);
74void initializeNVPTXLowerArgsLegacyPassPass(PassRegistry &);
75void initializeNVPTXSetByValParamAlignLegacyPassPass(PassRegistry &);
76void initializeNVPTXProxyRegErasurePass(PassRegistry &);
77void initializeNVPTXForwardParamsPassPass(PassRegistry &);
78void initializeNVPTXAddressFolderPassPass(PassRegistry &);
79void initializeNVVMIntrRangePass(PassRegistry &);
80void initializeNVVMReflectPass(PassRegistry &);
81void initializeNVPTXAAWrapperPassPass(PassRegistry &);
82void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
83void initializeNVPTXPeepholePass(PassRegistry &);
84void initializeNVPTXMarkKernelPtrsGlobalLegacyPassPass(PassRegistry &);
85void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
86void initializeNVPTXIRPeepholePass(PassRegistry &);
87void initializeNVPTXPrologEpilogPassPass(PassRegistry &);
88
89struct NVVMIntrRangePass : OptionalPassInfoMixin<NVVMIntrRangePass> {
90 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
91};
92
93struct NVPTXIRPeepholePass : OptionalPassInfoMixin<NVPTXIRPeepholePass> {
94 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
95};
96
97struct NVVMReflectPass : OptionalPassInfoMixin<NVVMReflectPass> {
98 NVVMReflectPass() : SmVersion(0) {}
99 NVVMReflectPass(unsigned SmVersion) : SmVersion(SmVersion) {}
100 PreservedAnalyses run(Module &F, ModuleAnalysisManager &AM);
101
102private:
103 unsigned SmVersion;
104};
105
106struct GenericToNVVMPass : OptionalPassInfoMixin<GenericToNVVMPass> {
107 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
108};
109
110struct NVPTXCopyByValArgsPass : OptionalPassInfoMixin<NVPTXCopyByValArgsPass> {
111 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
112};
113
114struct NVPTXSetByValParamAlignPass
115 : OptionalPassInfoMixin<NVPTXSetByValParamAlignPass> {
116 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
117};
118
119struct NVPTXLowerArgsPass : OptionalPassInfoMixin<NVPTXLowerArgsPass> {
120private:
121 TargetMachine &TM;
122
123public:
124 NVPTXLowerArgsPass(TargetMachine &TM) : TM(TM) {};
125 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
126};
127
128struct NVPTXMarkKernelPtrsGlobalPass
129 : OptionalPassInfoMixin<NVPTXMarkKernelPtrsGlobalPass> {
130 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
131};
132
133struct NVPTXTagInvariantLoadsPass
134 : OptionalPassInfoMixin<NVPTXTagInvariantLoadsPass> {
135 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
136};
137
138namespace NVPTX {
/// Driver interface selector: NVCL is 0 and CUDA is 1.
enum DrvInterface {
  NVCL = 0,
  CUDA = 1
};
143
// Every field packed into TSFlags is described by a shift and a mask, and is
// always read as:
//   ((TSFlags & fieldMask) >> fieldShift)
// The enum below keeps the shift, the mask, and every valid value of the
// field together in one place.
enum VecInstType {
  // Position of the field.
  VecInstTypeShift = 0,
  VecInstTypeMask = 0xF,

  // Valid values of the field.
  VecNOP = 0x0,
  VecLoad = 0x1,
  VecStore = 0x2,
  VecBuild = 0x3,
  VecShuffle = 0x4,
  VecExtract = 0x5,
  VecInsert = 0x6,
  VecDest = 0x7,
  VecOther = 0xF
};
163
// Shift/mask pair selecting bit 4; the mask is derived from the shift so the
// two cannot drift apart.
enum SimpleMove {
  SimpleMoveShift = 4,
  SimpleMoveMask = 1 << SimpleMoveShift // 0x10
};
// Shift/mask pairs for the is-load bit (bit 5) and the is-store bit (bit 6);
// each mask is derived from its shift.
enum LoadStore {
  isLoadShift = 5,
  isLoadMask = 1 << isLoadShift, // 0x20
  isStoreShift = 6,
  isStoreMask = 1 << isStoreShift // 0x40
};
174
175// Extends LLVM AtomicOrdering with PTX Orderings:
176using OrderingUnderlyingType = unsigned int;
177enum Ordering : OrderingUnderlyingType {
178 NotAtomic = (OrderingUnderlyingType)
179 AtomicOrdering::NotAtomic, // PTX calls these: "Weak"
180 // Unordered = 1, // NVPTX maps LLVM Unorderd to Relaxed
181 Relaxed = (OrderingUnderlyingType)AtomicOrdering::Monotonic,
182 // Consume = 3, // Unimplemented in LLVM; NVPTX would map to "Acquire"
183 Acquire = (OrderingUnderlyingType)AtomicOrdering::Acquire,
184 Release = (OrderingUnderlyingType)AtomicOrdering::Release,
185 AcquireRelease = (OrderingUnderlyingType)AtomicOrdering::AcquireRelease,
186 SequentiallyConsistent =
187 (OrderingUnderlyingType)AtomicOrdering::SequentiallyConsistent,
188 Volatile = SequentiallyConsistent + 1,
189 RelaxedMMIO = Volatile + 1,
190};
191
/// Scope values, stored as unsigned int and numbered from Thread (0) upward.
using ScopeUnderlyingType = unsigned int;
enum Scope : ScopeUnderlyingType {
  Thread = 0,
  Block = 1,
  Cluster = 2,
  Device = 3,
  System = 4,
  // For SM < 70: denotes PTX op implicit/default .gpu scope
  DefaultDevice = 5,
  // Alias for the highest-numbered scope.
  LASTSCOPE = DefaultDevice
};
202
203using AddressSpaceUnderlyingType = unsigned int;
204enum AddressSpace : AddressSpaceUnderlyingType {
205 Generic = NVPTXAS::ADDRESS_SPACE_GENERIC,
206 Global = NVPTXAS::ADDRESS_SPACE_GLOBAL,
207 Shared = NVPTXAS::ADDRESS_SPACE_SHARED,
208 Const = NVPTXAS::ADDRESS_SPACE_CONST,
209 Local = NVPTXAS::ADDRESS_SPACE_LOCAL,
210 SharedCluster = NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER,
211 EntryParam = NVPTXAS::ADDRESS_SPACE_ENTRY_PARAM,
212
213 // DeviceParam is not a real address space, as it does not support pointers
214 // and instead can only be referenced by param+offset. For this reason it is
215 // only used in MIR as an instruction modifier and should not be used in LLVM
216 // IR.
217 DeviceParam
218};
219
namespace PTXLdStInstCode {
/// Source-type codes, numbered consecutively from Unsigned (0) to Untyped (3).
enum FromType {
  Unsigned = 0,
  Signed = 1,
  Float = 2,
  Untyped = 3
};
} // namespace PTXLdStInstCode
223
/// PTXCvtMode - Conversion code enumeration
namespace PTXCvtMode {
enum CvtMode {
  // Base modes: values 0 through 10, all of which fit inside BASE_MASK.
  NONE = 0,
  RNI = 1,
  RZI = 2,
  RMI = 3,
  RPI = 4,
  RN = 5,
  RZ = 6,
  RM = 7,
  RP = 8,
  RNA = 9,
  RS = 10,

  BASE_MASK = 0x0F,

  // Flags: one bit each, located above the BASE_MASK bits.
  FTZ_FLAG = 1 << 4,      // 0x10
  SAT_FLAG = 1 << 5,      // 0x20
  RELU_FLAG = 1 << 6,     // 0x40
  SATFINITE_FLAG = 1 << 7 // 0x80
};
} // namespace PTXCvtMode
246
/// PTXCmpMode - Comparison mode enumeration
namespace PTXCmpMode {
enum CmpMode {
  EQ = 0,
  NE = 1,
  LT = 2,
  LE = 3,
  GT = 4,
  GE = 5,
  EQU = 6,
  NEU = 7,
  LTU = 8,
  LEU = 9,
  GTU = 10,
  GEU = 11,
  NUM = 12,
  // Spelled out in full because NAN is a macro.
  NotANumber = 13,
};
} // namespace PTXCmpMode
267
namespace PTXPrmtMode {
/// Mode codes, numbered consecutively from NONE (0) through RC16 (6).
enum PrmtMode {
  NONE = 0,
  F4E = 1,
  B4E = 2,
  RC8 = 3,
  ECL = 4,
  ECR = 5,
  RC16 = 6,
};
} // namespace PTXPrmtMode
279
/// Scoped enumeration with unsigned underlying type; levels are numbered
/// consecutively from Approx (0) through IEEE754_NoFTZ (3).
enum class DivPrecisionLevel : unsigned {
  Approx,        // 0
  Full,          // 1
  IEEE754,       // 2
  IEEE754_NoFTZ, // 3
};
286
287} // namespace NVPTX
288void initializeNVPTXDAGToDAGISelLegacyPass(PassRegistry &);
289} // namespace llvm
290
291// Defines symbolic names for NVPTX registers. This defines a mapping from
292// register name to register number.
293#define GET_REGINFO_ENUM
294#include "NVPTXGenRegisterInfo.inc"
295
296// Defines symbolic names for the NVPTX instructions.
297#define GET_INSTRINFO_ENUM
298#define GET_INSTRINFO_MC_HELPER_DECLS
299#define GET_INSTRINFO_OPERAND_ENUM
300#include "NVPTXGenInstrInfo.inc"
301
302#endif
303