1//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7/// \file
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12
13#include "llvm/IR/PassManager.h"
14#include "llvm/Pass.h"
15#include "llvm/Support/AMDGPUAddrSpace.h"
16#include "llvm/Support/CodeGen.h"
17
18namespace llvm {
19
20class AMDGPUTargetMachine;
21class TargetMachine;
22
23// GlobalISel passes
24void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
25FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
26void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
27FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
28FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
29void initializeAMDGPURegBankCombinerPass(PassRegistry &);
30
31void initializeAMDGPURegBankSelectPass(PassRegistry &);
32
33// SI Passes
34FunctionPass *createGCNDPPCombinePass();
35FunctionPass *createSIAnnotateControlFlowPass();
36FunctionPass *createSIFoldOperandsPass();
37FunctionPass *createSIPeepholeSDWAPass();
38FunctionPass *createSILowerI1CopiesPass();
39FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
40FunctionPass *createSIShrinkInstructionsPass();
41FunctionPass *createSILoadStoreOptimizerPass();
42FunctionPass *createSIWholeQuadModePass();
43FunctionPass *createSIFixControlFlowLiveIntervalsPass();
44FunctionPass *createSIOptimizeExecMaskingPreRAPass();
45FunctionPass *createSIOptimizeVGPRLiveRangePass();
46FunctionPass *createSIFixSGPRCopiesPass();
47FunctionPass *createLowerWWMCopiesPass();
48FunctionPass *createSIMemoryLegalizerPass();
49FunctionPass *createSIInsertWaitcntsPass();
50FunctionPass *createSIPreAllocateWWMRegsPass();
51FunctionPass *createSIFormMemoryClausesPass();
52
53FunctionPass *createSIPostRABundlerPass();
54FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
55ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
56FunctionPass *createAMDGPUCodeGenPreparePass();
57FunctionPass *createAMDGPULateCodeGenPreparePass();
58FunctionPass *createAMDGPUMachineCFGStructurizerPass();
59FunctionPass *createAMDGPURewriteOutArgumentsPass();
60ModulePass *
61createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
62ModulePass *createAMDGPULowerBufferFatPointersPass();
63FunctionPass *createSIModeRegisterPass();
64FunctionPass *createGCNPreRAOptimizationsPass();
65
66struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
67 AMDGPUSimplifyLibCallsPass() {}
68 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
69};
70
71struct AMDGPUImageIntrinsicOptimizerPass
72 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
73 AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {}
74 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
75
76private:
77 TargetMachine &TM;
78};
79
80struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
81 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
82};
83
84void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
85
86void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
87extern char &AMDGPUMachineCFGStructurizerID;
88
89void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
90
91Pass *createAMDGPUAnnotateKernelFeaturesPass();
92Pass *createAMDGPUAttributorLegacyPass();
93void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
94void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
95extern char &AMDGPUAnnotateKernelFeaturesID;
96
97// DPP/Iterative option enables the atomic optimizer with given strategy
98// whereas None disables the atomic optimizer.
99enum class ScanOptions { DPP, Iterative, None };
100FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
101void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
102extern char &AMDGPUAtomicOptimizerID;
103
104ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
105void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
106extern char &AMDGPUCtorDtorLoweringLegacyPassID;
107
108FunctionPass *createAMDGPULowerKernelArgumentsPass();
109void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
110extern char &AMDGPULowerKernelArgumentsID;
111
112FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
113void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
114extern char &AMDGPUPromoteKernelArgumentsID;
115
116struct AMDGPUPromoteKernelArgumentsPass
117 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
118 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
119};
120
121ModulePass *createAMDGPULowerKernelAttributesPass();
122void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
123extern char &AMDGPULowerKernelAttributesID;
124
125struct AMDGPULowerKernelAttributesPass
126 : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
127 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
128};
129
130void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &);
131extern char &AMDGPULowerModuleLDSLegacyPassID;
132
133struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
134 const AMDGPUTargetMachine &TM;
135 AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
136
137 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
138};
139
140void initializeAMDGPULowerBufferFatPointersPass(PassRegistry &);
141extern char &AMDGPULowerBufferFatPointersID;
142
143struct AMDGPULowerBufferFatPointersPass
144 : PassInfoMixin<AMDGPULowerBufferFatPointersPass> {
145 AMDGPULowerBufferFatPointersPass(const TargetMachine &TM) : TM(TM) {}
146 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
147
148private:
149 const TargetMachine &TM;
150};
151
152void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
153extern char &AMDGPURewriteOutArgumentsID;
154
155void initializeGCNDPPCombinePass(PassRegistry &);
156extern char &GCNDPPCombineID;
157
158void initializeSIFoldOperandsPass(PassRegistry &);
159extern char &SIFoldOperandsID;
160
161void initializeSIPeepholeSDWAPass(PassRegistry &);
162extern char &SIPeepholeSDWAID;
163
164void initializeSIShrinkInstructionsPass(PassRegistry&);
165extern char &SIShrinkInstructionsID;
166
167void initializeSIFixSGPRCopiesPass(PassRegistry &);
168extern char &SIFixSGPRCopiesID;
169
170void initializeSIFixVGPRCopiesPass(PassRegistry &);
171extern char &SIFixVGPRCopiesID;
172
173void initializeSILowerWWMCopiesPass(PassRegistry &);
174extern char &SILowerWWMCopiesID;
175
176void initializeSILowerI1CopiesPass(PassRegistry &);
177extern char &SILowerI1CopiesID;
178
179void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
180extern char &AMDGPUGlobalISelDivergenceLoweringID;
181
182void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
183extern char &AMDGPUMarkLastScratchLoadID;
184
185void initializeSILowerSGPRSpillsPass(PassRegistry &);
186extern char &SILowerSGPRSpillsID;
187
188void initializeSILoadStoreOptimizerPass(PassRegistry &);
189extern char &SILoadStoreOptimizerID;
190
191void initializeSIWholeQuadModePass(PassRegistry &);
192extern char &SIWholeQuadModeID;
193
194void initializeSILowerControlFlowPass(PassRegistry &);
195extern char &SILowerControlFlowID;
196
197void initializeSIPreEmitPeepholePass(PassRegistry &);
198extern char &SIPreEmitPeepholeID;
199
200void initializeSILateBranchLoweringPass(PassRegistry &);
201extern char &SILateBranchLoweringPassID;
202
203void initializeSIOptimizeExecMaskingPass(PassRegistry &);
204extern char &SIOptimizeExecMaskingID;
205
206void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
207extern char &SIPreAllocateWWMRegsID;
208
209void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
210extern char &AMDGPUImageIntrinsicOptimizerID;
211
212void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
213extern char &AMDGPUPerfHintAnalysisID;
214
215void initializeGCNRegPressurePrinterPass(PassRegistry &);
216extern char &GCNRegPressurePrinterID;
217
218// Passes common to R600 and SI
219FunctionPass *createAMDGPUPromoteAlloca();
220void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
221extern char &AMDGPUPromoteAllocaID;
222
223FunctionPass *createAMDGPUPromoteAllocaToVector();
224void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
225extern char &AMDGPUPromoteAllocaToVectorID;
226
227struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
228 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
229 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
230
231private:
232 TargetMachine &TM;
233};
234
235struct AMDGPUPromoteAllocaToVectorPass
236 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
237 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
238 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
239
240private:
241 TargetMachine &TM;
242};
243
244struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
245 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
246 : TM(TM), ScanImpl(ScanImpl) {}
247 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
248
249private:
250 TargetMachine &TM;
251 ScanOptions ScanImpl;
252};
253
254Pass *createAMDGPUStructurizeCFGPass();
255FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
256ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
257
258struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
259 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
260 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
261
262private:
263 bool GlobalOpt;
264};
265
266class AMDGPUCodeGenPreparePass
267 : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
268private:
269 TargetMachine &TM;
270
271public:
272 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){};
273 PreservedAnalyses run(Function &, FunctionAnalysisManager &);
274};
275
276class AMDGPULowerKernelArgumentsPass
277 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
278private:
279 TargetMachine &TM;
280
281public:
282 AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){};
283 PreservedAnalyses run(Function &, FunctionAnalysisManager &);
284};
285
286class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
287private:
288 TargetMachine &TM;
289
290public:
291 AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
292 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
293};
294
295FunctionPass *createAMDGPUAnnotateUniformValues();
296
297ModulePass *createAMDGPUPrintfRuntimeBinding();
298void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
299extern char &AMDGPUPrintfRuntimeBindingID;
300
301void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
302extern char &AMDGPUResourceUsageAnalysisID;
303
304struct AMDGPUPrintfRuntimeBindingPass
305 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
306 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
307};
308
309ModulePass* createAMDGPUUnifyMetadataPass();
310void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
311extern char &AMDGPUUnifyMetadataID;
312
313struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
314 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
315};
316
317void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
318extern char &SIOptimizeExecMaskingPreRAID;
319
320void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
321extern char &SIOptimizeVGPRLiveRangeID;
322
323void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
324extern char &AMDGPUAnnotateUniformValuesPassID;
325
326void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
327extern char &AMDGPUCodeGenPrepareID;
328
329void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
330extern char &AMDGPURemoveIncompatibleFunctionsID;
331
332void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
333extern char &AMDGPULateCodeGenPrepareID;
334
335FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
336void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
337extern char &AMDGPURewriteUndefForPHILegacyPassID;
338
339class AMDGPURewriteUndefForPHIPass
340 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
341public:
342 AMDGPURewriteUndefForPHIPass() = default;
343 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
344};
345
346void initializeSIAnnotateControlFlowPass(PassRegistry&);
347extern char &SIAnnotateControlFlowPassID;
348
349void initializeSIMemoryLegalizerPass(PassRegistry&);
350extern char &SIMemoryLegalizerID;
351
352void initializeSIModeRegisterPass(PassRegistry&);
353extern char &SIModeRegisterID;
354
355void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
356extern char &AMDGPUInsertDelayAluID;
357
358void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
359extern char &AMDGPUInsertSingleUseVDSTID;
360
361void initializeSIInsertHardClausesPass(PassRegistry &);
362extern char &SIInsertHardClausesID;
363
364void initializeSIInsertWaitcntsPass(PassRegistry&);
365extern char &SIInsertWaitcntsID;
366
367void initializeSIFormMemoryClausesPass(PassRegistry&);
368extern char &SIFormMemoryClausesID;
369
370void initializeSIPostRABundlerPass(PassRegistry&);
371extern char &SIPostRABundlerID;
372
373void initializeGCNCreateVOPDPass(PassRegistry &);
374extern char &GCNCreateVOPDID;
375
376void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
377extern char &AMDGPUUnifyDivergentExitNodesID;
378
379ImmutablePass *createAMDGPUAAWrapperPass();
380void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
381ImmutablePass *createAMDGPUExternalAAWrapperPass();
382void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
383
384void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
385
386ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
387void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
388extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
389
390void initializeGCNNSAReassignPass(PassRegistry &);
391extern char &GCNNSAReassignID;
392
393void initializeGCNPreRALongBranchRegPass(PassRegistry &);
394extern char &GCNPreRALongBranchRegID;
395
396void initializeGCNPreRAOptimizationsPass(PassRegistry &);
397extern char &GCNPreRAOptimizationsID;
398
399FunctionPass *createAMDGPUSetWavePriorityPass();
400void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
401
402void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
403extern char &GCNRewritePartialRegUsesID;
404
405namespace AMDGPU {
/// Target index values. The enumerators are numbered consecutively from zero;
/// the explicit values below spell out the numbering the compiler would
/// assign implicitly.
// NOTE(review): the names suggest the four dwords of a scratch resource
// descriptor following a constant-data marker -- confirm against the users.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0 = 1,
  TI_SCRATCH_RSRC_DWORD1 = 2,
  TI_SCRATCH_RSRC_DWORD2 = 3,
  TI_SCRATCH_RSRC_DWORD3 = 4
};
413
414// FIXME: Missing constant_32bit
415inline bool isFlatGlobalAddrSpace(unsigned AS) {
416 return AS == AMDGPUAS::GLOBAL_ADDRESS ||
417 AS == AMDGPUAS::FLAT_ADDRESS ||
418 AS == AMDGPUAS::CONSTANT_ADDRESS ||
419 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
420}
421
422inline bool isExtendedGlobalAddrSpace(unsigned AS) {
423 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
424 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
425 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
426}
427
428static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
429 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
430
431 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
432 return true;
433
434 // This array is indexed by address space value enum elements 0 ... to 9
435 // clang-format off
436 static const bool ASAliasRules[10][10] = {
437 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
438 /* Flat */ {true, true, false, true, true, true, true, true, true, true},
439 /* Global */ {true, true, false, false, true, false, true, true, true, true},
440 /* Region */ {false, false, true, false, false, false, false, false, false, false},
441 /* Group */ {true, false, false, true, false, false, false, false, false, false},
442 /* Constant */ {true, true, false, false, false, false, true, true, true, true},
443 /* Private */ {true, false, false, false, false, true, false, false, false, false},
444 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true},
445 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true},
446 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true},
447 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true},
448 };
449 // clang-format on
450
451 return ASAliasRules[AS1][AS2];
452}
453
454}
455
456} // End namespace llvm
457
458#endif
459