| 1 | //===- AMDGPUPerfHintAnalysis.h ---- analysis of memory traffic -*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
| 10 | /// \brief Analyzes if a function is potentially memory bound and if a kernel |
| 11 | /// may benefit from limiting the number of waves to reduce cache thrashing. |
| 12 | /// |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H |
| 16 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H |
| 17 | |
| 18 | #include "llvm/IR/PassManager.h" |
| 19 | #include "llvm/IR/ValueMap.h" |
| 20 | |
| 21 | #include "llvm/Analysis/CGSCCPassManager.h" |
| 22 | #include "llvm/Analysis/LazyCallGraph.h" |
| 23 | |
| 24 | namespace llvm { |
| 25 | |
| 26 | class AMDGPUPerfHintAnalysis; |
| 27 | class CallGraphSCC; |
| 28 | class GCNTargetMachine; |
| 29 | class LazyCallGraph; |
| 30 | |
| 31 | class AMDGPUPerfHintAnalysis { |
| 32 | public: |
| 33 | struct FuncInfo { |
| 34 | unsigned MemInstCost; |
| 35 | unsigned InstCost; |
| 36 | unsigned IAMInstCost; // Indirect access memory instruction count |
| 37 | unsigned LSMInstCost; // Large stride memory instruction count |
| 38 | bool HasDenseGlobalMemAcc; // Set if at least 1 basic block has relatively |
| 39 | // high global memory access |
| 40 | FuncInfo() |
| 41 | : MemInstCost(0), InstCost(0), IAMInstCost(0), LSMInstCost(0), |
| 42 | HasDenseGlobalMemAcc(false) {} |
| 43 | }; |
| 44 | |
| 45 | typedef ValueMap<const Function *, FuncInfo> FuncInfoMap; |
| 46 | |
| 47 | private: |
| 48 | FuncInfoMap FIM; |
| 49 | |
| 50 | public: |
| 51 | AMDGPUPerfHintAnalysis() {} |
| 52 | |
| 53 | // OldPM |
| 54 | bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC); |
| 55 | |
| 56 | // NewPM |
| 57 | bool run(const GCNTargetMachine &TM, LazyCallGraph &CG); |
| 58 | |
| 59 | bool isMemoryBound(const Function *F) const; |
| 60 | |
| 61 | bool needsWaveLimiter(const Function *F) const; |
| 62 | }; |
| 63 | |
| 64 | struct AMDGPUPerfHintAnalysisPass |
| 65 | : public PassInfoMixin<AMDGPUPerfHintAnalysisPass> { |
| 66 | const GCNTargetMachine &TM; |
| 67 | std::unique_ptr<AMDGPUPerfHintAnalysis> Impl; |
| 68 | |
| 69 | AMDGPUPerfHintAnalysisPass(const GCNTargetMachine &TM) |
| 70 | : TM(TM), Impl(std::make_unique<AMDGPUPerfHintAnalysis>()) {} |
| 71 | |
| 72 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
| 73 | }; |
| 74 | |
| 75 | } // namespace llvm |
| 76 | #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H |
| 77 | |