1 | //===- NVVMIntrRange.cpp - Set range attributes for NVVM intrinsics -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass adds appropriate range attributes for calls to NVVM |
10 | // intrinsics that return a limited range of values. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "NVPTX.h" |
15 | #include "NVPTXUtilities.h" |
16 | #include "llvm/IR/InstIterator.h" |
17 | #include "llvm/IR/Instructions.h" |
18 | #include "llvm/IR/IntrinsicInst.h" |
19 | #include "llvm/IR/Intrinsics.h" |
20 | #include "llvm/IR/IntrinsicsNVPTX.h" |
21 | #include "llvm/IR/PassManager.h" |
22 | #include <cstdint> |
23 | |
24 | using namespace llvm; |
25 | |
26 | #define DEBUG_TYPE "nvvm-intr-range" |
27 | |
28 | namespace { |
29 | class NVVMIntrRange : public FunctionPass { |
30 | public: |
31 | static char ID; |
32 | NVVMIntrRange() : FunctionPass(ID) {} |
33 | |
34 | bool runOnFunction(Function &) override; |
35 | }; |
36 | } // namespace |
37 | |
38 | FunctionPass *llvm::createNVVMIntrRangePass() { return new NVVMIntrRange(); } |
39 | |
40 | char NVVMIntrRange::ID = 0; |
41 | INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range" , |
42 | "Add !range metadata to NVVM intrinsics." , false, false) |
43 | |
44 | // Adds the passed-in [Low,High) range information as metadata to the |
45 | // passed-in call instruction. |
46 | static bool addRangeAttr(uint64_t Low, uint64_t High, IntrinsicInst *II) { |
47 | if (II->getMetadata(KindID: LLVMContext::MD_range)) |
48 | return false; |
49 | |
50 | const uint64_t BitWidth = II->getType()->getIntegerBitWidth(); |
51 | ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High)); |
52 | |
53 | if (auto CurrentRange = II->getRange()) |
54 | Range = Range.intersectWith(CR: CurrentRange.value()); |
55 | |
56 | II->addRangeRetAttr(CR: Range); |
57 | return true; |
58 | } |
59 | |
60 | static bool runNVVMIntrRange(Function &F) { |
61 | struct Vector3 { |
62 | unsigned X, Y, Z; |
63 | }; |
64 | |
65 | // All these annotations are only valid for kernel functions. |
66 | if (!isKernelFunction(F)) |
67 | return false; |
68 | |
69 | const auto OverallReqNTID = getOverallReqNTID(F); |
70 | const auto OverallMaxNTID = getOverallMaxNTID(F); |
71 | const auto OverallClusterRank = getOverallClusterRank(F); |
72 | |
73 | // If this function lacks any range information, do nothing. |
74 | if (!(OverallReqNTID || OverallMaxNTID || OverallClusterRank)) |
75 | return false; |
76 | |
77 | const unsigned FunctionNTID = OverallReqNTID.value_or( |
78 | u: OverallMaxNTID.value_or(u: std::numeric_limits<unsigned>::max())); |
79 | |
80 | const unsigned FunctionClusterRank = |
81 | OverallClusterRank.value_or(u: std::numeric_limits<unsigned>::max()); |
82 | |
83 | const Vector3 MaxBlockSize{.X: std::min(a: 1024u, b: FunctionNTID), |
84 | .Y: std::min(a: 1024u, b: FunctionNTID), |
85 | .Z: std::min(a: 64u, b: FunctionNTID)}; |
86 | |
87 | // We conservatively use the maximum grid size as an upper bound for the |
88 | // cluster rank. |
89 | const Vector3 MaxClusterRank{.X: std::min(a: 0x7fffffffu, b: FunctionClusterRank), |
90 | .Y: std::min(a: 0xffffu, b: FunctionClusterRank), |
91 | .Z: std::min(a: 0xffffu, b: FunctionClusterRank)}; |
92 | |
93 | const auto ProccessIntrinsic = [&](IntrinsicInst *II) -> bool { |
94 | switch (II->getIntrinsicID()) { |
95 | // Index within block |
96 | case Intrinsic::nvvm_read_ptx_sreg_tid_x: |
97 | return addRangeAttr(Low: 0, High: MaxBlockSize.X, II); |
98 | case Intrinsic::nvvm_read_ptx_sreg_tid_y: |
99 | return addRangeAttr(Low: 0, High: MaxBlockSize.Y, II); |
100 | case Intrinsic::nvvm_read_ptx_sreg_tid_z: |
101 | return addRangeAttr(Low: 0, High: MaxBlockSize.Z, II); |
102 | |
103 | // Block size |
104 | case Intrinsic::nvvm_read_ptx_sreg_ntid_x: |
105 | return addRangeAttr(Low: 1, High: MaxBlockSize.X + 1, II); |
106 | case Intrinsic::nvvm_read_ptx_sreg_ntid_y: |
107 | return addRangeAttr(Low: 1, High: MaxBlockSize.Y + 1, II); |
108 | case Intrinsic::nvvm_read_ptx_sreg_ntid_z: |
109 | return addRangeAttr(Low: 1, High: MaxBlockSize.Z + 1, II); |
110 | |
111 | // Cluster size |
112 | case Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x: |
113 | return addRangeAttr(Low: 0, High: MaxClusterRank.X, II); |
114 | case Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y: |
115 | return addRangeAttr(Low: 0, High: MaxClusterRank.Y, II); |
116 | case Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z: |
117 | return addRangeAttr(Low: 0, High: MaxClusterRank.Z, II); |
118 | case Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x: |
119 | return addRangeAttr(Low: 1, High: MaxClusterRank.X + 1, II); |
120 | case Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y: |
121 | return addRangeAttr(Low: 1, High: MaxClusterRank.Y + 1, II); |
122 | case Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z: |
123 | return addRangeAttr(Low: 1, High: MaxClusterRank.Z + 1, II); |
124 | |
125 | case Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank: |
126 | if (OverallClusterRank) |
127 | return addRangeAttr(Low: 0, High: FunctionClusterRank, II); |
128 | break; |
129 | case Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank: |
130 | if (OverallClusterRank) |
131 | return addRangeAttr(Low: 1, High: FunctionClusterRank + 1, II); |
132 | break; |
133 | default: |
134 | return false; |
135 | } |
136 | return false; |
137 | }; |
138 | |
139 | // Go through the calls in this function. |
140 | bool Changed = false; |
141 | for (Instruction &I : instructions(F)) |
142 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &I)) |
143 | Changed |= ProccessIntrinsic(II); |
144 | |
145 | return Changed; |
146 | } |
147 | |
148 | bool NVVMIntrRange::runOnFunction(Function &F) { return runNVVMIntrRange(F); } |
149 | |
150 | PreservedAnalyses NVVMIntrRangePass::run(Function &F, |
151 | FunctionAnalysisManager &AM) { |
152 | return runNVVMIntrRange(F) ? PreservedAnalyses::none() |
153 | : PreservedAnalyses::all(); |
154 | } |
155 | |