1 | //===- NVPTXUtilities.cpp - Utility Functions -----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains miscellaneous utility functions |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "NVPTXUtilities.h" |
14 | #include "NVPTX.h" |
15 | #include "NVPTXTargetMachine.h" |
16 | #include "llvm/ADT/ArrayRef.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/IR/Argument.h" |
20 | #include "llvm/IR/Constants.h" |
21 | #include "llvm/IR/Function.h" |
22 | #include "llvm/IR/GlobalVariable.h" |
23 | #include "llvm/IR/Module.h" |
24 | #include "llvm/Support/Alignment.h" |
25 | #include "llvm/Support/ModRef.h" |
26 | #include "llvm/Support/Mutex.h" |
27 | #include <cstdint> |
28 | #include <cstring> |
29 | #include <map> |
30 | #include <mutex> |
31 | #include <optional> |
32 | #include <string> |
33 | #include <vector> |
34 | |
35 | namespace llvm { |
36 | |
37 | namespace { |
38 | typedef std::map<std::string, std::vector<unsigned>> key_val_pair_t; |
39 | typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t; |
40 | |
41 | struct AnnotationCache { |
42 | sys::Mutex Lock; |
43 | std::map<const Module *, global_val_annot_t> Cache; |
44 | }; |
45 | |
46 | AnnotationCache &getAnnotationCache() { |
47 | static AnnotationCache AC; |
48 | return AC; |
49 | } |
50 | } // anonymous namespace |
51 | |
52 | void clearAnnotationCache(const Module *Mod) { |
53 | auto &AC = getAnnotationCache(); |
54 | std::lock_guard<sys::Mutex> Guard(AC.Lock); |
55 | AC.Cache.erase(x: Mod); |
56 | } |
57 | |
58 | static void readIntVecFromMDNode(const MDNode *MetadataNode, |
59 | std::vector<unsigned> &Vec) { |
60 | for (unsigned i = 0, e = MetadataNode->getNumOperands(); i != e; ++i) { |
61 | ConstantInt *Val = |
62 | mdconst::extract<ConstantInt>(MD: MetadataNode->getOperand(I: i)); |
63 | Vec.push_back(x: Val->getZExtValue()); |
64 | } |
65 | } |
66 | |
67 | static void cacheAnnotationFromMD(const MDNode *MetadataNode, |
68 | key_val_pair_t &retval) { |
69 | auto &AC = getAnnotationCache(); |
70 | std::lock_guard<sys::Mutex> Guard(AC.Lock); |
71 | assert(MetadataNode && "Invalid mdnode for annotation" ); |
72 | assert((MetadataNode->getNumOperands() % 2) == 1 && |
73 | "Invalid number of operands" ); |
74 | // start index = 1, to skip the global variable key |
75 | // increment = 2, to skip the value for each property-value pairs |
76 | for (unsigned i = 1, e = MetadataNode->getNumOperands(); i != e; i += 2) { |
77 | // property |
78 | const MDString *prop = dyn_cast<MDString>(Val: MetadataNode->getOperand(I: i)); |
79 | assert(prop && "Annotation property not a string" ); |
80 | std::string Key = prop->getString().str(); |
81 | |
82 | // value |
83 | if (ConstantInt *Val = mdconst::dyn_extract<ConstantInt>( |
84 | MD: MetadataNode->getOperand(I: i + 1))) { |
85 | retval[Key].push_back(x: Val->getZExtValue()); |
86 | } else if (MDNode *VecMd = |
87 | dyn_cast<MDNode>(Val: MetadataNode->getOperand(I: i + 1))) { |
88 | // note: only "grid_constant" annotations support vector MDNodes. |
89 | // assert: there can only exist one unique key value pair of |
90 | // the form (string key, MDNode node). Operands of such a node |
91 | // shall always be unsigned ints. |
92 | auto [It, Inserted] = retval.try_emplace(k: Key); |
93 | if (Inserted) { |
94 | readIntVecFromMDNode(MetadataNode: VecMd, Vec&: It->second); |
95 | continue; |
96 | } |
97 | } else { |
98 | llvm_unreachable("Value operand not a constant int or an mdnode" ); |
99 | } |
100 | } |
101 | } |
102 | |
103 | static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { |
104 | auto &AC = getAnnotationCache(); |
105 | std::lock_guard<sys::Mutex> Guard(AC.Lock); |
106 | NamedMDNode *NMD = m->getNamedMetadata(Name: "nvvm.annotations" ); |
107 | if (!NMD) |
108 | return; |
109 | key_val_pair_t tmp; |
110 | for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { |
111 | const MDNode *elem = NMD->getOperand(i); |
112 | |
113 | GlobalValue *entity = |
114 | mdconst::dyn_extract_or_null<GlobalValue>(MD: elem->getOperand(I: 0)); |
115 | // entity may be null due to DCE |
116 | if (!entity) |
117 | continue; |
118 | if (entity != gv) |
119 | continue; |
120 | |
121 | // accumulate annotations for entity in tmp |
122 | cacheAnnotationFromMD(MetadataNode: elem, retval&: tmp); |
123 | } |
124 | |
125 | if (tmp.empty()) // no annotations for this gv |
126 | return; |
127 | |
128 | AC.Cache[m][gv] = std::move(tmp); |
129 | } |
130 | |
131 | static std::optional<unsigned> findOneNVVMAnnotation(const GlobalValue *gv, |
132 | const std::string &prop) { |
133 | auto &AC = getAnnotationCache(); |
134 | std::lock_guard<sys::Mutex> Guard(AC.Lock); |
135 | const Module *m = gv->getParent(); |
136 | auto ACIt = AC.Cache.find(x: m); |
137 | if (ACIt == AC.Cache.end()) |
138 | cacheAnnotationFromMD(m, gv); |
139 | else if (ACIt->second.find(x: gv) == ACIt->second.end()) |
140 | cacheAnnotationFromMD(m, gv); |
141 | // Look up AC.Cache[m][gv] again because cacheAnnotationFromMD may have |
142 | // inserted the entry. |
143 | auto &KVP = AC.Cache[m][gv]; |
144 | auto It = KVP.find(x: prop); |
145 | if (It == KVP.end()) |
146 | return std::nullopt; |
147 | return It->second[0]; |
148 | } |
149 | |
150 | static bool findAllNVVMAnnotation(const GlobalValue *gv, |
151 | const std::string &prop, |
152 | std::vector<unsigned> &retval) { |
153 | auto &AC = getAnnotationCache(); |
154 | std::lock_guard<sys::Mutex> Guard(AC.Lock); |
155 | const Module *m = gv->getParent(); |
156 | auto ACIt = AC.Cache.find(x: m); |
157 | if (ACIt == AC.Cache.end()) |
158 | cacheAnnotationFromMD(m, gv); |
159 | else if (ACIt->second.find(x: gv) == ACIt->second.end()) |
160 | cacheAnnotationFromMD(m, gv); |
161 | // Look up AC.Cache[m][gv] again because cacheAnnotationFromMD may have |
162 | // inserted the entry. |
163 | auto &KVP = AC.Cache[m][gv]; |
164 | auto It = KVP.find(x: prop); |
165 | if (It == KVP.end()) |
166 | return false; |
167 | retval = It->second; |
168 | return true; |
169 | } |
170 | |
171 | static bool globalHasNVVMAnnotation(const Value &V, const std::string &Prop) { |
172 | if (const auto *GV = dyn_cast<GlobalValue>(Val: &V)) |
173 | if (const auto Annot = findOneNVVMAnnotation(gv: GV, prop: Prop)) { |
174 | assert((*Annot == 1) && "Unexpected annotation on a symbol" ); |
175 | return true; |
176 | } |
177 | |
178 | return false; |
179 | } |
180 | |
181 | static bool argHasNVVMAnnotation(const Value &Val, |
182 | const std::string &Annotation, |
183 | const bool StartArgIndexAtOne = false) { |
184 | if (const Argument *Arg = dyn_cast<Argument>(Val: &Val)) { |
185 | const Function *Func = Arg->getParent(); |
186 | std::vector<unsigned> Annot; |
187 | if (findAllNVVMAnnotation(gv: Func, prop: Annotation, retval&: Annot)) { |
188 | const unsigned BaseOffset = StartArgIndexAtOne ? 1 : 0; |
189 | if (is_contained(Range&: Annot, Element: BaseOffset + Arg->getArgNo())) { |
190 | return true; |
191 | } |
192 | } |
193 | } |
194 | return false; |
195 | } |
196 | |
197 | static std::optional<unsigned> getFnAttrParsedInt(const Function &F, |
198 | StringRef Attr) { |
199 | return F.hasFnAttribute(Kind: Attr) |
200 | ? std::optional(F.getFnAttributeAsParsedInteger(Kind: Attr)) |
201 | : std::nullopt; |
202 | } |
203 | |
204 | static SmallVector<unsigned, 3> getFnAttrParsedVector(const Function &F, |
205 | StringRef Attr) { |
206 | SmallVector<unsigned, 3> V; |
207 | auto &Ctx = F.getContext(); |
208 | |
209 | if (F.hasFnAttribute(Kind: Attr)) { |
210 | // We expect the attribute value to be of the form "x[,y[,z]]", where x, y, |
211 | // and z are unsigned values. |
212 | StringRef S = F.getFnAttribute(Kind: Attr).getValueAsString(); |
213 | for (unsigned I = 0; I < 3 && !S.empty(); I++) { |
214 | auto [First, Rest] = S.split(Separator: "," ); |
215 | unsigned IntVal; |
216 | if (First.trim().getAsInteger(Radix: 0, Result&: IntVal)) |
217 | Ctx.emitError(ErrorStr: "can't parse integer attribute " + First + " in " + Attr); |
218 | |
219 | V.push_back(Elt: IntVal); |
220 | S = Rest; |
221 | } |
222 | } |
223 | return V; |
224 | } |
225 | |
226 | static std::optional<uint64_t> getVectorProduct(ArrayRef<unsigned> V) { |
227 | if (V.empty()) |
228 | return std::nullopt; |
229 | |
230 | return std::accumulate(first: V.begin(), last: V.end(), init: 1, binary_op: std::multiplies<uint64_t>{}); |
231 | } |
232 | |
233 | bool isParamGridConstant(const Argument &Arg) { |
234 | assert(isKernelFunction(*Arg.getParent()) && |
235 | "only kernel arguments can be grid_constant" ); |
236 | |
237 | if (!Arg.hasByValAttr()) |
238 | return false; |
239 | |
240 | // Lowering an argument as a grid_constant violates the byval semantics (and |
241 | // the C++ API) by reusing the same memory location for the argument across |
242 | // multiple threads. If an argument doesn't read memory and its address is not |
243 | // captured (its address is not compared with any value), then the tweak of |
244 | // the C++ API and byval semantics is unobservable by the program and we can |
245 | // lower the arg as a grid_constant. |
246 | if (Arg.onlyReadsMemory()) { |
247 | const auto CI = Arg.getAttributes().getCaptureInfo(); |
248 | if (!capturesAddress(CC: CI) && !capturesFullProvenance(CC: CI)) |
249 | return true; |
250 | } |
251 | |
252 | // "grid_constant" counts argument indices starting from 1 |
253 | if (argHasNVVMAnnotation(Val: Arg, Annotation: "grid_constant" , |
254 | /*StartArgIndexAtOne*/ true)) |
255 | return true; |
256 | |
257 | return false; |
258 | } |
259 | |
260 | bool isTexture(const Value &V) { return globalHasNVVMAnnotation(V, Prop: "texture" ); } |
261 | |
262 | bool isSurface(const Value &V) { return globalHasNVVMAnnotation(V, Prop: "surface" ); } |
263 | |
264 | bool isSampler(const Value &V) { |
265 | const char *AnnotationName = "sampler" ; |
266 | |
267 | return globalHasNVVMAnnotation(V, Prop: AnnotationName) || |
268 | argHasNVVMAnnotation(Val: V, Annotation: AnnotationName); |
269 | } |
270 | |
271 | bool isImageReadOnly(const Value &V) { |
272 | return argHasNVVMAnnotation(Val: V, Annotation: "rdoimage" ); |
273 | } |
274 | |
275 | bool isImageWriteOnly(const Value &V) { |
276 | return argHasNVVMAnnotation(Val: V, Annotation: "wroimage" ); |
277 | } |
278 | |
279 | bool isImageReadWrite(const Value &V) { |
280 | return argHasNVVMAnnotation(Val: V, Annotation: "rdwrimage" ); |
281 | } |
282 | |
283 | bool isImage(const Value &V) { |
284 | return isImageReadOnly(V) || isImageWriteOnly(V) || isImageReadWrite(V); |
285 | } |
286 | |
287 | bool isManaged(const Value &V) { return globalHasNVVMAnnotation(V, Prop: "managed" ); } |
288 | |
289 | StringRef getTextureName(const Value &V) { |
290 | assert(V.hasName() && "Found texture variable with no name" ); |
291 | return V.getName(); |
292 | } |
293 | |
294 | StringRef getSurfaceName(const Value &V) { |
295 | assert(V.hasName() && "Found surface variable with no name" ); |
296 | return V.getName(); |
297 | } |
298 | |
299 | StringRef getSamplerName(const Value &V) { |
300 | assert(V.hasName() && "Found sampler variable with no name" ); |
301 | return V.getName(); |
302 | } |
303 | |
304 | SmallVector<unsigned, 3> getMaxNTID(const Function &F) { |
305 | return getFnAttrParsedVector(F, Attr: "nvvm.maxntid" ); |
306 | } |
307 | |
308 | SmallVector<unsigned, 3> getReqNTID(const Function &F) { |
309 | return getFnAttrParsedVector(F, Attr: "nvvm.reqntid" ); |
310 | } |
311 | |
312 | SmallVector<unsigned, 3> getClusterDim(const Function &F) { |
313 | return getFnAttrParsedVector(F, Attr: "nvvm.cluster_dim" ); |
314 | } |
315 | |
316 | std::optional<uint64_t> getOverallMaxNTID(const Function &F) { |
317 | // Note: The semantics here are a bit strange. The PTX ISA states the |
318 | // following (11.4.2. Performance-Tuning Directives: .maxntid): |
319 | // |
320 | // Note that this directive guarantees that the total number of threads does |
321 | // not exceed the maximum, but does not guarantee that the limit in any |
322 | // particular dimension is not exceeded. |
323 | const auto MaxNTID = getMaxNTID(F); |
324 | return getVectorProduct(V: MaxNTID); |
325 | } |
326 | |
327 | std::optional<uint64_t> getOverallReqNTID(const Function &F) { |
328 | // Note: The semantics here are a bit strange. See getMaxNTID. |
329 | const auto ReqNTID = getReqNTID(F); |
330 | return getVectorProduct(V: ReqNTID); |
331 | } |
332 | |
333 | std::optional<uint64_t> getOverallClusterRank(const Function &F) { |
334 | // maxclusterrank and cluster_dim are mutually exclusive. |
335 | if (const auto ClusterRank = getMaxClusterRank(F)) |
336 | return ClusterRank; |
337 | |
338 | // Note: The semantics here are a bit strange. See getMaxNTID. |
339 | const auto ClusterDim = getClusterDim(F); |
340 | return getVectorProduct(V: ClusterDim); |
341 | } |
342 | |
343 | std::optional<unsigned> getMaxClusterRank(const Function &F) { |
344 | return getFnAttrParsedInt(F, Attr: "nvvm.maxclusterrank" ); |
345 | } |
346 | |
347 | std::optional<unsigned> getMinCTASm(const Function &F) { |
348 | return getFnAttrParsedInt(F, Attr: "nvvm.minctasm" ); |
349 | } |
350 | |
351 | std::optional<unsigned> getMaxNReg(const Function &F) { |
352 | return getFnAttrParsedInt(F, Attr: "nvvm.maxnreg" ); |
353 | } |
354 | |
355 | MaybeAlign getAlign(const CallInst &I, unsigned Index) { |
356 | // First check the alignstack metadata |
357 | if (MaybeAlign StackAlign = |
358 | I.getAttributes().getAttributes(Index).getStackAlignment()) |
359 | return StackAlign; |
360 | |
361 | // If that is missing, check the legacy nvvm metadata |
362 | if (MDNode *alignNode = I.getMetadata(Kind: "callalign" )) { |
363 | for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) { |
364 | if (const ConstantInt *CI = |
365 | mdconst::dyn_extract<ConstantInt>(MD: alignNode->getOperand(I: i))) { |
366 | unsigned V = CI->getZExtValue(); |
367 | if ((V >> 16) == Index) |
368 | return Align(V & 0xFFFF); |
369 | if ((V >> 16) > Index) |
370 | return std::nullopt; |
371 | } |
372 | } |
373 | } |
374 | return std::nullopt; |
375 | } |
376 | |
377 | Function *getMaybeBitcastedCallee(const CallBase *CB) { |
378 | return dyn_cast<Function>(Val: CB->getCalledOperand()->stripPointerCasts()); |
379 | } |
380 | |
381 | bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM) { |
382 | const auto &ST = |
383 | *static_cast<const NVPTXTargetMachine &>(TM).getSubtargetImpl(); |
384 | if (!ST.hasNoReturn()) |
385 | return false; |
386 | |
387 | assert((isa<Function>(V) || isa<CallInst>(V)) && |
388 | "Expect either a call instruction or a function" ); |
389 | |
390 | if (const CallInst *CallI = dyn_cast<CallInst>(Val: V)) |
391 | return CallI->doesNotReturn() && |
392 | CallI->getFunctionType()->getReturnType()->isVoidTy(); |
393 | |
394 | const Function *F = cast<Function>(Val: V); |
395 | return F->doesNotReturn() && |
396 | F->getFunctionType()->getReturnType()->isVoidTy() && |
397 | !isKernelFunction(F: *F); |
398 | } |
399 | |
400 | } // namespace llvm |
401 | |