1//===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains miscellaneous utility functions
10//
11//===----------------------------------------------------------------------===//
12
13#include "NVPTXUtilities.h"
14#include "NVPTX.h"
15#include "NVPTXTargetMachine.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/IR/Argument.h"
20#include "llvm/IR/Constants.h"
21#include "llvm/IR/Function.h"
22#include "llvm/IR/GlobalVariable.h"
23#include "llvm/IR/Module.h"
24#include "llvm/Support/Alignment.h"
25#include "llvm/Support/ModRef.h"
26#include "llvm/Support/Mutex.h"
27#include <cstdint>
28#include <cstring>
29#include <map>
30#include <mutex>
31#include <optional>
32#include <string>
33#include <vector>
34
35namespace llvm {
36
37namespace {
38typedef std::map<std::string, std::vector<unsigned>> key_val_pair_t;
39typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
40
41struct AnnotationCache {
42 sys::Mutex Lock;
43 std::map<const Module *, global_val_annot_t> Cache;
44};
45
46AnnotationCache &getAnnotationCache() {
47 static AnnotationCache AC;
48 return AC;
49}
50} // anonymous namespace
51
52void clearAnnotationCache(const Module *Mod) {
53 auto &AC = getAnnotationCache();
54 std::lock_guard<sys::Mutex> Guard(AC.Lock);
55 AC.Cache.erase(x: Mod);
56}
57
58static void readIntVecFromMDNode(const MDNode *MetadataNode,
59 std::vector<unsigned> &Vec) {
60 for (unsigned i = 0, e = MetadataNode->getNumOperands(); i != e; ++i) {
61 ConstantInt *Val =
62 mdconst::extract<ConstantInt>(MD: MetadataNode->getOperand(I: i));
63 Vec.push_back(x: Val->getZExtValue());
64 }
65}
66
67static void cacheAnnotationFromMD(const MDNode *MetadataNode,
68 key_val_pair_t &retval) {
69 auto &AC = getAnnotationCache();
70 std::lock_guard<sys::Mutex> Guard(AC.Lock);
71 assert(MetadataNode && "Invalid mdnode for annotation");
72 assert((MetadataNode->getNumOperands() % 2) == 1 &&
73 "Invalid number of operands");
74 // start index = 1, to skip the global variable key
75 // increment = 2, to skip the value for each property-value pairs
76 for (unsigned i = 1, e = MetadataNode->getNumOperands(); i != e; i += 2) {
77 // property
78 const MDString *prop = dyn_cast<MDString>(Val: MetadataNode->getOperand(I: i));
79 assert(prop && "Annotation property not a string");
80 std::string Key = prop->getString().str();
81
82 // value
83 if (ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(
84 MD: MetadataNode->getOperand(I: i + 1))) {
85 retval[Key].push_back(x: Val->getZExtValue());
86 } else if (MDNode *VecMd =
87 dyn_cast<MDNode>(Val: MetadataNode->getOperand(I: i + 1))) {
88 // note: only "grid_constant" annotations support vector MDNodes.
89 // assert: there can only exist one unique key value pair of
90 // the form (string key, MDNode node). Operands of such a node
91 // shall always be unsigned ints.
92 auto [It, Inserted] = retval.try_emplace(k: Key);
93 if (Inserted) {
94 readIntVecFromMDNode(MetadataNode: VecMd, Vec&: It->second);
95 continue;
96 }
97 } else {
98 llvm_unreachable("Value operand not a constant int or an mdnode");
99 }
100 }
101}
102
103static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
104 auto &AC = getAnnotationCache();
105 std::lock_guard<sys::Mutex> Guard(AC.Lock);
106 NamedMDNode *NMD = m->getNamedMetadata(Name: "nvvm.annotations");
107 if (!NMD)
108 return;
109 key_val_pair_t tmp;
110 for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
111 const MDNode *elem = NMD->getOperand(i);
112
113 GlobalValue *entity =
114 mdconst::dyn_extract_or_null<GlobalValue>(MD: elem->getOperand(I: 0));
115 // entity may be null due to DCE
116 if (!entity)
117 continue;
118 if (entity != gv)
119 continue;
120
121 // accumulate annotations for entity in tmp
122 cacheAnnotationFromMD(MetadataNode: elem, retval&: tmp);
123 }
124
125 if (tmp.empty()) // no annotations for this gv
126 return;
127
128 AC.Cache[m][gv] = std::move(tmp);
129}
130
131static std::optional<unsigned> findOneNVVMAnnotation(const GlobalValue *gv,
132 const std::string &prop) {
133 auto &AC = getAnnotationCache();
134 std::lock_guard<sys::Mutex> Guard(AC.Lock);
135 const Module *m = gv->getParent();
136 auto ACIt = AC.Cache.find(x: m);
137 if (ACIt == AC.Cache.end())
138 cacheAnnotationFromMD(m, gv);
139 else if (ACIt->second.find(x: gv) == ACIt->second.end())
140 cacheAnnotationFromMD(m, gv);
141 // Look up AC.Cache[m][gv] again because cacheAnnotationFromMD may have
142 // inserted the entry.
143 auto &KVP = AC.Cache[m][gv];
144 auto It = KVP.find(x: prop);
145 if (It == KVP.end())
146 return std::nullopt;
147 return It->second[0];
148}
149
150static bool findAllNVVMAnnotation(const GlobalValue *gv,
151 const std::string &prop,
152 std::vector<unsigned> &retval) {
153 auto &AC = getAnnotationCache();
154 std::lock_guard<sys::Mutex> Guard(AC.Lock);
155 const Module *m = gv->getParent();
156 auto ACIt = AC.Cache.find(x: m);
157 if (ACIt == AC.Cache.end())
158 cacheAnnotationFromMD(m, gv);
159 else if (ACIt->second.find(x: gv) == ACIt->second.end())
160 cacheAnnotationFromMD(m, gv);
161 // Look up AC.Cache[m][gv] again because cacheAnnotationFromMD may have
162 // inserted the entry.
163 auto &KVP = AC.Cache[m][gv];
164 auto It = KVP.find(x: prop);
165 if (It == KVP.end())
166 return false;
167 retval = It->second;
168 return true;
169}
170
171static bool globalHasNVVMAnnotation(const Value &V, const std::string &Prop) {
172 if (const auto *GV = dyn_cast<GlobalValue>(Val: &V))
173 if (const auto Annot = findOneNVVMAnnotation(gv: GV, prop: Prop)) {
174 assert((*Annot == 1) && "Unexpected annotation on a symbol");
175 return true;
176 }
177
178 return false;
179}
180
181static bool argHasNVVMAnnotation(const Value &Val,
182 const std::string &Annotation,
183 const bool StartArgIndexAtOne = false) {
184 if (const Argument *Arg = dyn_cast<Argument>(Val: &Val)) {
185 const Function *Func = Arg->getParent();
186 std::vector<unsigned> Annot;
187 if (findAllNVVMAnnotation(gv: Func, prop: Annotation, retval&: Annot)) {
188 const unsigned BaseOffset = StartArgIndexAtOne ? 1 : 0;
189 if (is_contained(Range&: Annot, Element: BaseOffset + Arg->getArgNo())) {
190 return true;
191 }
192 }
193 }
194 return false;
195}
196
197static std::optional<unsigned> getFnAttrParsedInt(const Function &F,
198 StringRef Attr) {
199 return F.hasFnAttribute(Kind: Attr)
200 ? std::optional(F.getFnAttributeAsParsedInteger(Kind: Attr))
201 : std::nullopt;
202}
203
204static SmallVector<unsigned, 3> getFnAttrParsedVector(const Function &F,
205 StringRef Attr) {
206 SmallVector<unsigned, 3> V;
207 auto &Ctx = F.getContext();
208
209 if (F.hasFnAttribute(Kind: Attr)) {
210 // We expect the attribute value to be of the form "x[,y[,z]]", where x, y,
211 // and z are unsigned values.
212 StringRef S = F.getFnAttribute(Kind: Attr).getValueAsString();
213 for (unsigned I = 0; I < 3 && !S.empty(); I++) {
214 auto [First, Rest] = S.split(Separator: ",");
215 unsigned IntVal;
216 if (First.trim().getAsInteger(Radix: 0, Result&: IntVal))
217 Ctx.emitError(ErrorStr: "can't parse integer attribute " + First + " in " + Attr);
218
219 V.push_back(Elt: IntVal);
220 S = Rest;
221 }
222 }
223 return V;
224}
225
226static std::optional<uint64_t> getVectorProduct(ArrayRef<unsigned> V) {
227 if (V.empty())
228 return std::nullopt;
229
230 return std::accumulate(first: V.begin(), last: V.end(), init: 1, binary_op: std::multiplies<uint64_t>{});
231}
232
233bool isParamGridConstant(const Argument &Arg) {
234 assert(isKernelFunction(*Arg.getParent()) &&
235 "only kernel arguments can be grid_constant");
236
237 if (!Arg.hasByValAttr())
238 return false;
239
240 // Lowering an argument as a grid_constant violates the byval semantics (and
241 // the C++ API) by reusing the same memory location for the argument across
242 // multiple threads. If an argument doesn't read memory and its address is not
243 // captured (its address is not compared with any value), then the tweak of
244 // the C++ API and byval semantics is unobservable by the program and we can
245 // lower the arg as a grid_constant.
246 if (Arg.onlyReadsMemory()) {
247 const auto CI = Arg.getAttributes().getCaptureInfo();
248 if (!capturesAddress(CC: CI) && !capturesFullProvenance(CC: CI))
249 return true;
250 }
251
252 // "grid_constant" counts argument indices starting from 1
253 if (argHasNVVMAnnotation(Val: Arg, Annotation: "grid_constant",
254 /*StartArgIndexAtOne*/ true))
255 return true;
256
257 return false;
258}
259
260bool isTexture(const Value &V) { return globalHasNVVMAnnotation(V, Prop: "texture"); }
261
262bool isSurface(const Value &V) { return globalHasNVVMAnnotation(V, Prop: "surface"); }
263
264bool isSampler(const Value &V) {
265 const char *AnnotationName = "sampler";
266
267 return globalHasNVVMAnnotation(V, Prop: AnnotationName) ||
268 argHasNVVMAnnotation(Val: V, Annotation: AnnotationName);
269}
270
271bool isImageReadOnly(const Value &V) {
272 return argHasNVVMAnnotation(Val: V, Annotation: "rdoimage");
273}
274
275bool isImageWriteOnly(const Value &V) {
276 return argHasNVVMAnnotation(Val: V, Annotation: "wroimage");
277}
278
279bool isImageReadWrite(const Value &V) {
280 return argHasNVVMAnnotation(Val: V, Annotation: "rdwrimage");
281}
282
283bool isImage(const Value &V) {
284 return isImageReadOnly(V) || isImageWriteOnly(V) || isImageReadWrite(V);
285}
286
287bool isManaged(const Value &V) { return globalHasNVVMAnnotation(V, Prop: "managed"); }
288
289StringRef getTextureName(const Value &V) {
290 assert(V.hasName() && "Found texture variable with no name");
291 return V.getName();
292}
293
294StringRef getSurfaceName(const Value &V) {
295 assert(V.hasName() && "Found surface variable with no name");
296 return V.getName();
297}
298
299StringRef getSamplerName(const Value &V) {
300 assert(V.hasName() && "Found sampler variable with no name");
301 return V.getName();
302}
303
304SmallVector<unsigned, 3> getMaxNTID(const Function &F) {
305 return getFnAttrParsedVector(F, Attr: "nvvm.maxntid");
306}
307
308SmallVector<unsigned, 3> getReqNTID(const Function &F) {
309 return getFnAttrParsedVector(F, Attr: "nvvm.reqntid");
310}
311
312SmallVector<unsigned, 3> getClusterDim(const Function &F) {
313 return getFnAttrParsedVector(F, Attr: "nvvm.cluster_dim");
314}
315
316std::optional<uint64_t> getOverallMaxNTID(const Function &F) {
317 // Note: The semantics here are a bit strange. The PTX ISA states the
318 // following (11.4.2. Performance-Tuning Directives: .maxntid):
319 //
320 // Note that this directive guarantees that the total number of threads does
321 // not exceed the maximum, but does not guarantee that the limit in any
322 // particular dimension is not exceeded.
323 const auto MaxNTID = getMaxNTID(F);
324 return getVectorProduct(V: MaxNTID);
325}
326
327std::optional<uint64_t> getOverallReqNTID(const Function &F) {
328 // Note: The semantics here are a bit strange. See getMaxNTID.
329 const auto ReqNTID = getReqNTID(F);
330 return getVectorProduct(V: ReqNTID);
331}
332
333std::optional<uint64_t> getOverallClusterRank(const Function &F) {
334 // maxclusterrank and cluster_dim are mutually exclusive.
335 if (const auto ClusterRank = getMaxClusterRank(F))
336 return ClusterRank;
337
338 // Note: The semantics here are a bit strange. See getMaxNTID.
339 const auto ClusterDim = getClusterDim(F);
340 return getVectorProduct(V: ClusterDim);
341}
342
343std::optional<unsigned> getMaxClusterRank(const Function &F) {
344 return getFnAttrParsedInt(F, Attr: "nvvm.maxclusterrank");
345}
346
347std::optional<unsigned> getMinCTASm(const Function &F) {
348 return getFnAttrParsedInt(F, Attr: "nvvm.minctasm");
349}
350
351std::optional<unsigned> getMaxNReg(const Function &F) {
352 return getFnAttrParsedInt(F, Attr: "nvvm.maxnreg");
353}
354
355MaybeAlign getAlign(const CallInst &I, unsigned Index) {
356 // First check the alignstack metadata
357 if (MaybeAlign StackAlign =
358 I.getAttributes().getAttributes(Index).getStackAlignment())
359 return StackAlign;
360
361 // If that is missing, check the legacy nvvm metadata
362 if (MDNode *alignNode = I.getMetadata(Kind: "callalign")) {
363 for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
364 if (const ConstantInt *CI =
365 mdconst::dyn_extract<ConstantInt>(MD: alignNode->getOperand(I: i))) {
366 unsigned V = CI->getZExtValue();
367 if ((V >> 16) == Index)
368 return Align(V & 0xFFFF);
369 if ((V >> 16) > Index)
370 return std::nullopt;
371 }
372 }
373 }
374 return std::nullopt;
375}
376
377Function *getMaybeBitcastedCallee(const CallBase *CB) {
378 return dyn_cast<Function>(Val: CB->getCalledOperand()->stripPointerCasts());
379}
380
381bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM) {
382 const auto &ST =
383 *static_cast<const NVPTXTargetMachine &>(TM).getSubtargetImpl();
384 if (!ST.hasNoReturn())
385 return false;
386
387 assert((isa<Function>(V) || isa<CallInst>(V)) &&
388 "Expect either a call instruction or a function");
389
390 if (const CallInst *CallI = dyn_cast<CallInst>(Val: V))
391 return CallI->doesNotReturn() &&
392 CallI->getFunctionType()->getReturnType()->isVoidTy();
393
394 const Function *F = cast<Function>(Val: V);
395 return F->doesNotReturn() &&
396 F->getFunctionType()->getReturnType()->isVoidTy() &&
397 !isKernelFunction(F: *F);
398}
399
400} // namespace llvm
401