1//===- AMDGPUArchByHIP.cpp - list AMDGPU installed ----------*- C++ -*-----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a tool for detecting name of AMDGPU installed in system
10// using HIP runtime. This tool is used by AMDGPU OpenMP and HIP driver.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/Sequence.h"
16#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/ConvertUTF.h"
18#include "llvm/Support/DynamicLibrary.h"
19#include "llvm/Support/Error.h"
20#include "llvm/Support/FileSystem.h"
21#include "llvm/Support/Path.h"
22#include "llvm/Support/Process.h"
23#include "llvm/Support/Program.h"
24#include "llvm/Support/VersionTuple.h"
25#include "llvm/Support/WithColor.h"
26#include "llvm/Support/raw_ostream.h"
27#include <algorithm>
28#include <string>
29#include <vector>
30
31#ifdef _WIN32
32#include <windows.h>
33#endif
34
35using namespace llvm;
36
/// Stand-in for the device-properties struct filled in by the R0600 entry
/// point (HIP 6.x+). Only gcnArchName is named; the surrounding bytes are
/// opaque padding that places it at byte offset 1160 in a 1472-byte struct.
struct alignas(8) hipDeviceProp_tR0600 {
  char padding[1160];    // Fields preceding gcnArchName (not interpreted).
  char gcnArchName[256]; // Architecture name string written by the runtime.
  char padding2[56];     // Fields following gcnArchName (not interpreted).
};
43
/// Stand-in for the device-properties struct filled in by the R0000 (legacy)
/// entry point. Only gcnArchName is named; the surrounding bytes are opaque
/// padding that places it at byte offset 396.
struct alignas(8) hipDeviceProp_tR0000 {
  char padding[396];     // Fields preceding gcnArchName (not interpreted).
  char gcnArchName[256]; // Architecture name string written by the runtime.
  char padding2[1024];   // Fields following gcnArchName (not interpreted).
};
50
/// Status code returned by the HIP entry points this file calls.
///
/// Only hipSuccess is named because every other value is simply reported as
/// a number. The underlying type is fixed to int so that any error code the
/// runtime returns is a valid value of the enumeration; without a fixed
/// underlying type the only representable values would be 0 and 1.
enum hipError_t : int {
  hipSuccess = 0,
};
54
55typedef hipError_t (*hipGetDeviceCount_t)(int *);
56typedef hipError_t (*hipGetDevicePropertiesR0600_t)(hipDeviceProp_tR0600 *,
57 int);
58typedef hipError_t (*hipGetDevicePropertiesR0000_t)(hipDeviceProp_tR0000 *,
59 int);
60typedef hipError_t (*hipGetDeviceProperties_t)(hipDeviceProp_tR0000 *, int);
61typedef hipError_t (*hipRuntimeGetVersion_t)(int *);
62typedef const char *(*hipGetErrorString_t)(hipError_t);
63
// Verbose-output switch. Only declared here; the definition lives in another
// source file (presumably the tool's main driver -- TODO confirm).
extern cl::opt<bool> Verbose;

// Option category attached to the command-line options defined in this file.
cl::OptionCategory AMDGPUArchByHIPCategory("amdgpu-arch (HIP) options");
67
/// Which device-properties entry point of the HIP runtime should be used.
enum class HipApiVersion {
  /// Automatic fallback (R0600 -> R0000 -> unversioned).
  Auto,
  /// Force R0600 API (HIP 6.x+).
  R0600,
  /// Force R0000 API (legacy HIP).
  R0000,
  /// Force unversioned API (very old HIP).
  Unversioned,
};
74
75static cl::opt<HipApiVersion> HipApi(
76 "hip-api-version", cl::desc("Select HIP API version for device properties"),
77 cl::values(clEnumValN(HipApiVersion::Auto, "auto",
78 "Auto-detect (R0600 -> R0000 -> unversioned)"),
79 clEnumValN(HipApiVersion::R0600, "r0600", "Force R0600 API"),
80 clEnumValN(HipApiVersion::R0000, "r0000", "Force R0000 API"),
81 clEnumValN(HipApiVersion::Unversioned, "unversioned",
82 "Force unversioned API")),
83 cl::init(Val: HipApiVersion::Auto), cl::cat(AMDGPUArchByHIPCategory));
84
85#ifdef _WIN32
86// Return candidate bin/ directories by walking parent dirs of ExeDir.
87SmallVector<std::string, 8> getCandidateBinPaths(StringRef ExeDir) {
88 SmallVector<std::string, 8> Paths;
89 Paths.push_back(sys::path::convert_to_slash(ExeDir));
90 // Search parent/bin dirs: <root>/lib/llvm/bin needs depth 2,
91 // <root>/opt/rocm/lib/llvm/bin needs 3. Cap at 6.
92 constexpr int MaxParentLevels = 6;
93 SmallString<256> Parent(sys::path::parent_path(ExeDir));
94 for (int Depth = 0; Depth < MaxParentLevels && !Parent.empty(); ++Depth) {
95 if (sys::path::root_path(Parent) == StringRef(Parent))
96 break;
97 SmallString<256> Candidate(Parent);
98 sys::path::append(Candidate, "bin");
99 std::string CandStr = sys::path::convert_to_slash(Candidate);
100 auto IsDup = [&](const std::string &P) {
101 return StringRef(P).equals_insensitive(CandStr);
102 };
103 if (llvm::none_of(Paths, IsDup))
104 Paths.push_back(CandStr);
105 Parent = SmallString<256>(sys::path::parent_path(Parent));
106 }
107 return Paths;
108}
109
110static std::vector<std::string> getSearchPaths() {
111 std::vector<std::string> Paths;
112
113 // Get the directory of the current executable
114 if (auto MainExe = sys::fs::getMainExecutable(nullptr, nullptr);
115 !MainExe.empty()) {
116 StringRef ExeDir = sys::path::parent_path(MainExe);
117 auto BinPaths = getCandidateBinPaths(ExeDir);
118 Paths.insert(Paths.end(), BinPaths.begin(), BinPaths.end());
119 }
120
121 // Get the system directory
122 wchar_t SystemDirectory[MAX_PATH];
123 if (GetSystemDirectoryW(SystemDirectory, MAX_PATH) > 0) {
124 std::string Utf8SystemDir;
125 if (convertUTF16ToUTF8String(
126 ArrayRef<UTF16>(reinterpret_cast<const UTF16 *>(SystemDirectory),
127 wcslen(SystemDirectory)),
128 Utf8SystemDir))
129 Paths.push_back(Utf8SystemDir);
130 }
131
132 // Get the Windows directory
133 wchar_t WindowsDirectory[MAX_PATH];
134 if (GetWindowsDirectoryW(WindowsDirectory, MAX_PATH) > 0) {
135 std::string Utf8WindowsDir;
136 if (convertUTF16ToUTF8String(
137 ArrayRef<UTF16>(reinterpret_cast<const UTF16 *>(WindowsDirectory),
138 wcslen(WindowsDirectory)),
139 Utf8WindowsDir))
140 Paths.push_back(Utf8WindowsDir);
141 }
142
143 // Get the current working directory
144 SmallVector<char, 256> CWD;
145 if (sys::fs::current_path(CWD))
146 Paths.push_back(std::string(CWD.begin(), CWD.end()));
147
148 // Get the PATH environment variable
149 if (std::optional<std::string> PathEnv = sys::Process::GetEnv("PATH")) {
150 SmallVector<StringRef, 16> PathList;
151 StringRef(*PathEnv).split(PathList, sys::EnvPathSeparator);
152 for (auto &Path : PathList)
153 Paths.push_back(Path.str());
154 }
155
156 return Paths;
157}
158
159// Custom comparison function for dll name
160// Returns true when A's version is greater than B's (descending order).
161bool compareVersions(StringRef A, StringRef B) {
162 auto ParseVersion = [](StringRef S) -> VersionTuple {
163 StringRef Filename = sys::path::filename(S);
164 size_t Pos = Filename.find_last_of('_');
165 if (Pos == StringRef::npos)
166 return VersionTuple();
167
168 StringRef VerStr = Filename.substr(Pos + 1);
169 size_t DotPos = VerStr.find('.');
170 if (DotPos != StringRef::npos)
171 VerStr = VerStr.substr(0, DotPos);
172
173 VersionTuple Vt;
174 (void)Vt.tryParse(VerStr);
175 return Vt;
176 };
177
178 VersionTuple VtA = ParseVersion(A);
179 VersionTuple VtB = ParseVersion(B);
180 return VtA > VtB;
181}
182#endif
183
// Pick the HIP runtime library to load.
//
// Windows: scan every search directory for files named amdhip64_<n>.dll and
// return the full path of the one with the greatest <n> (the ROCm major
// version). A normal driver installation ships only the versioned name, and
// <n> cannot be hard-coded because this program may run against a future HIP
// runtime. If no versioned DLL is found, fall back to plain "amdhip64.dll".
//
// Linux: always use the default "libamdhip64.so".
//
// Returns {library name or path, IsFallback}, where IsFallback is true when
// the default library name is returned rather than a discovered path.
static std::pair<std::string, bool> findNewestHIPDLL() {
#ifdef _WIN32
  const StringRef Prefix = "amdhip64_";
  const StringRef Suffix = ".dll";

  std::string Best;
  bool HaveBest = false;

  for (const std::string &Dir : getSearchPaths()) {
    std::error_code EC;
    sys::fs::directory_iterator Entry(Dir, EC), End;
    while (Entry != End && !EC) {
      StringRef Name = sys::path::filename(Entry->path());
      if (Name.starts_with(Prefix) && Name.ends_with(Suffix)) {
        std::string Candidate = sys::path::convert_to_slash(Entry->path());
        // Replace the current pick only for a strictly greater version, so on
        // a tie the earlier search path wins and a colocated build DLL beats
        // a system copy.
        if (!HaveBest || compareVersions(Candidate, Best)) {
          Best = std::move(Candidate);
          HaveBest = true;
        }
      }
      Entry.increment(EC);
    }
  }

  if (!HaveBest)
    return {"amdhip64.dll", true};
  return {Best, false};
#else
  // No discovery on Linux: use the default shared object name.
  return {"libamdhip64.so", true};
#endif
}
222
223#ifdef _WIN32
224// Pre-load DLL with LOAD_WITH_ALTERED_SEARCH_PATH so transitive deps
225// resolve from its directory. Pinned so getPermanentLibrary reuses it.
226static void primeLibraryLoad(StringRef Path) {
227 // One DLL primed per process; subsequent calls are no-ops.
228 // Not thread-safe, but offload-arch is single-threaded.
229 static HMODULE PinnedModule = nullptr;
230 if (PinnedModule || !sys::path::is_absolute(Path))
231 return;
232 SmallVector<UTF16, 256> WPath;
233 if (!convertUTF8ToUTF16String(Path, WPath))
234 return;
235 WPath.push_back(0);
236 PinnedModule = LoadLibraryExW(reinterpret_cast<LPCWSTR>(WPath.data()),
237 nullptr, LOAD_WITH_ALTERED_SEARCH_PATH);
238 DWORD Err = GetLastError();
239 if (!PinnedModule && Verbose)
240 WithColor::note() << "priming LoadLibraryExW failed for " << Path
241 << " (error " << Err << ")\n";
242}
243#endif
244
245int printGPUsByHIP() {
246 auto [DynamicHIPPath, IsFallback] = findNewestHIPDLL();
247
248 if (Verbose) {
249 if (IsFallback)
250 outs() << "Using default HIP runtime: " << DynamicHIPPath << '\n';
251 else
252 outs() << "Found HIP runtime: " << DynamicHIPPath << '\n';
253 }
254
255 std::string ErrMsg;
256#ifdef _WIN32
257 // Prime DLL load so transitive deps resolve from its directory.
258 if (!IsFallback)
259 primeLibraryLoad(DynamicHIPPath);
260#endif
261 auto DynlibHandle = std::make_unique<llvm::sys::DynamicLibrary>(
262 args: llvm::sys::DynamicLibrary::getPermanentLibrary(filename: DynamicHIPPath.c_str(),
263 errMsg: &ErrMsg));
264 if (!DynlibHandle->isValid()) {
265 if (Verbose)
266 llvm::errs() << "Failed to load " << DynamicHIPPath << ": " << ErrMsg
267 << '\n';
268 return 1;
269 }
270
271 if (Verbose)
272 outs() << "Successfully loaded HIP runtime library\n";
273
274#define DYNAMIC_INIT_HIP(SYMBOL) \
275 { \
276 void *SymbolPtr = DynlibHandle->getAddressOfSymbol(#SYMBOL); \
277 if (!SymbolPtr) { \
278 llvm::errs() << "Failed to find symbol " << #SYMBOL << '\n'; \
279 return 1; \
280 } \
281 if (Verbose) \
282 outs() << "Found symbol: " << #SYMBOL << '\n'; \
283 SYMBOL = reinterpret_cast<decltype(SYMBOL)>(SymbolPtr); \
284 }
285
286 hipGetDeviceCount_t hipGetDeviceCount;
287 hipRuntimeGetVersion_t hipRuntimeGetVersion = nullptr;
288 hipGetDevicePropertiesR0600_t hipGetDevicePropertiesR0600 = nullptr;
289 hipGetDevicePropertiesR0000_t hipGetDevicePropertiesR0000 = nullptr;
290 hipGetDeviceProperties_t hipGetDeviceProperties = nullptr;
291 hipGetErrorString_t hipGetErrorString = nullptr;
292
293 DYNAMIC_INIT_HIP(hipGetDeviceCount);
294
295#undef DYNAMIC_INIT_HIP
296
297 auto LoadSymbol = [&](const char *Name, auto &FuncPtr,
298 const char *Desc = "") {
299 void *Sym = DynlibHandle->getAddressOfSymbol(symbolName: Name);
300 if (Sym) {
301 FuncPtr = reinterpret_cast<decltype(FuncPtr)>(Sym);
302 if (Verbose)
303 outs() << "Found symbol: " << Name << (Desc[0] ? " " : "") << Desc
304 << '\n';
305 return true;
306 }
307 return false;
308 };
309
310 LoadSymbol("hipGetErrorString", hipGetErrorString);
311
312 if (LoadSymbol("hipRuntimeGetVersion", hipRuntimeGetVersion)) {
313 int RuntimeVersion = 0;
314 if (hipRuntimeGetVersion(&RuntimeVersion) == hipSuccess) {
315 int Major = RuntimeVersion / 10000000;
316 int Minor = (RuntimeVersion / 100000) % 100;
317 int Patch = RuntimeVersion % 100000;
318 if (Verbose)
319 outs() << "HIP Runtime Version: " << Major << "." << Minor << "."
320 << Patch << '\n';
321 }
322 }
323
324 LoadSymbol("hipGetDevicePropertiesR0600", hipGetDevicePropertiesR0600,
325 "(HIP 6.x+ API)");
326 LoadSymbol("hipGetDevicePropertiesR0000", hipGetDevicePropertiesR0000,
327 "(legacy API)");
328 if (!hipGetDevicePropertiesR0600 && !hipGetDevicePropertiesR0000)
329 LoadSymbol("hipGetDeviceProperties", hipGetDeviceProperties,
330 "(unversioned legacy API)");
331
332 int DeviceCount;
333 if (Verbose)
334 outs() << "Calling hipGetDeviceCount...\n";
335 hipError_t Err = hipGetDeviceCount(&DeviceCount);
336 if (Err != hipSuccess) {
337 llvm::errs() << "Failed to get device count";
338 if (hipGetErrorString) {
339 llvm::errs() << ": " << hipGetErrorString(Err);
340 }
341 llvm::errs() << " (error code: " << Err << ")\n";
342 return 1;
343 }
344
345 if (Verbose)
346 outs() << "Found " << DeviceCount << " device(s)\n";
347
348 auto TryGetProperties = [&](auto *ApiFunc, auto *DummyProp, const char *Name,
349 int DeviceId) -> std::string {
350 if (!ApiFunc)
351 return "";
352
353 if (Verbose)
354 outs() << "Using " << Name << "...\n";
355
356 using PropType = std::remove_pointer_t<decltype(DummyProp)>;
357 PropType Prop;
358 hipError_t Err = ApiFunc(&Prop, DeviceId);
359
360 if (Err == hipSuccess) {
361 if (Verbose) {
362 outs() << Name << " struct: sizeof = " << sizeof(PropType)
363 << " bytes, offsetof(gcnArchName) = "
364 << offsetof(PropType, gcnArchName) << " bytes\n";
365 }
366 return Prop.gcnArchName;
367 }
368
369 if (Verbose)
370 llvm::errs() << Name << " failed (error code: " << Err << ")\n";
371 return "";
372 };
373
374 for (auto I : llvm::seq(Size: DeviceCount)) {
375 if (Verbose)
376 outs() << "Processing device " << I << "...\n";
377
378 std::string ArchName;
379 auto TryR0600 = [&](int Dev) -> bool {
380 if (!hipGetDevicePropertiesR0600)
381 return false;
382 ArchName = TryGetProperties(hipGetDevicePropertiesR0600,
383 (hipDeviceProp_tR0600 *)nullptr,
384 "R0600 API (HIP 6.x+)", Dev);
385 return !ArchName.empty();
386 };
387 auto TryR0000 = [&](int Dev) -> bool {
388 if (!hipGetDevicePropertiesR0000)
389 return false;
390 ArchName = TryGetProperties(hipGetDevicePropertiesR0000,
391 (hipDeviceProp_tR0000 *)nullptr,
392 "R0000 API (legacy HIP)", Dev);
393 return !ArchName.empty();
394 };
395 auto TryUnversioned = [&](int Dev) -> bool {
396 if (!hipGetDeviceProperties)
397 return false;
398 ArchName = TryGetProperties(hipGetDeviceProperties,
399 (hipDeviceProp_tR0000 *)nullptr,
400 "unversioned API (very old HIP)", Dev);
401 return !ArchName.empty();
402 };
403
404 [[maybe_unused]] bool OK;
405 switch (HipApi) {
406 case HipApiVersion::Auto:
407 OK = TryR0600(I) || TryR0000(I) || TryUnversioned(I);
408 break;
409 case HipApiVersion::R0600:
410 OK = TryR0600(I);
411 break;
412 case HipApiVersion::R0000:
413 OK = TryR0000(I);
414 break;
415 case HipApiVersion::Unversioned:
416 OK = TryUnversioned(I);
417 break;
418 }
419
420 if (ArchName.empty()) {
421 llvm::errs() << "Failed to get device properties for device " << I
422 << " - no APIs available or all failed\n";
423 return 1;
424 }
425
426 if (Verbose)
427 outs() << "Device " << I << " arch name: ";
428 llvm::outs() << ArchName << '\n';
429 }
430
431 return 0;
432}
433