1//===- InstrProfilingPlatformROCmInternal.h - ROCm shared interface -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Private interface shared between the ROCm host-shadow drain
10// (InstrProfilingPlatformROCm.cpp) and the Linux-only supplemental
11// HSA-introspection drain (InstrProfilingPlatformROCmHSA.cpp).
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef PROFILE_INSTRPROFILINGPLATFORMROCMINTERNAL_H
16#define PROFILE_INSTRPROFILINGPLATFORMROCMINTERNAL_H
17
18#include <stddef.h>
19#include <stdlib.h>
20
21// For prototype declarations
22struct OffloadSectionShadowGroup;
23
24namespace __prof_rocm {
25
26// free()-based scope guard. Use .release() to transfer ownership.
27struct UniqueFree {
28 void *Ptr;
29 explicit UniqueFree(void *P = nullptr) : Ptr(P) {}
30 ~UniqueFree() { free(ptr: Ptr); }
31 UniqueFree(const UniqueFree &) = delete;
32 UniqueFree &operator=(const UniqueFree &) = delete;
33 char *get() const { return static_cast<char *>(Ptr); }
34 void reset(void *P) {
35 free(ptr: Ptr);
36 Ptr = P;
37 }
38 void *release() {
39 void *P = Ptr;
40 Ptr = nullptr;
41 return P;
42 }
43};
44
45// Grow a heap array (doubling from InitCap) to hold at least MinCount elements
46// of ElemSize bytes each.
47// Success: zero new memory, update pointer, return 0.
48// Failure: return -1, data is still intact.
49inline int growArray(void **Arr, int *Cap, int MinCount, int InitCap,
50 size_t ElemSize) {
51 if (*Cap >= MinCount)
52 return 0;
53 int NewCap = *Cap ? *Cap : InitCap;
54 while (NewCap < MinCount)
55 NewCap *= 2;
56 void *New = realloc(ptr: *Arr, size: (size_t)NewCap * ElemSize);
57 if (!New)
58 return -1;
59 __builtin_memset((char *)New + (size_t)*Cap * ElemSize, 0,
60 (size_t)(NewCap - *Cap) * ElemSize);
61 *Arr = New;
62 *Cap = NewCap;
63 return 0;
64}
65
66// Set of (data, counters, names) device section-bounds tuples that have already
67// been drained. Both ROCm drains record here so each unique device counter set
68// is written exactly once.
69// See test/profile/instrprof-rocm-bounds-dedup.cpp.
70struct ProfBoundsSet {
71 struct Tuple {
72 const void *Data;
73 const void *Counters;
74 const void *Names;
75 };
76 enum { kInitCap = 64 };
77
78 Tuple *Items = nullptr;
79 int Count = 0;
80 int Cap = 0;
81
82 // True iff this exact (Data, Counters, Names) tuple was already recorded. All
83 // three fields must match: two code objects can share, e.g., a names section.
84 bool contains(const void *D, const void *C, const void *N) const {
85 for (int I = 0; I < Count; ++I)
86 if (Items[I].Data == D && Items[I].Counters == C && Items[I].Names == N)
87 return true;
88 return false;
89 }
90
91 // Record a tuple unless already present. Returns true only when a new tuple
92 // was added (false for a duplicate or when the growth failed under OOM).
93 bool record(const void *D, const void *C, const void *N) {
94 if (contains(D, C, N))
95 return false;
96 if (growArray(Arr: (void **)&Items, Cap: &Cap, MinCount: Count + 1, InitCap: kInitCap, ElemSize: sizeof(*Items)))
97 return false;
98 Items[Count].Data = D;
99 Items[Count].Counters = C;
100 Items[Count].Names = N;
101 ++Count;
102 return true;
103 }
104};
105
106// HIP/host-shadow helpers defined in InstrProfilingPlatformROCm.cpp and reused
107// by the HSA drain.
108int isVerboseMode();
109void ensureHipLoaded();
110// True once the loaded HIP runtime exposes hipMemcpy (device-to-host copies).
111int hipMemcpyAvailable();
112int memcpyDeviceToHost(void *Dst, const void *Src, size_t Size);
113int processDeviceOffloadPrf(void *DeviceOffloadPrf, const char *Target,
114 const ::OffloadSectionShadowGroup *Sections);
115
116#if defined(__linux__)
117// Implemented in InstrProfilingPlatformROCmHSA.cpp.
118
119// Record a drained section-bounds tuple so the supplemental HSA pass skips any
120// code object the host-shadow path already drained.
121void profRecordDrainedBounds(const void *Data, const void *Counters,
122 const void *Names);
123
124// Walk every GPU agent's loaded executables via HSA and drain each
125// __llvm_profile_sections table the host-shadow pass did not already handle.
126int drainDevicesViaHsa(void);
127#endif
128
129} // namespace __prof_rocm
130
131#endif // PROFILE_INSTRPROFILINGPLATFORMROCMINTERNAL_H
132