| 1 | //===- InstrProfilingPlatformROCmInternal.h - ROCm shared interface -------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Private interface shared between the ROCm host-shadow drain |
| 10 | // (InstrProfilingPlatformROCm.cpp) and the Linux-only supplemental |
| 11 | // HSA-introspection drain (InstrProfilingPlatformROCmHSA.cpp). |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #ifndef PROFILE_INSTRPROFILINGPLATFORMROCMINTERNAL_H |
| 16 | #define PROFILE_INSTRPROFILINGPLATFORMROCMINTERNAL_H |
| 17 | |
| 18 | #include <stddef.h> |
| 19 | #include <stdlib.h> |
| 20 | |
| 21 | // For prototype declarations |
| 22 | struct OffloadSectionShadowGroup; |
| 23 | |
| 24 | namespace __prof_rocm { |
| 25 | |
| 26 | // free()-based scope guard. Use .release() to transfer ownership. |
| 27 | struct UniqueFree { |
| 28 | void *Ptr; |
| 29 | explicit UniqueFree(void *P = nullptr) : Ptr(P) {} |
| 30 | ~UniqueFree() { free(ptr: Ptr); } |
| 31 | UniqueFree(const UniqueFree &) = delete; |
| 32 | UniqueFree &operator=(const UniqueFree &) = delete; |
| 33 | char *get() const { return static_cast<char *>(Ptr); } |
| 34 | void reset(void *P) { |
| 35 | free(ptr: Ptr); |
| 36 | Ptr = P; |
| 37 | } |
| 38 | void *release() { |
| 39 | void *P = Ptr; |
| 40 | Ptr = nullptr; |
| 41 | return P; |
| 42 | } |
| 43 | }; |
| 44 | |
| 45 | // Grow a heap array (doubling from InitCap) to hold at least MinCount elements |
| 46 | // of ElemSize bytes each. |
| 47 | // Success: zero new memory, update pointer, return 0. |
| 48 | // Failure: return -1, data is still intact. |
| 49 | inline int growArray(void **Arr, int *Cap, int MinCount, int InitCap, |
| 50 | size_t ElemSize) { |
| 51 | if (*Cap >= MinCount) |
| 52 | return 0; |
| 53 | int NewCap = *Cap ? *Cap : InitCap; |
| 54 | while (NewCap < MinCount) |
| 55 | NewCap *= 2; |
| 56 | void *New = realloc(ptr: *Arr, size: (size_t)NewCap * ElemSize); |
| 57 | if (!New) |
| 58 | return -1; |
| 59 | __builtin_memset((char *)New + (size_t)*Cap * ElemSize, 0, |
| 60 | (size_t)(NewCap - *Cap) * ElemSize); |
| 61 | *Arr = New; |
| 62 | *Cap = NewCap; |
| 63 | return 0; |
| 64 | } |
| 65 | |
| 66 | // Set of (data, counters, names) device section-bounds tuples that have already |
| 67 | // been drained. Both ROCm drains record here so each unique device counter set |
| 68 | // is written exactly once. |
| 69 | // See test/profile/instrprof-rocm-bounds-dedup.cpp. |
| 70 | struct ProfBoundsSet { |
| 71 | struct Tuple { |
| 72 | const void *Data; |
| 73 | const void *Counters; |
| 74 | const void *Names; |
| 75 | }; |
| 76 | enum { kInitCap = 64 }; |
| 77 | |
| 78 | Tuple *Items = nullptr; |
| 79 | int Count = 0; |
| 80 | int Cap = 0; |
| 81 | |
| 82 | // True iff this exact (Data, Counters, Names) tuple was already recorded. All |
| 83 | // three fields must match: two code objects can share, e.g., a names section. |
| 84 | bool contains(const void *D, const void *C, const void *N) const { |
| 85 | for (int I = 0; I < Count; ++I) |
| 86 | if (Items[I].Data == D && Items[I].Counters == C && Items[I].Names == N) |
| 87 | return true; |
| 88 | return false; |
| 89 | } |
| 90 | |
| 91 | // Record a tuple unless already present. Returns true only when a new tuple |
| 92 | // was added (false for a duplicate or when the growth failed under OOM). |
| 93 | bool record(const void *D, const void *C, const void *N) { |
| 94 | if (contains(D, C, N)) |
| 95 | return false; |
| 96 | if (growArray(Arr: (void **)&Items, Cap: &Cap, MinCount: Count + 1, InitCap: kInitCap, ElemSize: sizeof(*Items))) |
| 97 | return false; |
| 98 | Items[Count].Data = D; |
| 99 | Items[Count].Counters = C; |
| 100 | Items[Count].Names = N; |
| 101 | ++Count; |
| 102 | return true; |
| 103 | } |
| 104 | }; |
| 105 | |
| 106 | // HIP/host-shadow helpers defined in InstrProfilingPlatformROCm.cpp and reused |
| 107 | // by the HSA drain. |
| 108 | int isVerboseMode(); |
| 109 | void ensureHipLoaded(); |
| 110 | // True once the loaded HIP runtime exposes hipMemcpy (device-to-host copies). |
| 111 | int hipMemcpyAvailable(); |
| 112 | int memcpyDeviceToHost(void *Dst, const void *Src, size_t Size); |
| 113 | int processDeviceOffloadPrf(void *DeviceOffloadPrf, const char *Target, |
| 114 | const ::OffloadSectionShadowGroup *Sections); |
| 115 | |
| 116 | #if defined(__linux__) |
| 117 | // Implemented in InstrProfilingPlatformROCmHSA.cpp. |
| 118 | |
| 119 | // Record a drained section-bounds tuple so the supplemental HSA pass skips any |
| 120 | // code object the host-shadow path already drained. |
| 121 | void profRecordDrainedBounds(const void *Data, const void *Counters, |
| 122 | const void *Names); |
| 123 | |
| 124 | // Walk every GPU agent's loaded executables via HSA and drain each |
| 125 | // __llvm_profile_sections table the host-shadow pass did not already handle. |
| 126 | int drainDevicesViaHsa(void); |
| 127 | #endif |
| 128 | |
| 129 | } // namespace __prof_rocm |
| 130 | |
| 131 | #endif // PROFILE_INSTRPROFILINGPLATFORMROCMINTERNAL_H |
| 132 | |