//===-- stack_depot.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef SCUDO_STACK_DEPOT_H_
#define SCUDO_STACK_DEPOT_H_

#include "atomic_helpers.h"
#include "common.h"
#include "mutex.h"

namespace scudo {

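// A small 32-bit hash builder in the style of MurmurHash2: add() applies the
// per-word mixing step and get() the finalization, with the state seeded from
// Seed ^ Init rather than the input length and with no tail handling (callers
// feed it whole u32 words). A minimal usage sketch, mirroring how insert()
// and find() below hash a trace over hypothetical [Begin, End) frame
// pointers:
//
//   MurMur2HashBuilder B;
//   for (uptr *I = Begin; I != End; ++I)
//     B.add(u32(*I) >> 2);
//   u32 Hash = B.get();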
class MurMur2HashBuilder {
  static const u32 M = 0x5bd1e995;
  static const u32 Seed = 0x9747b28c;
  static const u32 R = 24;
  u32 H;

public:
  explicit MurMur2HashBuilder(u32 Init = 0) { H = Seed ^ Init; }
  void add(u32 K) {
    K *= M;
    K ^= K >> R;
    K *= M;
    H *= M;
    H ^= K;
  }
  u32 get() {
    u32 X = H;
    X ^= X >> 13;
    X *= M;
    X ^= X >> 15;
    return X;
  }
};

class alignas(atomic_u64) StackDepot {
  HybridMutex RingEndMu;
  u32 RingEnd = 0;

  // This data structure stores a stack trace for each allocation and
  // deallocation when stack trace recording is enabled. A stack trace may be
  // looked up using a hash of the trace. The lower bits of the hash are an
  // index into the Tab array, which stores an index into the Ring array where
  // the stack traces are stored. As the name implies, Ring is a ring buffer,
  // so a stack trace may wrap around to the start of the array.
  //
  // Each stack trace in Ring is prefixed by a stack trace marker consisting of
  // a fixed 1 bit in bit 0 (this allows disambiguation between stack frames
  // and stack trace markers in the case where instruction pointers are 4-byte
  // aligned, as they are on arm64), the stack trace hash in bits 1-32, and the
  // size of the stack trace in bits 33-63.
  //
  // The insert() function is potentially racy in its accesses to the Tab and
  // Ring arrays, but find() is resilient to races in the sense that, barring
  // hash collisions, it will either return the correct stack trace or no stack
  // trace at all, even if two instances of insert() raced with one another.
  // This is achieved by re-checking the hash of the stack trace before
  // returning the trace.
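  //
  // Concretely (a sketch mirroring insert() and find() below, with N and H
  // standing in for the frame count and trace hash), a trace occupies N + 1
  // consecutive Ring slots, wrapping modulo RingSize: the marker
  //
  //   (u64(N) << 33) | (u64(H) << 1) | 1
  //
  // followed by one frame address per slot.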

  u32 RingSize = 0;
  u32 RingMask = 0;
  u32 TabMask = 0;
  // This is immediately followed by RingSize atomic_u64 and
  // (TabMask + 1) atomic_u32.

  atomic_u64 *getRing() {
    return reinterpret_cast<atomic_u64 *>(reinterpret_cast<char *>(this) +
                                          sizeof(StackDepot));
  }

  atomic_u32 *getTab() {
    return reinterpret_cast<atomic_u32 *>(reinterpret_cast<char *>(this) +
                                          sizeof(StackDepot) +
                                          sizeof(atomic_u64) * RingSize);
  }

  const atomic_u64 *getRing() const {
    return reinterpret_cast<const atomic_u64 *>(
        reinterpret_cast<const char *>(this) + sizeof(StackDepot));
  }

  const atomic_u32 *getTab() const {
    return reinterpret_cast<const atomic_u32 *>(
        reinterpret_cast<const char *>(this) + sizeof(StackDepot) +
        sizeof(atomic_u64) * RingSize);
  }

public:
  void init(u32 RingSz, u32 TabSz) {
    DCHECK(isPowerOfTwo(RingSz));
    DCHECK(isPowerOfTwo(TabSz));
    RingSize = RingSz;
    RingMask = RingSz - 1;
    TabMask = TabSz - 1;
  }

  // Ensure that RingSize, RingMask and TabMask are set up in a way that
  // all accesses are within range of BufSize.
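  // Concretely, a valid buffer holds exactly
  //
  //   sizeof(StackDepot) + sizeof(atomic_u64) * RingSize +
  //       sizeof(atomic_u32) * (TabMask + 1)
  //
  // bytes; anything smaller or larger is rejected.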
  bool isValid(uptr BufSize) const {
    if (!isPowerOfTwo(RingSize))
      return false;
    uptr RingBytes = sizeof(atomic_u64) * RingSize;
    if (RingMask + 1 != RingSize)
      return false;

    if (TabMask == 0)
      return false;
    uptr TabSize = TabMask + 1;
    if (!isPowerOfTwo(TabSize))
      return false;
    uptr TabBytes = sizeof(atomic_u32) * TabSize;

    // Subtract and detect underflow.
    if (BufSize < sizeof(StackDepot))
      return false;
    BufSize -= sizeof(StackDepot);
    if (BufSize < TabBytes)
      return false;
    BufSize -= TabBytes;
    if (BufSize < RingBytes)
      return false;
    return BufSize == RingBytes;
  }

  // Insert the stack trace [Begin, End) into the stack depot, and return the
  // hash of the trace.
  u32 insert(uptr *Begin, uptr *End) {
    auto *Tab = getTab();
    auto *Ring = getRing();

    MurMur2HashBuilder B;
    for (uptr *I = Begin; I != End; ++I)
      B.add(u32(*I) >> 2);
    u32 Hash = B.get();

    u32 Pos = Hash & TabMask;
    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
    u64 Id = (u64(End - Begin) << 33) | (u64(Hash) << 1) | 1;
    if (Entry == Id)
      return Hash;

    ScopedLock Lock(RingEndMu);
    RingPos = RingEnd;
    atomic_store_relaxed(&Tab[Pos], RingPos);
    atomic_store_relaxed(&Ring[RingPos], Id);
    for (uptr *I = Begin; I != End; ++I) {
      RingPos = (RingPos + 1) & RingMask;
      atomic_store_relaxed(&Ring[RingPos], *I);
    }
    RingEnd = (RingPos + 1) & RingMask;
    return Hash;
  }

  // Look up a stack trace by hash. Returns true if successful. The trace may
  // be accessed via at(), passing indexes between *RingPosPtr and
  // *RingPosPtr + *SizePtr.
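  // A minimal caller-side sketch (Depot, Hash and Trace are hypothetical
  // names, not part of this header):
  //
  //   uptr RingPos, Size;
  //   if (Depot->find(Hash, &RingPos, &Size))
  //     for (uptr I = 0; I != Size; ++I)
  //       Trace[I] = uptr(Depot->at(RingPos + I));
  //
  // at() masks the index with RingMask, so wrap-around is handled there.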
  bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const {
    auto *Tab = getTab();
    auto *Ring = getRing();

    u32 Pos = Hash & TabMask;
    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
    if (RingPos >= RingSize)
      return false;
    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
    u64 HashWithTagBit = (u64(Hash) << 1) | 1;
    if ((Entry & 0x1ffffffff) != HashWithTagBit)
      return false;
    u32 Size = u32(Entry >> 33);
    if (Size >= RingSize)
      return false;
    *RingPosPtr = (RingPos + 1) & RingMask;
    *SizePtr = Size;
    MurMur2HashBuilder B;
    for (uptr I = 0; I != Size; ++I) {
      RingPos = (RingPos + 1) & RingMask;
      B.add(u32(atomic_load_relaxed(&Ring[RingPos])) >> 2);
    }
    return B.get() == Hash;
  }

  u64 at(uptr RingPos) const {
    auto *Ring = getRing();
    return atomic_load_relaxed(&Ring[RingPos & RingMask]);
  }

  // This is done for the purpose of fork safety in multithreaded programs and
  // does not fully disable StackDepot. In particular, find() still works and
  // only insert() is blocked.
  void disable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.lock(); }

  void enable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.unlock(); }
};

// We need StackDepot to be aligned to 8 bytes so the ring we store after it
// is correctly aligned.
static_assert(sizeof(StackDepot) % alignof(atomic_u64) == 0);

} // namespace scudo

#endif // SCUDO_STACK_DEPOT_H_