//===-- stack_depot.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef SCUDO_STACK_DEPOT_H_
#define SCUDO_STACK_DEPOT_H_

#include "atomic_helpers.h"
#include "common.h"
#include "mutex.h"

namespace scudo {

class MurMur2HashBuilder {
  static const u32 M = 0x5bd1e995;
  static const u32 Seed = 0x9747b28c;
  static const u32 R = 24;
  u32 H;

public:
  explicit MurMur2HashBuilder(u32 Init = 0) { H = Seed ^ Init; }
  void add(u32 K) {
    K *= M;
    K ^= K >> R;
    K *= M;
    H *= M;
    H ^= K;
  }
  u32 get() {
    u32 X = H;
    X ^= X >> 13;
    X *= M;
    X ^= X >> 15;
    return X;
  }
};
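
// A minimal usage sketch of the hash builder above (values are illustrative
// only): feed 32-bit words with add() and read the final hash with get().
//
//   MurMur2HashBuilder B;
//   B.add(0x1234u);
//   B.add(0x5678u);
//   u32 Hash = B.get();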

class alignas(atomic_u64) StackDepot {
  HybridMutex RingEndMu;
  u32 RingEnd = 0;

  // This data structure stores a stack trace for each allocation and
  // deallocation when stack trace recording is enabled; a trace may be looked
  // up using a hash of the stack trace. The lower bits of the hash are an
  // index into the Tab array, which stores an index into the Ring array where
  // the stack traces are stored. As the name implies, Ring is a ring buffer,
  // so a stack trace may wrap around to the start of the array.
  //
  // Each stack trace in Ring is prefixed by a stack trace marker consisting of
  // a fixed 1 bit in bit 0 (this allows disambiguation between stack frames
  // and stack trace markers in the case where instruction pointers are 4-byte
  // aligned, as they are on arm64), the stack trace hash in bits 1-32, and the
  // size of the stack trace in bits 33-63.
  //
  // The insert() function is potentially racy in its accesses to the Tab and
  // Ring arrays, but find() is resilient to races in the sense that, barring
  // hash collisions, it will either return the correct stack trace or no stack
  // trace at all, even if two instances of insert() raced with one another.
  // This is achieved by re-checking the hash of the stack trace before
  // returning the trace.
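  //
  // For example (values purely illustrative), a trace of 3 frames whose hash
  // is 0xdeadbeef is stored behind the marker
  //   (u64(3) << 33) | (u64(0xdeadbeef) << 1) | 1 == 0x7bd5b7ddf,
  // followed by the 3 raw frame addresses in the next Ring slots.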

  u32 RingSize = 0;
  u32 RingMask = 0;
  u32 TabMask = 0;
  // This is immediately followed by RingSize atomic_u64 and
  // (TabMask + 1) atomic_u32.

  atomic_u64 *getRing() {
    return reinterpret_cast<atomic_u64 *>(reinterpret_cast<char *>(this) +
                                          sizeof(StackDepot));
  }

  atomic_u32 *getTab() {
    return reinterpret_cast<atomic_u32 *>(reinterpret_cast<char *>(this) +
                                          sizeof(StackDepot) +
                                          sizeof(atomic_u64) * RingSize);
  }

  const atomic_u64 *getRing() const {
    return reinterpret_cast<const atomic_u64 *>(
        reinterpret_cast<const char *>(this) + sizeof(StackDepot));
  }

  const atomic_u32 *getTab() const {
    return reinterpret_cast<const atomic_u32 *>(
        reinterpret_cast<const char *>(this) + sizeof(StackDepot) +
        sizeof(atomic_u64) * RingSize);
  }

public:
  void init(u32 RingSz, u32 TabSz) {
    DCHECK(isPowerOfTwo(RingSz));
    DCHECK(isPowerOfTwo(TabSz));
    RingSize = RingSz;
    RingMask = RingSz - 1;
    TabMask = TabSz - 1;
  }
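
  // A rough usage sketch (assumptions, not a contract): the depot is placed at
  // the start of a suitably aligned buffer, typically freshly mapped (zeroed)
  // memory, of
  //   sizeof(StackDepot) + sizeof(atomic_u64) * RingSz +
  //     sizeof(atomic_u32) * TabSz
  // bytes, with RingSz and TabSz powers of two, and init(RingSz, TabSz) is
  // called before the first insert() or find(). isValid() below checks exactly
  // this size relation.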

  // Ensure that RingSize, RingMask and TabMask are set up in a way that
  // all accesses are within range of BufSize.
  bool isValid(uptr BufSize) const {
    if (!isPowerOfTwo(RingSize))
      return false;
    uptr RingBytes = sizeof(atomic_u64) * RingSize;
    if (RingMask + 1 != RingSize)
      return false;

    if (TabMask == 0)
      return false;
    uptr TabSize = TabMask + 1;
    if (!isPowerOfTwo(TabSize))
      return false;
    uptr TabBytes = sizeof(atomic_u32) * TabSize;

    // Subtract and detect underflow.
    if (BufSize < sizeof(StackDepot))
      return false;
    BufSize -= sizeof(StackDepot);
    if (BufSize < TabBytes)
      return false;
    BufSize -= TabBytes;
    if (BufSize < RingBytes)
      return false;
    return BufSize == RingBytes;
  }

  // Insert hash of the stack trace [Begin, End) into the stack depot, and
  // return the hash.
  u32 insert(uptr *Begin, uptr *End) {
    auto *Tab = getTab();
    auto *Ring = getRing();

    MurMur2HashBuilder B;
    for (uptr *I = Begin; I != End; ++I)
      B.add(u32(*I) >> 2);
    u32 Hash = B.get();

    u32 Pos = Hash & TabMask;
    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
    u64 Id = (u64(End - Begin) << 33) | (u64(Hash) << 1) | 1;
    if (Entry == Id)
      return Hash;

    ScopedLock Lock(RingEndMu);
    RingPos = RingEnd;
    atomic_store_relaxed(&Tab[Pos], RingPos);
    atomic_store_relaxed(&Ring[RingPos], Id);
    for (uptr *I = Begin; I != End; ++I) {
      RingPos = (RingPos + 1) & RingMask;
      atomic_store_relaxed(&Ring[RingPos], *I);
    }
    RingEnd = (RingPos + 1) & RingMask;
    return Hash;
  }
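
  // Resulting state (follows from the stores above): Tab[Hash & TabMask] now
  // indexes the marker slot in Ring, and the following (End - Begin) slots,
  // wrapping at RingMask, hold the raw frame addresses in order.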

  // Look up a stack trace by hash. Returns true if successful. The trace may
  // be accessed via at() with indexes between *RingPosPtr and
  // *RingPosPtr + *SizePtr.
  bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const {
    auto *Tab = getTab();
    auto *Ring = getRing();

    u32 Pos = Hash & TabMask;
    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
    if (RingPos >= RingSize)
      return false;
    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
    u64 HashWithTagBit = (u64(Hash) << 1) | 1;
    if ((Entry & 0x1ffffffff) != HashWithTagBit)
      return false;
    u32 Size = u32(Entry >> 33);
    if (Size >= RingSize)
      return false;
    *RingPosPtr = (RingPos + 1) & RingMask;
    *SizePtr = Size;
    MurMur2HashBuilder B;
    for (uptr I = 0; I != Size; ++I) {
      RingPos = (RingPos + 1) & RingMask;
      B.add(u32(atomic_load_relaxed(&Ring[RingPos])) >> 2);
    }
    return B.get() == Hash;
  }

  u64 at(uptr RingPos) const {
    auto *Ring = getRing();
    return atomic_load_relaxed(&Ring[RingPos & RingMask]);
  }
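
  // A minimal lookup sketch (hypothetical caller): walk a previously inserted
  // trace using the hash returned from insert().
  //
  //   uptr RingPos, Size;
  //   if (Depot->find(Hash, &RingPos, &Size)) {
  //     for (uptr I = 0; I != Size; ++I)
  //       processFrame(Depot->at(RingPos + I)); // processFrame is hypothetical
  //   }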

  // This is done for the purpose of fork safety in multithreaded programs and
  // does not fully disable StackDepot. In particular, find() still works and
  // only insert() is blocked.
  void disable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.lock(); }

  void enable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.unlock(); }
};

// We need StackDepot to be aligned to 8 bytes so the ring we store after
// is correctly aligned.
static_assert(sizeof(StackDepot) % alignof(atomic_u64) == 0);

} // namespace scudo

#endif // SCUDO_STACK_DEPOT_H_