//===-- memprof_allocator.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler, a memory profiler.
//
// Implementation of MemProf's memory allocator, which uses the allocator
// from sanitizer_common.
//
//===----------------------------------------------------------------------===//

#include "memprof_allocator.h"
#include "memprof_mapping.h"
#include "memprof_mibmap.h"
#include "memprof_rawprofile.h"
#include "memprof_stack.h"
#include "memprof_thread.h"
#include "profile/MemProfData.inc"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#include "sanitizer_common/sanitizer_array_ref.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_file.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_stackdepot.h"

#include <sched.h>
#include <time.h>

#define MAX_HISTOGRAM_PRINT_SIZE 32U

extern bool __memprof_histogram;

namespace __memprof {
namespace {
using ::llvm::memprof::MemInfoBlock;

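// Prints a MemInfoBlock either in verbose or terse form. The terse form is a
// single slash-separated line:
//   MIB:<id>/<alloc count>/<ave size>/<min size>/<max size>/<ave access
//   count>/<min access count>/<max access count>/<ave lifetime>/<min
//   lifetime>/<max lifetime>/<num migrated cpu>/<num lifetime overlaps>/
//   <num same alloc cpu>/<num same dealloc cpu>
// Averages are printed with two decimal places via the p / 100, p % 100
// idiom below.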
void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
  u64 p;

  if (print_terse) {
    p = M.TotalSize * 100 / M.AllocCount;
    Printf("MIB:%llu/%u/%llu.%02llu/%u/%u/", id, M.AllocCount, p / 100,
           p % 100, M.MinSize, M.MaxSize);
    p = M.TotalAccessCount * 100 / M.AllocCount;
    Printf("%llu.%02llu/%llu/%llu/", p / 100, p % 100, M.MinAccessCount,
           M.MaxAccessCount);
    p = M.TotalLifetime * 100 / M.AllocCount;
    Printf("%llu.%02llu/%u/%u/", p / 100, p % 100, M.MinLifetime,
           M.MaxLifetime);
    Printf("%u/%u/%u/%u\n", M.NumMigratedCpu, M.NumLifetimeOverlaps,
           M.NumSameAllocCpu, M.NumSameDeallocCpu);
  } else {
    p = M.TotalSize * 100 / M.AllocCount;
    Printf("Memory allocation stack id = %llu\n", id);
    Printf("\talloc_count %u, size (ave/min/max) %llu.%02llu / %u / %u\n",
           M.AllocCount, p / 100, p % 100, M.MinSize, M.MaxSize);
    p = M.TotalAccessCount * 100 / M.AllocCount;
    Printf("\taccess_count (ave/min/max): %llu.%02llu / %llu / %llu\n", p / 100,
           p % 100, M.MinAccessCount, M.MaxAccessCount);
    p = M.TotalLifetime * 100 / M.AllocCount;
    Printf("\tlifetime (ave/min/max): %llu.%02llu / %u / %u\n", p / 100,
           p % 100, M.MinLifetime, M.MaxLifetime);
    Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc "
           "cpu: %u, num same dealloc_cpu: %u\n",
           M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
           M.NumSameDeallocCpu);
    Printf("AccessCountHistogram[%u]: ", M.AccessHistogramSize);
    uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE
                             ? MAX_HISTOGRAM_PRINT_SIZE
                             : M.AccessHistogramSize;
    for (size_t i = 0; i < PrintSize; ++i) {
      Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
    }
    Printf("\n");
  }
}
} // namespace

static int GetCpuId(void) {
  // _memprof_preinit is called via the preinit_array, which subsequently calls
  // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu
  // will segfault as the address of __vdso_getcpu will be null.
  if (!memprof_inited)
    return -1;
  return sched_getcpu();
}

// Compute the timestamp in ms.
static int GetTimestamp(void) {
  // timespec_get will segfault if called from dl_init
  if (!memprof_timestamp_inited) {
    // By returning 0, this will be effectively treated as being
    // timestamped at memprof init time (when memprof_init_timestamp_s
    // is initialized).
    return 0;
  }
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000;
}

static MemprofAllocator &get_allocator();

// The memory chunk allocated from the underlying allocator looks like this:
// H H U U U U U U
//   H -- ChunkHeader (32 bytes)
//   U -- user memory.

// If there is left padding before the ChunkHeader (due to use of memalign),
// we store a magic value in the first uptr word of the memory block and
// store the address of ChunkHeader in the next uptr.
// M B L L L L L L L L L  H H U U U U U U
//   |                    ^
//   ---------------------|
//   M -- magic value kAllocBegMagic
//   B -- address of ChunkHeader pointing to the first 'H'

constexpr uptr kMaxAllowedMallocBits = 40;

// Should be no more than 32 bytes
struct ChunkHeader {
  // 1-st 4 bytes.
  u32 alloc_context_id;
  // 2-nd 4 bytes
  u32 cpu_id;
  // 3-rd 4 bytes
  u32 timestamp_ms;
  // 4-th 4 bytes
  // Note only 1 bit is needed for this flag if we need space in the future for
  // more fields.
  u32 from_memalign;
  // 5-th and 6-th 4 bytes
  // The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this
  // could be shrunk to kMaxAllowedMallocBits if we need space in the future
  // for more fields.
  atomic_uint64_t user_requested_size;
  // 23 bits available
  // 7-th and 8-th 4 bytes
  u64 data_type_id; // TODO: hash of type name
};

static const uptr kChunkHeaderSize = sizeof(ChunkHeader);
COMPILER_CHECK(kChunkHeaderSize == 32);

struct MemprofChunk : ChunkHeader {
  uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
  uptr UsedSize() {
    return atomic_load(&user_requested_size, memory_order_relaxed);
  }
  void *AllocBeg() {
    if (from_memalign)
      return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this));
    return reinterpret_cast<void *>(this);
  }
};

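// Stored at the start of the original allocation when the chunk header is
// offset from the allocation start (the memalign case in the layout diagram
// above). The release store in Set() pairs with the acquire load in Get(),
// so a reader that observes kAllocBegMagic also observes chunk_header.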
class LargeChunkHeader {
  static constexpr uptr kAllocBegMagic =
      FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL);
  atomic_uintptr_t magic;
  MemprofChunk *chunk_header;

public:
  MemprofChunk *Get() const {
    return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
               ? chunk_header
               : nullptr;
  }

  void Set(MemprofChunk *p) {
    if (p) {
      chunk_header = p;
      atomic_store(&magic, kAllocBegMagic, memory_order_release);
      return;
    }

    uptr old = kAllocBegMagic;
    if (!atomic_compare_exchange_strong(&magic, &old, 0,
                                        memory_order_release)) {
      CHECK_EQ(old, kAllocBegMagic);
    }
  }
};

void FlushUnneededMemProfShadowMemory(uptr p, uptr size) {
  // Since memprof's mapping is compacting, the shadow chunk may not be
  // page-aligned, so we only flush the page-aligned portion.
  ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
}

void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const {
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.mmaps++;
  thread_stats.mmaped += size;
}

void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const {
  // We are about to unmap a chunk of user memory.
  // Mark the corresponding shadow memory as not needed.
  FlushUnneededMemProfShadowMemory(p, size);
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.munmaps++;
  thread_stats.munmaped += size;
}

AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
  CHECK(ms);
  return &ms->allocator_cache;
}

// Accumulates the access count from the shadow for the given pointer and size.
u64 GetShadowCount(uptr p, u32 size) {
  u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
  u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size);
  u64 count = 0;
  for (; shadow <= shadow_end; shadow++)
    count += *shadow;
  return count;
}

// Accumulates the access count from the shadow for the given pointer and size.
// See memprof_mapping.h for an overview on histogram counters.
u64 GetShadowCountHistogram(uptr p, u32 size) {
  u8 *shadow = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p);
  u8 *shadow_end = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p + size);
  u64 count = 0;
  for (; shadow <= shadow_end; shadow++)
    count += *shadow;
  return count;
}

// Clears the shadow counters (when memory is allocated).
void ClearShadow(uptr addr, uptr size) {
  CHECK(AddrIsAlignedByGranularity(addr));
  CHECK(AddrIsInMem(addr));
  CHECK(AddrIsAlignedByGranularity(addr + size));
  CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
  CHECK(REAL(memset));
  uptr shadow_beg;
  uptr shadow_end;
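  // The regular shadow keeps one u64 access counter per granule of user
  // memory, while histogram mode keeps one u8 counter per
  // HISTOGRAM_GRANULARITY bytes (see GetShadowCount and
  // GetShadowCountHistogram above), hence the two different address
  // computations below.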
  if (__memprof_histogram) {
    shadow_beg = HISTOGRAM_MEM_TO_SHADOW(addr);
    shadow_end = HISTOGRAM_MEM_TO_SHADOW(addr + size);
  } else {
    shadow_beg = MEM_TO_SHADOW(addr);
    shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
  }

  if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
    REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
  } else {
    uptr page_size = GetPageSizeCached();
    uptr page_beg = RoundUpTo(shadow_beg, page_size);
    uptr page_end = RoundDownTo(shadow_end, page_size);

    if (page_beg >= page_end) {
      REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
    } else {
      if (page_beg != shadow_beg) {
        REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg);
      }
      if (page_end != shadow_end) {
        REAL(memset)((void *)page_end, 0, shadow_end - page_end);
      }
      ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr);
    }
  }
}

struct Allocator {
  static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits;

  MemprofAllocator allocator;
  StaticSpinMutex fallback_mutex;
  AllocatorCache fallback_allocator_cache;

  uptr max_user_defined_malloc_size;

  // Holds the mapping of stack ids to MemInfoBlocks.
  MIBMapTy MIBMap;

  atomic_uint8_t destructing;
  atomic_uint8_t constructed;

  // ------------------- Initialization ------------------------
  explicit Allocator(LinkerInitialized) {
    atomic_store_relaxed(&destructing, 0);
    atomic_store_relaxed(&constructed, 1);
  }

  ~Allocator() {
    atomic_store_relaxed(&destructing, 1);
    if (flags()->dump_at_exit)
      FinishAndWrite();
  }

  static void PrintCallback(const uptr Key, LockedMemInfoBlock *const &Value,
                            void *Arg) {
    SpinMutexLock l(&Value->mutex);
    Print(Value->mib, Key, bool(Arg));
  }

  // See memprof_mapping.h for an overview on histogram counters.
  static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
    if (__memprof_histogram) {
      return CreateNewMIBWithHistogram(p, m, user_size);
    } else {
      return CreateNewMIBWithoutHistogram(p, m, user_size);
    }
  }

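  // A hypothetical example, assuming HISTOGRAM_GRANULARITY is 8 bytes: a
  // 17-byte allocation is covered by RoundUpTo(17, 8) / 8 == 3 u8 shadow
  // counters, so the histogram built below would widen those 3 counters into
  // 3 u64 entries.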
  static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
                                                u64 user_size) {
    u64 c = GetShadowCountHistogram(p, user_size);
    long curtime = GetTimestamp();
    uint32_t HistogramSize =
        RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
    uintptr_t Histogram =
        (uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
    memset((void *)Histogram, 0, HistogramSize * sizeof(uint64_t));
    for (size_t i = 0; i < HistogramSize; ++i) {
      u8 Counter =
          *((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
      ((uint64_t *)Histogram)[i] = (uint64_t)Counter;
    }
    MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
                        GetCpuId(), Histogram, HistogramSize);
    return newMIB;
  }

  static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
                                                   u64 user_size) {
    u64 c = GetShadowCount(p, user_size);
    long curtime = GetTimestamp();
    MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
                        GetCpuId(), 0, 0);
    return newMIB;
  }

  void FinishAndWrite() {
    if (flags()->print_text && common_flags()->print_module_map)
      DumpProcessMap();

    allocator.ForceLock();

    InsertLiveBlocks();
    if (flags()->print_text) {
      if (!flags()->print_terse)
        Printf("Recorded MIBs (incl. live on exit):\n");
      MIBMap.ForEach(PrintCallback,
                     reinterpret_cast<void *>(flags()->print_terse));
      StackDepotPrintAll();
    } else {
      // Serialize the contents to a raw profile. Format documented in
      // memprof_rawprofile.h.
      char *Buffer = nullptr;

      __sanitizer::ListOfModules List;
      List.init();
      ArrayRef<LoadedModule> Modules(List.begin(), List.end());
      u64 BytesSerialized = SerializeToRawProfile(MIBMap, Modules, Buffer);
      CHECK(Buffer && BytesSerialized && "could not serialize to buffer");
      report_file.Write(Buffer, BytesSerialized);
    }

    allocator.ForceUnlock();
  }

  // Inserts any blocks which have been allocated but not yet deallocated.
  void InsertLiveBlocks() {
    allocator.ForEachChunk(
        [](uptr chunk, void *alloc) {
          u64 user_requested_size;
          Allocator *A = (Allocator *)alloc;
          MemprofChunk *m =
              A->GetMemprofChunk((void *)chunk, user_requested_size);
          if (!m)
            return;
          uptr user_beg = ((uptr)m) + kChunkHeaderSize;
          MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
          InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
        },
        this);
  }

  void InitLinkerInitialized() {
    SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
    allocator.InitLinkerInitialized(
        common_flags()->allocator_release_to_os_interval_ms);
    max_user_defined_malloc_size = common_flags()->max_allocation_size_mb
                                       ? common_flags()->max_allocation_size_mb
                                             << 20
                                       : kMaxAllowedMallocSize;
  }

  // -------------------- Allocation/Deallocation routines ---------------
  void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack,
                 AllocType alloc_type) {
    if (UNLIKELY(!memprof_inited))
      MemprofInitFromRtl();
    if (UNLIKELY(IsRssLimitExceeded())) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportRssLimitExceeded(stack);
    }
    CHECK(stack);
    const uptr min_alignment = MEMPROF_ALIGNMENT;
    if (alignment < min_alignment)
      alignment = min_alignment;
    if (size == 0) {
      // We'd be happy to avoid allocating memory for zero-size requests, but
      // some programs/tests depend on this behavior and assume that malloc
      // would not return NULL even for zero-size allocations. Moreover, it
      // looks like operator new should never return NULL, and results of
      // consecutive "new" calls must be different even if the allocated size
      // is zero.
      size = 1;
    }
    CHECK(IsPowerOfTwo(alignment));
    uptr rounded_size = RoundUpTo(size, alignment);
    uptr needed_size = rounded_size + kChunkHeaderSize;
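    // For over-aligned requests, reserve `alignment` extra bytes so that
    // user_beg can be rounded up to the requested alignment further down
    // while the ChunkHeader still fits immediately before it.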
    if (alignment > min_alignment)
      needed_size += alignment;
    CHECK(IsAligned(needed_size, min_alignment));
    if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
        size > max_user_defined_malloc_size) {
      if (AllocatorMayReturnNull()) {
        Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", size);
        return nullptr;
      }
      uptr malloc_limit =
          Min(kMaxAllowedMallocSize, max_user_defined_malloc_size);
      ReportAllocationSizeTooBig(size, malloc_limit, stack);
    }

    MemprofThread *t = GetCurrentThread();
    void *allocated;
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocated = allocator.Allocate(cache, needed_size, 8);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocated = allocator.Allocate(cache, needed_size, 8);
    }
    if (UNLIKELY(!allocated)) {
      SetAllocatorOutOfMemory();
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportOutOfMemory(size, stack);
    }

    uptr alloc_beg = reinterpret_cast<uptr>(allocated);
    uptr alloc_end = alloc_beg + needed_size;
    uptr beg_plus_header = alloc_beg + kChunkHeaderSize;
    uptr user_beg = beg_plus_header;
    if (!IsAligned(user_beg, alignment))
      user_beg = RoundUpTo(user_beg, alignment);
    uptr user_end = user_beg + size;
    CHECK_LE(user_end, alloc_end);
    uptr chunk_beg = user_beg - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
    m->from_memalign = alloc_beg != chunk_beg;
    CHECK(size);

    m->cpu_id = GetCpuId();
    m->timestamp_ms = GetTimestamp();
    m->alloc_context_id = StackDepotPut(*stack);

    uptr size_rounded_down_to_granularity =
        RoundDownTo(size, SHADOW_GRANULARITY);
    if (size_rounded_down_to_granularity)
      ClearShadow(user_beg, size_rounded_down_to_granularity);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.mallocs++;
    thread_stats.malloced += size;
    thread_stats.malloced_overhead += needed_size - size;
    if (needed_size > SizeClassMap::kMaxSize)
      thread_stats.malloc_large++;
    else
      thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++;

    void *res = reinterpret_cast<void *>(user_beg);
    atomic_store(&m->user_requested_size, size, memory_order_release);
    if (alloc_beg != chunk_beg) {
      CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg);
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m);
    }
    RunMallocHooks(res, size);
    return res;
  }

  void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment,
                  BufferedStackTrace *stack, AllocType alloc_type) {
    uptr p = reinterpret_cast<uptr>(ptr);
    if (p == 0)
      return;

    RunFreeHooks(ptr);

    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

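    // Atomically fetch and clear the requested size, so the MIB created below
    // and the freed-bytes statistic use the size recorded at allocation time;
    // a cleared size also makes GetMemprofChunk treat the chunk as dead.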
    u64 user_requested_size =
        atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
    if (memprof_inited && atomic_load_relaxed(&constructed) &&
        !atomic_load_relaxed(&destructing)) {
      MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size);
      InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
    }

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.frees++;
    thread_stats.freed += user_requested_size;

    void *alloc_beg = m->AllocBeg();
    if (alloc_beg != m) {
      // Clear the magic value, as allocator internals may overwrite the
      // contents of deallocated chunk, confusing GetMemprofChunk lookup.
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr);
    }

    MemprofThread *t = GetCurrentThread();
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocator.Deallocate(cache, alloc_beg);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocator.Deallocate(cache, alloc_beg);
    }
  }

  void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) {
    CHECK(old_ptr && new_size);
    uptr p = reinterpret_cast<uptr>(old_ptr);
    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.reallocs++;
    thread_stats.realloced += new_size;

    void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC);
    if (new_ptr) {
      CHECK_NE(REAL(memcpy), nullptr);
      uptr memcpy_size = Min(new_size, m->UsedSize());
      REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
      Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC);
    }
    return new_ptr;
  }

  void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
    if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportCallocOverflow(nmemb, size, stack);
    }
    void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC);
    // If the memory comes from the secondary allocator no need to clear it
    // as it comes directly from mmap.
    if (ptr && allocator.FromPrimary(ptr))
      REAL(memset)(ptr, 0, nmemb * size);
    return ptr;
  }

  void CommitBack(MemprofThreadLocalMallocStorage *ms) {
    AllocatorCache *ac = GetAllocatorCache(ms);
    allocator.SwallowCache(ac);
  }

  // -------------------------- Chunk lookup ----------------------

  // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg).
  MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) {
    if (!alloc_beg)
      return nullptr;
    MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
    if (!p) {
      if (!allocator.FromPrimary(alloc_beg))
        return nullptr;
      p = reinterpret_cast<MemprofChunk *>(alloc_beg);
    }
    // The size is reset to 0 on deallocation (and a min of 1 on
    // allocation).
    user_requested_size =
        atomic_load(&p->user_requested_size, memory_order_acquire);
    if (user_requested_size)
      return p;
    return nullptr;
  }

  MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) {
    void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p));
    return GetMemprofChunk(alloc_beg, user_requested_size);
  }

  uptr AllocationSize(uptr p) {
    u64 user_requested_size;
    MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size);
    if (!m)
      return 0;
    if (m->Beg() != p)
      return 0;
    return user_requested_size;
  }

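  // Fast path used by __sanitizer_get_allocated_size_fast; assumes p is the
  // beginning of a live allocation, which the caller is expected to have
  // checked (e.g. via __sanitizer_get_allocated_begin).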
  uptr AllocationSizeFast(uptr p) {
    return reinterpret_cast<MemprofChunk *>(p - kChunkHeaderSize)->UsedSize();
  }

  void Purge() { allocator.ForceReleaseToOS(); }

  void PrintStats() { allocator.PrintStats(); }

  void ForceLock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
    allocator.ForceLock();
    fallback_mutex.Lock();
  }

  void ForceUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
    fallback_mutex.Unlock();
    allocator.ForceUnlock();
  }
};

static Allocator instance(LINKER_INITIALIZED);

static MemprofAllocator &get_allocator() { return instance.allocator; }

void InitializeAllocator() { instance.InitLinkerInitialized(); }

void MemprofThreadLocalMallocStorage::CommitBack() {
  instance.CommitBack(this);
}

void PrintInternalAllocatorStats() { instance.PrintStats(); }

void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, 0, 0, stack, alloc_type);
}

void memprof_delete(void *ptr, uptr size, uptr alignment,
                    BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, size, alignment, stack, alloc_type);
}

void *memprof_malloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
}

void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Calloc(nmemb, size, stack));
}

void *memprof_reallocarray(void *p, uptr nmemb, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportReallocArrayOverflow(nmemb, size, stack);
  }
  return memprof_realloc(p, nmemb * size, stack);
}

void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) {
  if (!p)
    return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
  if (size == 0) {
    if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
      instance.Deallocate(p, 0, 0, stack, FROM_MALLOC);
      return nullptr;
    }
    // Allocate a size of 1 if we shouldn't free() on Realloc to 0
    size = 1;
  }
  return SetErrnoOnNull(instance.Reallocate(p, size, stack));
}

void *memprof_valloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(
      instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC));
}

void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) {
  uptr PageSize = GetPageSizeCached();
  if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportPvallocOverflow(size, stack);
  }
  // pvalloc(0) should allocate one page.
  size = size ? RoundUpTo(size, PageSize) : PageSize;
  return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC));
}

void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
                       AllocType alloc_type) {
  if (UNLIKELY(!IsPowerOfTwo(alignment))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAllocationAlignment(alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type));
}

void *memprof_aligned_alloc(uptr alignment, uptr size,
                            BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAlignedAllocAlignment(size, alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC));
}

int memprof_posix_memalign(void **memptr, uptr alignment, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
    if (AllocatorMayReturnNull())
      return errno_EINVAL;
    ReportInvalidPosixMemalignAlignment(alignment, stack);
  }
  void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC);
  if (UNLIKELY(!ptr))
    // OOM error is already taken care of by Allocate.
    return errno_ENOMEM;
  CHECK(IsAligned((uptr)ptr, alignment));
  *memptr = ptr;
  return 0;
}

static const void *memprof_malloc_begin(const void *p) {
  u64 user_requested_size;
  MemprofChunk *m =
      instance.GetMemprofChunkByAddr((uptr)p, user_requested_size);
  if (!m)
    return nullptr;
  if (user_requested_size == 0)
    return nullptr;

  return (const void *)m->Beg();
}

uptr memprof_malloc_usable_size(const void *ptr) {
  if (!ptr)
    return 0;
  uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr));
  return usable_size;
}

} // namespace __memprof

// ---------------------- Interface ---------------- {{{1
using namespace __memprof;

uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; }

int __sanitizer_get_ownership(const void *p) {
  return memprof_malloc_usable_size(p) != 0;
}

const void *__sanitizer_get_allocated_begin(const void *p) {
  return memprof_malloc_begin(p);
}

uptr __sanitizer_get_allocated_size(const void *p) {
  return memprof_malloc_usable_size(p);
}

uptr __sanitizer_get_allocated_size_fast(const void *p) {
  DCHECK_EQ(p, __sanitizer_get_allocated_begin(p));
  uptr ret = instance.AllocationSizeFast(reinterpret_cast<uptr>(p));
  DCHECK_EQ(ret, __sanitizer_get_allocated_size(p));
  return ret;
}

void __sanitizer_purge_allocator() { instance.Purge(); }

int __memprof_profile_dump() {
  instance.FinishAndWrite();
  // In the future we may want to return non-zero if there are any errors
  // detected during the dumping process.
  return 0;
}

void __memprof_profile_reset() {
  if (report_file.fd != kInvalidFd && report_file.fd != kStdoutFd &&
      report_file.fd != kStderrFd) {
    CloseFile(report_file.fd);
    // Setting the file descriptor to kInvalidFd ensures that we will reopen
    // the file when invoking Write again.
    report_file.fd = kInvalidFd;
  }
}