//===-- sanitizer_procmaps_mac.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Information about the process mappings (Mac-specific parts).
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_APPLE
#include "sanitizer_common.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach/mach.h>

// These are not available in older macOS SDKs.
#ifndef CPU_SUBTYPE_X86_64_H
#define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */
#endif
#ifndef CPU_SUBTYPE_ARM_V7S
#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */
#endif
#ifndef CPU_SUBTYPE_ARM_V7K
#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12)
#endif
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#endif

namespace __sanitizer {

// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  char name[kMaxSegName];
  uptr nsects;
  const char *current_load_cmd_addr;
  u32 lc_type;
  uptr base_virt_addr;
  uptr addr_mask;
};

template <typename Section>
static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
                            bool isWritable) {
  const Section *sc = (const Section *)data->current_load_cmd_addr;
  data->current_load_cmd_addr += sizeof(Section);

  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
  uptr sec_end = sec_start + sc->size;
  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                          sc->sectname);
}

void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}

MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
}

MemoryMappingLayout::~MemoryMappingLayout() {
}

bool MemoryMappingLayout::Error() const {
  return false;
}

// More information about Mach-O headers can be found in <mach-o/loader.h>.
// Each Mach-O image has a header (mach_header or mach_header_64) starting
// with a magic number, and a list of linker load commands directly following
// the header.
// A load command is at least two 32-bit words: the command type and the
// command size in bytes. We're interested only in segment load commands
// (LC_SEGMENT and LC_SEGMENT_64), which describe the parts of the file that
// are mapped into the task's address space.
// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
// segment_command_64 correspond to the memory address, memory size and file
// offset of the current memory segment.
// Because these fields are taken from the image as is, one needs to add
// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
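//
// For illustration, a minimal walk over one image's load commands looks
// roughly like this (a sketch only, assuming a 64-bit image; the real
// iteration in Next() below also tracks per-image state and handles both
// 32- and 64-bit headers):
//
//   const mach_header_64 *hdr =
//       (const mach_header_64 *)_dyld_get_image_header(image_index);
//   const load_command *lc =
//       (const load_command *)((const char *)hdr + sizeof(mach_header_64));
//   for (uint32_t i = 0; i < hdr->ncmds; i++) {
//     if (lc->cmd == LC_SEGMENT_64) {
//       const segment_command_64 *sc = (const segment_command_64 *)lc;
//       // sc->vmaddr, sc->vmsize and sc->fileoff describe the segment.
//     }
//     lc = (const load_command *)((const char *)lc + lc->cmdsize);
//   }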

void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}

// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static mach_header *dyld_hdr = 0;
static const char kDyldPath[] = "/usr/lib/dyld";
static const int kDyldImageIdx = -1;

// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}

void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}

static bool IsDyldHdr(const mach_header *hdr) {
  return (hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
         hdr->filetype == MH_DYLINKER;
}

// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually walking the memory map until we hit a
// Mach header matching dyld. These vm_region_recurse_64() calls are
// expensive, but the first memory map generation occurs early in the
// process, when dyld is one of the only images loaded, so its header is
// found after only a few iterations. These assumptions no longer hold on
// macOS 13+, where dyld itself has moved into the shared cache.
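// Note that vm_region_recurse_64() treats |address| as an in/out parameter:
// on success it is updated to the start of the region found at or after the
// requested address, so advancing |address| by |size| after each call visits
// every mapped region in turn.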
static mach_header *GetDyldImageHeaderViaVMRegion() {
  vm_address_t address = 0;

  while (true) {
    vm_size_t size = 0;
    unsigned depth = 1;
    struct vm_region_submap_info_64 info;
    mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
    kern_return_t err =
        vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                             (vm_region_info_t)&info, &count);
    if (err != KERN_SUCCESS) return nullptr;

    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      if (IsDyldHdr(hdr)) {
        return hdr;
      }
    }
    address += size;
  }
}

extern "C" {
struct dyld_shared_cache_dylib_text_info {
  uint64_t version;  // current version 2
  // following fields all exist in version 1
  uint64_t loadAddressUnslid;
  uint64_t textSegmentSize;
  uuid_t dylibUuid;
  const char *path;  // pointer invalid at end of iterations
  // following fields all exist in version 2
  uint64_t textSegmentOffset;  // offset from start of cache
};
typedef struct dyld_shared_cache_dylib_text_info
    dyld_shared_cache_dylib_text_info;

extern bool _dyld_get_shared_cache_uuid(uuid_t uuid);
extern const void *_dyld_get_shared_cache_range(size_t *length);
extern int dyld_shared_cache_iterate_text(
    const uuid_t cacheUuid,
    void (^callback)(const dyld_shared_cache_dylib_text_info *info));
}  // extern "C"

static mach_header *GetDyldImageHeaderViaSharedCache() {
  uuid_t uuid;
  bool hasCache = _dyld_get_shared_cache_uuid(uuid);
  if (!hasCache)
    return nullptr;

  size_t cacheLength;
  __block uptr cacheStart = (uptr)_dyld_get_shared_cache_range(&cacheLength);
  CHECK(cacheStart && cacheLength);

  __block mach_header *dyldHdr = nullptr;
  int res = dyld_shared_cache_iterate_text(
      uuid, ^(const dyld_shared_cache_dylib_text_info *info) {
        CHECK_GE(info->version, 2);
        mach_header *hdr =
            (mach_header *)(cacheStart + info->textSegmentOffset);
        if (IsDyldHdr(hdr))
          dyldHdr = hdr;
      });
  CHECK_EQ(res, 0);

  return dyldHdr;
}

const mach_header *get_dyld_hdr() {
  if (!dyld_hdr) {
    // On macOS 13+, dyld itself has moved into the shared cache. Looking it up
    // via vm_region_recurse_64() causes spins/hangs/crashes.
    if (GetMacosAlignedVersion() >= MacosVersion(13, 0)) {
      dyld_hdr = GetDyldImageHeaderViaSharedCache();
      if (!dyld_hdr) {
        VReport(1,
                "Failed to lookup the dyld image header in the shared cache on "
                "macOS 13+ (or no shared cache in use). Falling back to "
                "lookup via vm_region_recurse_64().\n");
        dyld_hdr = GetDyldImageHeaderViaVMRegion();
      }
    } else {
      dyld_hdr = GetDyldImageHeaderViaVMRegion();
    }
    CHECK(dyld_hdr);
  }

  return dyld_hdr;
}

// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and returns the start and end addresses and file offset of the
// corresponding segment.
// Note that the segment addresses are not necessarily sorted.
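//
// For a regular (non-dyld) image, the runtime address of a segment is its
// link-time vmaddr plus the image's ASLR slide. For example (illustrative
// numbers only), a segment with vmaddr 0x100000000 in an image slid by
// 0x4000 starts at runtime address 0x100004000:
//   uptr start = sc->vmaddr + _dyld_get_image_vmaddr_slide(image_index);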
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData *layout_data) {
  const char *lc = layout_data->current_load_cmd_addr;

  layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  layout_data->current_load_cmd_count--;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand *sc = (const SegmentCommand *)lc;
    uptr base_virt_addr, addr_mask;
    if (layout_data->current_image == kDyldImageIdx) {
      base_virt_addr = (uptr)get_dyld_hdr();
      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
      // it contains an absolute address rather than an offset for dyld.
      // That absolute address is not the true segment address either, but
      // its offset portion is accurate when combined with the dyld base
      // address, and the mask extracts exactly that offset.
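      // For example (illustrative value), a dyld vmaddr of 0x8fe2d000 masks
      // down to the offset 0x2d000, which is then rebased onto
      // get_dyld_hdr() via base_virt_addr.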
      addr_mask = 0xfffff;
    } else {
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
      addr_mask = ~0;
    }

    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      seg_data->addr_mask = addr_mask;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset = (layout_data->current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data->current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data->current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
    }
    segment->arch = layout_data->current_arch;
    internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}

ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
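  // Strip the capability bits (CPU_SUBTYPE_MASK) before comparing: e.g. a
  // reported cpusubtype of 0x80000008 (CPU_SUBTYPE_LIB64 | 8) masks down to
  // CPU_SUBTYPE_X86_64_H (8).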
  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
  switch (cputype) {
    case CPU_TYPE_I386:
      return kModuleArchI386;
    case CPU_TYPE_X86_64:
      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
      CHECK(0 && "Invalid subtype of x86_64");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM:
      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
      CHECK(0 && "Invalid subtype of ARM");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM64:
      return kModuleArchARM64;
    default:
      CHECK(0 && "Invalid CPU type");
      return kModuleArchUnknown;
  }
}

static const load_command *NextCommand(const load_command *lc) {
  return (const load_command *)((const char *)lc + lc->cmdsize);
}

#ifdef MH_MAGIC_64
static constexpr size_t header_size = sizeof(mach_header_64);
#else
static constexpr size_t header_size = sizeof(mach_header);
#endif

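// Scans the image's load commands for LC_UUID and copies the module UUID
// into |uuid_output|; the output is left untouched if no LC_UUID command is
// present.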
static void FindUUID(const load_command *first_lc, const mach_header *hdr,
                     u8 *uuid_output) {
  uint32_t curcmd = 0;
  for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
       curcmd++, lc = NextCommand(lc)) {
    CHECK_LT((const char *)lc,
             (const char *)hdr + header_size + hdr->sizeofcmds);

    if (lc->cmd != LC_UUID)
      continue;

    const uuid_command *uuid_lc = (const uuid_command *)lc;
    const uint8_t *uuid = &uuid_lc->uuid[0];
    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
    return;
  }
}

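// A module is considered instrumented if it links against a sanitizer
// runtime, i.e. one of its LC_LOAD_DYLIB commands names a libclang_rt.*
// dylib.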
static bool IsModuleInstrumented(const load_command *first_lc,
                                 const mach_header *hdr) {
  uint32_t curcmd = 0;
  for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
       curcmd++, lc = NextCommand(lc)) {
    CHECK_LT((const char *)lc,
             (const char *)hdr + header_size + hdr->sizeofcmds);

    if (lc->cmd != LC_LOAD_DYLIB)
      continue;

    const dylib_command *dylib_lc = (const dylib_command *)lc;
    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
    dylib_name = StripModuleName(dylib_name);
    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
      return true;
    }
  }
  return false;
}

const ImageHeader *MemoryMappingLayout::CurrentImageHeader() {
  const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                               ? get_dyld_hdr()
                               : _dyld_get_image_header(data_.current_image);
  return (const ImageHeader *)hdr;
}

bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (const mach_header *)CurrentImageHeader();
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr, hdr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr, hdr);
    }

    while (data_.current_load_cmd_count > 0) {
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
      }
    }
    // If we get here, no more load_cmd's in this image talk about
    // segments. Go on to the next image.
    data_.current_load_cmd_count = -1;  // This will trigger loading next image
  }
  return false;
}

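// Builds the list of loaded modules by iterating over all segments and
// merging consecutive segments that belong to the same image into a single
// LoadedModule.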
void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalMmapVector<char> module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), module_name.size());
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    // Skip segments with no filename, and the __PAGEZERO segment, whose
    // vmsize is 0 (start == end).
    if (segment.filename[0] == '\0' || (segment.start == segment.end))
      continue;
    LoadedModule *cur_module = nullptr;
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}

}  // namespace __sanitizer

#endif  // SANITIZER_APPLE