//===-- sanitizer_procmaps_mac.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Information about the process mappings (Mac-specific parts).
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_APPLE
#include "sanitizer_common.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach/mach.h>

// These are not available in older macOS SDKs.
# ifndef CPU_SUBTYPE_X86_64_H
# define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */
# endif
# ifndef CPU_SUBTYPE_ARM_V7S
# define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */
# endif
# ifndef CPU_SUBTYPE_ARM_V7K
# define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12)
# endif
# ifndef CPU_TYPE_ARM64
# define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
# endif
# ifndef CPU_SUBTYPE_ARM64E
# define CPU_SUBTYPE_ARM64E ((cpu_subtype_t)2)
# endif

namespace __sanitizer {

// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  char name[kMaxSegName];             // Segment name (e.g. "__DATA").
  uptr nsects;                        // Number of sections left to visit.
  const char *current_load_cmd_addr;  // Next section record to read.
  u32 lc_type;                        // LC_SEGMENT or LC_SEGMENT_64.
  uptr base_virt_addr;                // Slide to add to section addresses.
};

template <typename Section>
static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
                            bool isWritable) {
  const Section *sc = (const Section *)data->current_load_cmd_addr;
  data->current_load_cmd_addr += sizeof(Section);

  uptr sec_start = sc->addr + data->base_virt_addr;
  uptr sec_end = sec_start + sc->size;
  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                          sc->sectname);
}

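// Sanity-checks the module map we hand out: collects every non-empty address
// range from DumpListOfModules(), sorts the ranges by start address, and
// reports any pair of adjacent ranges that overlap. Returns true if the map
// is well formed. Called once from the MemoryMappingLayout constructor below.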
static bool VerifyMemoryMapping(MemoryMappingLayout* mapping) {
  InternalMmapVector<LoadedModule> modules;
  modules.reserve(128);  // matches DumpProcessMap
  mapping->DumpListOfModules(&modules);

  InternalMmapVector<LoadedModule::AddressRange> segments;
  for (uptr i = 0; i < modules.size(); ++i) {
    for (auto& range : modules[i].ranges()) {
      if (range.beg == range.end)
        continue;
      segments.push_back(range);
    }
  }

  // Verify that none of the segments overlap:
  // 1. Sort the segments by start address.
  // 2. Check that every segment starts after the previous one ends.
  Sort(segments.data(), segments.size(),
       [](LoadedModule::AddressRange& a, LoadedModule::AddressRange& b) {
         return a.beg < b.beg;
       });

  // To avoid spam, we only print the report message once per process.
  static bool invalid_module_map_reported = false;
  bool well_formed = true;

  for (size_t i = 1; i < segments.size(); i++) {
    uptr cur_start = segments[i].beg;
    uptr prev_end = segments[i - 1].end;
    if (cur_start < prev_end) {
      well_formed = false;
      VReport(2, "Overlapping mappings: %s start = %p, %s end = %p\n",
              segments[i].name, (void*)cur_start, segments[i - 1].name,
              (void*)prev_end);
      if (!invalid_module_map_reported) {
        Report(
            "WARN: Invalid dyld module map detected. This is most likely a "
            "bug in the sanitizer.\n");
        Report("WARN: Backtraces may be unreliable.\n");
        invalid_module_map_reported = true;
      }
    }
  }

  for (auto& m : modules) m.clear();

  mapping->Reset();
  return well_formed;
}

void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}

MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
  VerifyMemoryMapping(this);
}

MemoryMappingLayout::~MemoryMappingLayout() {
}

bool MemoryMappingLayout::Error() const {
  return false;
}

// More information about Mach-O headers can be found in mach-o/loader.h
// Each Mach-O image has a header (mach_header or mach_header_64) starting with
// a magic number, and a list of linker load commands directly following the
// header.
// A load command is at least two 32-bit words: the command type and the
// command size in bytes. We're interested only in segment load commands
// (LC_SEGMENT and LC_SEGMENT_64), which describe a part of the file that is
// mapped into the task's address space.
// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
// segment_command_64 correspond to the memory address, memory size and the
// file offset of the current memory segment.
// Because these fields are taken from the images as-is, one needs to add
// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
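//
// A minimal sketch of that walk using only public dyld/Mach-O APIs (not used
// by this file, which keeps extra per-image state; image index 0 and the
// 64-bit-only assumption are purely illustrative):
//
//   const mach_header_64 *hdr =
//       (const mach_header_64 *)_dyld_get_image_header(0);
//   intptr_t slide = _dyld_get_image_vmaddr_slide(0);
//   const load_command *lc = (const load_command *)(hdr + 1);
//   for (uint32_t i = 0; i < hdr->ncmds; i++) {
//     if (lc->cmd == LC_SEGMENT_64) {
//       const segment_command_64 *sc = (const segment_command_64 *)lc;
//       // Segment occupies [sc->vmaddr + slide, sc->vmaddr + slide + sc->vmsize).
//     }
//     lc = (const load_command *)((const char *)lc + lc->cmdsize);
//   }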

void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  data_.current_load_cmd_count = -1;  // -1: load commands not read yet.
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}

// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static mach_header *dyld_hdr = 0;
static const char kDyldPath[] = "/usr/lib/dyld";
static const int kDyldImageIdx = -1;

// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}

void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}

static bool IsDyldHdr(const mach_header *hdr) {
  return (hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
         hdr->filetype == MH_DYLINKER;
}

// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually walking the memory map with
// vm_region_recurse_64() until we hit a Mach header matching dyld. These
// calls are expensive, but the first memory map generation occurs early in
// the process, when dyld is one of the only images loaded, so the header is
// found after only a few iterations. These assumptions no longer hold on
// macOS 13+ (dyld itself has moved into the shared cache).
static mach_header *GetDyldImageHeaderViaVMRegion() {
  vm_address_t address = 0;

  while (true) {
    vm_size_t size = 0;
    unsigned depth = 1;
    struct vm_region_submap_info_64 info;
    mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
    kern_return_t err =
        vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                             (vm_region_info_t)&info, &count);
    if (err != KERN_SUCCESS) return nullptr;

    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      if (IsDyldHdr(hdr)) {
        return hdr;
      }
    }
    address += size;
  }
}

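// Declarations mirroring dyld SPI for inspecting the shared cache, declared
// locally because these interfaces are not part of the public SDK headers.
// They are used below to locate dyld's own Mach header when dyld resides in
// the shared cache (macOS 13+).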
extern "C" {
struct dyld_shared_cache_dylib_text_info {
  uint64_t version;  // current version 2
  // following fields all exist in version 1
  uint64_t loadAddressUnslid;
  uint64_t textSegmentSize;
  uuid_t dylibUuid;
  const char *path;  // pointer invalid at end of iterations
  // following fields all exist in version 2
  uint64_t textSegmentOffset;  // offset from start of cache
};
typedef struct dyld_shared_cache_dylib_text_info
    dyld_shared_cache_dylib_text_info;

extern bool _dyld_get_shared_cache_uuid(uuid_t uuid);
extern const void *_dyld_get_shared_cache_range(size_t *length);
extern intptr_t _dyld_get_image_slide(const struct mach_header* mh);
extern int dyld_shared_cache_iterate_text(
    const uuid_t cacheUuid,
    void (^callback)(const dyld_shared_cache_dylib_text_info *info));
}  // extern "C"

static mach_header *GetDyldImageHeaderViaSharedCache() {
  uuid_t uuid;
  bool hasCache = _dyld_get_shared_cache_uuid(uuid);
  if (!hasCache)
    return nullptr;

  size_t cacheLength;
  __block uptr cacheStart = (uptr)_dyld_get_shared_cache_range(&cacheLength);
  CHECK(cacheStart && cacheLength);

  __block mach_header *dyldHdr = nullptr;
  int res = dyld_shared_cache_iterate_text(
      uuid, ^(const dyld_shared_cache_dylib_text_info *info) {
        CHECK_GE(info->version, 2);
        mach_header *hdr =
            (mach_header *)(cacheStart + info->textSegmentOffset);
        if (IsDyldHdr(hdr))
          dyldHdr = hdr;
      });
  CHECK_EQ(res, 0);

  return dyldHdr;
}

const mach_header *get_dyld_hdr() {
  if (!dyld_hdr) {
    // On macOS 13+, dyld itself has moved into the shared cache. Looking it up
    // via vm_region_recurse_64() causes spins/hangs/crashes.
    if (GetMacosAlignedVersion() >= MacosVersion(13, 0)) {
      dyld_hdr = GetDyldImageHeaderViaSharedCache();
      if (!dyld_hdr) {
        VReport(1,
                "Failed to look up the dyld image header in the shared cache "
                "on macOS 13+ (or no shared cache in use). Falling back to "
                "lookup via vm_region_recurse_64().\n");
        dyld_hdr = GetDyldImageHeaderViaVMRegion();
      }
    } else {
      dyld_hdr = GetDyldImageHeaderViaVMRegion();
    }
    CHECK(dyld_hdr);
  }

  return dyld_hdr;
}

// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and fills |segment| with the start and end addresses and file offset of the
// corresponding segment.
// Note that the segment addresses are not necessarily sorted.
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData *layout_data) {
  const char *lc = layout_data->current_load_cmd_addr;

  layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  layout_data->current_load_cmd_count--;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand* sc = (const SegmentCommand *)lc;
    if (internal_strcmp(sc->segname, "__LINKEDIT") == 0) {
      // The __LINKEDIT segment is for internal linker use, and may alias
      // with the __LINKEDIT segment of other modules. (If we included it,
      // our memory map would contain overlapping sections.)
      return false;
    }

    uptr base_virt_addr;
    if (layout_data->current_image == kDyldImageIdx)
      base_virt_addr = (uptr)_dyld_get_image_slide(get_dyld_hdr());
    else
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);

    segment->start = sc->vmaddr + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
      seg_data->name[ARRAY_SIZE(seg_data->name) - 1] = 0;
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset = (layout_data->current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data->current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data->current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
      segment->filename[segment->filename_size - 1] = 0;
    }
    segment->arch = layout_data->current_arch;
    internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}

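// Maps a Mach-O (cputype, cpusubtype) pair onto the sanitizer's ModuleArch
// enum. The subtype's capability bits (CPU_SUBTYPE_MASK) are stripped before
// comparison.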
ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
  switch (cputype) {
    case CPU_TYPE_I386:
      return kModuleArchI386;
    case CPU_TYPE_X86_64:
      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL)
        return kModuleArchX86_64;
      if (cpusubtype == CPU_SUBTYPE_X86_64_H)
        return kModuleArchX86_64H;
      CHECK(0 && "Invalid subtype of x86_64");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM:
      if (cpusubtype == CPU_SUBTYPE_ARM_V6)
        return kModuleArchARMV6;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7)
        return kModuleArchARMV7;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7S)
        return kModuleArchARMV7S;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7K)
        return kModuleArchARMV7K;
      CHECK(0 && "Invalid subtype of ARM");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM64:
      if (cpusubtype == CPU_SUBTYPE_ARM64E)
        return kModuleArchARM64E;
      return kModuleArchARM64;
    default:
      CHECK(0 && "Invalid CPU type");
      return kModuleArchUnknown;
  }
}

static const load_command *NextCommand(const load_command *lc) {
  return (const load_command *)((const char *)lc + lc->cmdsize);
}

# ifdef MH_MAGIC_64
static constexpr size_t header_size = sizeof(mach_header_64);
# else
static constexpr size_t header_size = sizeof(mach_header);
# endif

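// Scans the image's load commands for LC_UUID and copies the UUID into
// |uuid_output|. If no LC_UUID command is present, the buffer is left
// untouched (it is zeroed in Reset()).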
static void FindUUID(const load_command *first_lc, const mach_header *hdr,
                     u8 *uuid_output) {
  uint32_t curcmd = 0;
  for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
       curcmd++, lc = NextCommand(lc)) {
    CHECK_LT((const char *)lc,
             (const char *)hdr + header_size + hdr->sizeofcmds);

    if (lc->cmd != LC_UUID)
      continue;

    const uuid_command *uuid_lc = (const uuid_command *)lc;
    const uint8_t *uuid = &uuid_lc->uuid[0];
    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
    return;
  }
}

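// Returns true if the image links against a sanitizer runtime dylib
// (libclang_rt.*), which is how we decide whether a module is instrumented.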
static bool IsModuleInstrumented(const load_command *first_lc,
                                 const mach_header *hdr) {
  uint32_t curcmd = 0;
  for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
       curcmd++, lc = NextCommand(lc)) {
    CHECK_LT((const char *)lc,
             (const char *)hdr + header_size + hdr->sizeofcmds);

    if (lc->cmd != LC_LOAD_DYLIB)
      continue;

    const dylib_command *dylib_lc = (const dylib_command *)lc;
    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
    dylib_name = StripModuleName(dylib_name);
    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
      return true;
    }
  }
  return false;
}

const ImageHeader *MemoryMappingLayout::CurrentImageHeader() {
  const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                               ? get_dyld_hdr()
                               : _dyld_get_image_header(data_.current_image);
  return (const ImageHeader *)hdr;
}

bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (const mach_header *)CurrentImageHeader();
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr, hdr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr, hdr);
    }

    while (data_.current_load_cmd_count > 0) {
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
      }
    }
    // If we get here, none of the remaining load commands in this image
    // describe segments. Go on to the next image.
    data_.current_load_cmd_count = -1;  // Triggers loading the next image.
  }
  return false;
}

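// Typical use of the iterator above (a sketch; DumpListOfModules below is the
// canonical in-file caller):
//
//   MemoryMappingLayout layout(/*cache_enabled=*/false);
//   InternalMmapVector<char> fname(kMaxPathLength);
//   MemoryMappedSegment segment(fname.data(), fname.size());
//   while (layout.Next(&segment)) {
//     // segment.start/end/protection now describe one LC_SEGMENT(_64) of
//     // some loaded image (including dyld itself).
//   }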
void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalMmapVector<char> module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), module_name.size());
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    // Skip segments with no filename, and the __PAGEZERO segment (its vmsize
    // is 0).
    if (segment.filename[0] == '\0' || (segment.start == segment.end))
      continue;
    LoadedModule *cur_module = nullptr;
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}

}  // namespace __sanitizer

#endif  // SANITIZER_APPLE