1//===-- sanitizer_procmaps_mac.cpp ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Information about the process mappings (Mac-specific parts).
10//===----------------------------------------------------------------------===//
11
12#include "sanitizer_platform.h"
13#if SANITIZER_APPLE
14#include "sanitizer_common.h"
15#include "sanitizer_placement_new.h"
16#include "sanitizer_procmaps.h"
17
18#include <mach-o/dyld.h>
19#include <mach-o/loader.h>
20#include <mach/mach.h>
21
22// These are not available in older macOS SDKs.
23# ifndef CPU_SUBTYPE_X86_64_H
24# define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */
25# endif
26# ifndef CPU_SUBTYPE_ARM_V7S
27# define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */
28# endif
29# ifndef CPU_SUBTYPE_ARM_V7K
30# define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12)
31# endif
32# ifndef CPU_TYPE_ARM64
33# define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
34# endif
35# ifndef CPU_SUBTYPE_ARM64E
36# define CPU_SUBTYPE_ARM64E ((cpu_subtype_t)2)
37# endif
38
39namespace __sanitizer {
40
41// Contains information used to iterate through sections.
// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  // Segment name (e.g. "__TEXT"), NUL-terminated copy of segname.
  char name[kMaxSegName];
  // Number of section load commands remaining for this segment.
  uptr nsects;
  // Cursor pointing at the next section_(64) struct to read.
  const char *current_load_cmd_addr;
  // Either LC_SEGMENT or LC_SEGMENT_64; selects the section struct layout.
  u32 lc_type;
  // ASLR slide added to the on-disk virtual addresses of sections.
  uptr base_virt_addr;
};
49
50template <typename Section>
51static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
52 bool isWritable) {
53 const Section *sc = (const Section *)data->current_load_cmd_addr;
54 data->current_load_cmd_addr += sizeof(Section);
55
56 uptr sec_start = sc->addr + data->base_virt_addr;
57 uptr sec_end = sec_start + sc->size;
58 module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
59 sc->sectname);
60}
61
62static bool VerifyMemoryMapping(MemoryMappingLayout* mapping) {
63 InternalMmapVector<LoadedModule> modules;
64 modules.reserve(128); // matches DumpProcessMap
65 mapping->DumpListOfModules(&modules);
66
67 InternalMmapVector<LoadedModule::AddressRange> segments;
68 for (uptr i = 0; i < modules.size(); ++i) {
69 for (auto& range : modules[i].ranges()) {
70 if (range.beg == range.end)
71 continue;
72 segments.push_back(range);
73 }
74 }
75
76 // Verify that none of the segments overlap:
77 // 1. Sort the segments by the start address
78 // 2. Check that every segment starts after the previous one ends.
79 Sort(segments.data(), segments.size(),
80 [](LoadedModule::AddressRange& a, LoadedModule::AddressRange& b) {
81 return a.beg < b.beg;
82 });
83
84 // To avoid spam, we only print the report message once-per-process.
85 static bool invalid_module_map_reported = false;
86 bool well_formed = true;
87
88 for (size_t i = 1; i < segments.size(); i++) {
89 uptr cur_start = segments[i].beg;
90 uptr prev_end = segments[i - 1].end;
91 if (cur_start < prev_end) {
92 well_formed = false;
93 VReport(2, "Overlapping mappings: %s start = %p, %s end = %p\n",
94 segments[i].name, (void*)cur_start, segments[i - 1].name,
95 (void*)prev_end);
96 if (!invalid_module_map_reported) {
97 Report(
98 "WARN: Invalid dyld module map detected. This is most likely a bug "
99 "in the sanitizer.\n");
100 Report("WARN: Backtraces may be unreliable.\n");
101 invalid_module_map_reported = true;
102 }
103 }
104 }
105
106 for (auto& m : modules) m.clear();
107
108 mapping->Reset();
109 return well_formed;
110}
111
// Records this segment on |module|, either as one coarse range or as one
// range per contained section.
void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  // Otherwise record each section individually. NextSectionLoad advances
  // data_->current_load_cmd_addr past one section struct per call; the
  // struct layout depends on whether this was a 32- or 64-bit segment
  // load command (data_->lc_type). nsects is nonzero here, so the
  // do-while executes at least once and counts it down to zero.
  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}
134
// |cache_enabled| is accepted for interface parity but unused on Mac;
// see CacheMemoryMappings()/LoadFromCache() below.
MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
  // Sanity-check the dyld module map for overlapping segments; the
  // warning it may emit is rate-limited to once per process.
  VerifyMemoryMapping(this);
}
139
MemoryMappingLayout::~MemoryMappingLayout() {
  // Nothing to release; all iteration state is held by value in data_.
}
142
bool MemoryMappingLayout::Error() const {
  // The Mac implementation has no failure mode while reading the mappings.
  return false;
}
146
147// More information about Mach-O headers can be found in mach-o/loader.h
148// Each Mach-O image has a header (mach_header or mach_header_64) starting with
149// a magic number, and a list of linker load commands directly following the
150// header.
151// A load command is at least two 32-bit words: the command type and the
152// command size in bytes. We're interested only in segment load commands
153// (LC_SEGMENT and LC_SEGMENT_64), which tell that a part of the file is mapped
154// into the task's address space.
155// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
156// segment_command_64 correspond to the memory address, memory size and the
157// file offset of the current memory segment.
158// Because these fields are taken from the images as is, one needs to add
159// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
160
// Rewinds iteration to the last loaded image; Next() walks image indices
// downwards from here.
void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  // A negative load-command count tells Next() to (re)initialize the
  // per-image state before scanning load commands.
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}
175
// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static const mach_header* dyld_hdr = 0;
// Filename reported for the dyld pseudo-image.
static const char kDyldPath[] = "/usr/lib/dyld";
// Pseudo image index that represents dyld itself during iteration
// (dyld is not reported by the _dyld_* image APIs).
static const int kDyldImageIdx = -1;
182
// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now; mappings are re-read from dyld on each iteration.
}
187
void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now; see CacheMemoryMappings().
}
191
192static bool IsDyldHdr(const mach_header *hdr) {
193 return (hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
194 hdr->filetype == MH_DYLINKER;
195}
196
// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually recursing through the memory map
// until we hit a Mach header matching dyld instead. These recurse
// calls are expensive, but the first memory map generation occurs
// early in the process, when dyld is one of the only images loaded,
// so it will be hit after only a few iterations. These assumptions don't hold
// on macOS 13+ anymore (dyld itself has moved into the shared cache).
static mach_header *GetDyldImageHeaderViaVMRegion() {
  vm_address_t address = 0;

  while (true) {
    vm_size_t size = 0;
    unsigned depth = 1;
    struct vm_region_submap_info_64 info;
    mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
    kern_return_t err =
        vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                             (vm_region_info_t)&info, &count);
    // No more regions to inspect (or the query failed): dyld not found.
    if (err != KERN_SUCCESS) return nullptr;

    // Only readable regions large enough to hold a Mach header qualify.
    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      if (IsDyldHdr(hdr)) {
        return hdr;
      }
    }
    // vm_region_recurse_64 rounds |address| down to the region start;
    // advance past the region to continue the scan.
    address += size;
  }
}
226
// Private dyld SPI declarations for inspecting the shared cache; these are
// not exposed in public SDK headers, so they are declared manually here.
extern "C" {
struct dyld_shared_cache_dylib_text_info {
  uint64_t version;  // current version 2
  // following fields all exist in version 1
  uint64_t loadAddressUnslid;
  uint64_t textSegmentSize;
  uuid_t dylibUuid;
  const char *path;  // pointer invalid at end of iterations
  // following fields all exist in version 2
  uint64_t textSegmentOffset;  // offset from start of cache
};
typedef struct dyld_shared_cache_dylib_text_info
    dyld_shared_cache_dylib_text_info;

extern bool _dyld_get_shared_cache_uuid(uuid_t uuid);
extern const void *_dyld_get_shared_cache_range(size_t *length);
extern intptr_t _dyld_get_image_slide(const struct mach_header* mh);
extern int dyld_shared_cache_iterate_text(
    const uuid_t cacheUuid,
    void (^callback)(const dyld_shared_cache_dylib_text_info *info));
// Weak import: resolves to null when running on an OS that lacks this SPI.
SANITIZER_WEAK_IMPORT const struct mach_header* _dyld_get_dyld_header(void);
}  // extern "C"
249
// Locates dyld's Mach header inside the dyld shared cache (on macOS 13+
// dyld itself lives there). Returns nullptr when no shared cache is in use.
// Prefers the _dyld_get_dyld_header() SPI when the OS provides it;
// otherwise scans the cache's text segments for an MH_DYLINKER header.
static const mach_header* GetDyldImageHeaderViaSharedCache() {
  uuid_t uuid;
  bool hasCache = _dyld_get_shared_cache_uuid(uuid);
  if (!hasCache)
    return nullptr;

  // Weak import check: null on OS versions without this SPI.
  if (&_dyld_get_dyld_header != nullptr)
    return _dyld_get_dyld_header();

  size_t cacheLength;
  __block uptr cacheStart = (uptr)_dyld_get_shared_cache_range(&cacheLength);
  CHECK(cacheStart && cacheLength);

  __block mach_header *dyldHdr = nullptr;
  int res = dyld_shared_cache_iterate_text(
      uuid, ^(const dyld_shared_cache_dylib_text_info *info) {
        // textSegmentOffset only exists from version 2 onwards.
        CHECK_GE(info->version, 2);
        mach_header *hdr =
            (mach_header *)(cacheStart + info->textSegmentOffset);
        if (IsDyldHdr(hdr))
          dyldHdr = hdr;
      });
  CHECK_EQ(res, 0);

  return dyldHdr;
}
276
277const mach_header *get_dyld_hdr() {
278 if (!dyld_hdr) {
279 // On macOS 13+, dyld itself has moved into the shared cache. Looking it up
280 // via vm_region_recurse_64() causes spins/hangs/crashes.
281 if (GetMacosAlignedVersion() >= MacosVersion(13, 0)) {
282 dyld_hdr = GetDyldImageHeaderViaSharedCache();
283 if (!dyld_hdr) {
284 VReport(1,
285 "Failed to lookup the dyld image header in the shared cache on "
286 "macOS 13+ (or no shared cache in use). Falling back to "
287 "lookup via vm_region_recurse_64().\n");
288 dyld_hdr = GetDyldImageHeaderViaVMRegion();
289 }
290 } else {
291 dyld_hdr = GetDyldImageHeaderViaVMRegion();
292 }
293 CHECK(dyld_hdr);
294 }
295
296 return dyld_hdr;
297}
298
// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and returns the start and end addresses and file offset of the corresponding
// segment.
// Note that the segment addresses are not necessarily sorted.
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData *layout_data) {
  const char *lc = layout_data->current_load_cmd_addr;

  // Unconditionally consume this load command; |lc| still points at it.
  layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  layout_data->current_load_cmd_count--;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand* sc = (const SegmentCommand *)lc;
    if (internal_strcmp(sc->segname, "__LINKEDIT") == 0) {
      // The LINKEDIT sections are for internal linker use, and may alias
      // with the LINKEDIT section for other modules. (If we included them,
      // our memory map would contain overlapping sections.)
      return false;
    }

    // ASLR slide: dyld needs a dedicated lookup since it has no dyld image
    // index.
    uptr base_virt_addr;
    if (layout_data->current_image == kDyldImageIdx)
      base_virt_addr = (uptr)_dyld_get_image_slide(get_dyld_hdr());
    else
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);

    segment->start = sc->vmaddr + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      // Section structs immediately follow the segment command in memory.
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
      // strncpy does not guarantee termination; force it.
      seg_data->name[ARRAY_SIZE(seg_data->name) - 1] = 0;
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset = (layout_data->current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data->current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data->current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
      segment->filename[segment->filename_size - 1] = 0;
    }
    segment->arch = layout_data->current_arch;
    internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}
364
365ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
366 cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
367 switch (cputype) {
368 case CPU_TYPE_I386:
369 return kModuleArchI386;
370 case CPU_TYPE_X86_64:
371 if (cpusubtype == CPU_SUBTYPE_X86_64_ALL)
372 return kModuleArchX86_64;
373 if (cpusubtype == CPU_SUBTYPE_X86_64_H)
374 return kModuleArchX86_64H;
375 CHECK(0 && "Invalid subtype of x86_64");
376 return kModuleArchUnknown;
377 case CPU_TYPE_ARM:
378 if (cpusubtype == CPU_SUBTYPE_ARM_V6)
379 return kModuleArchARMV6;
380 if (cpusubtype == CPU_SUBTYPE_ARM_V7)
381 return kModuleArchARMV7;
382 if (cpusubtype == CPU_SUBTYPE_ARM_V7S)
383 return kModuleArchARMV7S;
384 if (cpusubtype == CPU_SUBTYPE_ARM_V7K)
385 return kModuleArchARMV7K;
386 CHECK(0 && "Invalid subtype of ARM");
387 return kModuleArchUnknown;
388 case CPU_TYPE_ARM64:
389 if (cpusubtype == CPU_SUBTYPE_ARM64E)
390 return kModuleArchARM64E;
391 return kModuleArchARM64;
392 default:
393 CHECK(0 && "Invalid CPU type");
394 return kModuleArchUnknown;
395 }
396}
397
398static const load_command *NextCommand(const load_command *lc) {
399 return (const load_command *)((const char *)lc + lc->cmdsize);
400}
401
// Size of the Mach header that precedes the load-command list; used to
// bound iteration over load commands. 64-bit header whenever the SDK
// defines MH_MAGIC_64.
# ifdef MH_MAGIC_64
static constexpr size_t header_size = sizeof(mach_header_64);
# else
static constexpr size_t header_size = sizeof(mach_header);
# endif
407
408static void FindUUID(const load_command *first_lc, const mach_header *hdr,
409 u8 *uuid_output) {
410 uint32_t curcmd = 0;
411 for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
412 curcmd++, lc = NextCommand(lc)) {
413 CHECK_LT((const char *)lc,
414 (const char *)hdr + header_size + hdr->sizeofcmds);
415
416 if (lc->cmd != LC_UUID)
417 continue;
418
419 const uuid_command *uuid_lc = (const uuid_command *)lc;
420 const uint8_t *uuid = &uuid_lc->uuid[0];
421 internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
422 return;
423 }
424}
425
426static bool IsModuleInstrumented(const load_command *first_lc,
427 const mach_header *hdr) {
428 uint32_t curcmd = 0;
429 for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
430 curcmd++, lc = NextCommand(lc)) {
431 CHECK_LT((const char *)lc,
432 (const char *)hdr + header_size + hdr->sizeofcmds);
433
434 if (lc->cmd != LC_LOAD_DYLIB)
435 continue;
436
437 const dylib_command *dylib_lc = (const dylib_command *)lc;
438 uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
439 const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
440 dylib_name = StripModuleName(dylib_name);
441 if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
442 return true;
443 }
444 }
445 return false;
446}
447
448const ImageHeader *MemoryMappingLayout::CurrentImageHeader() {
449 const mach_header *hdr = (data_.current_image == kDyldImageIdx)
450 ? get_dyld_hdr()
451 : _dyld_get_image_header(data_.current_image);
452 return (const ImageHeader *)hdr;
453}
454
// Advances iteration to the next memory segment, filling |segment|.
// Returns false once all images (including the dyld pseudo-image at index
// kDyldImageIdx) have been exhausted. Iteration state lives in data_ and
// is resumable across calls.
bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (const mach_header *)CurrentImageHeader();
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      // Load commands start immediately after the (32- or 64-bit) header.
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          // Unrecognized magic number: skip this image entirely.
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr, hdr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr, hdr);
    }

    // Scan remaining load commands. NextSegmentLoad consumes one command
    // (advancing the cursor and decrementing the count) per call and
    // returns true when it produced a segment.
    while (data_.current_load_cmd_count > 0) {
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
      }
    }
    // If we get here, no more load_cmd's in this image talk about
    // segments. Go on to the next image.
    data_.current_load_cmd_count = -1; // This will trigger loading next image
  }
  return false;
}
512
// Appends one LoadedModule per mapped image to |modules|, coalescing the
// consecutive segments Next() reports for the same file. Restarts iteration
// from the beginning.
void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalMmapVector<char> module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), module_name.size());
  // Request section-level detail so AddAddressRanges can record per-section
  // ranges where applicable.
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    // skip the __PAGEZERO segment, its vmsize is 0
    if (segment.filename[0] == '\0' || (segment.start == segment.end))
      continue;
    LoadedModule *cur_module = nullptr;
    // Segments of one module arrive consecutively; reuse the last module
    // when the filename matches, otherwise start a new one.
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}
537
538} // namespace __sanitizer
539
540#endif // SANITIZER_APPLE
541