//===-- sanitizer_procmaps_mac.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Information about the process mappings (Mac-specific parts).
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_APPLE
#include "sanitizer_common.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach/mach.h>

// These are not available in older macOS SDKs.
#ifndef CPU_SUBTYPE_X86_64_H
#define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */
#endif
#ifndef CPU_SUBTYPE_ARM_V7S
#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */
#endif
#ifndef CPU_SUBTYPE_ARM_V7K
#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12)
#endif
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#endif

namespace __sanitizer {

// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  char name[kMaxSegName];
  uptr nsects;
  const char *current_load_cmd_addr;
  u32 lc_type;
  uptr base_virt_addr;
  uptr addr_mask;
};

template <typename Section>
static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
                            bool isWritable) {
  const Section *sc = (const Section *)data->current_load_cmd_addr;
  data->current_load_cmd_addr += sizeof(Section);

  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
  uptr sec_end = sec_start + sc->size;
  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                          sc->sectname);
}

void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}

MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
}

MemoryMappingLayout::~MemoryMappingLayout() {
}

bool MemoryMappingLayout::Error() const {
  return false;
}

// More information about Mach-O headers can be found in <mach-o/loader.h>.
// Each Mach-O image has a header (mach_header or mach_header_64) starting
// with a magic number, and a list of linker load commands directly following
// the header.
// A load command is at least two 32-bit words: the command type and the
// command size in bytes. We're interested only in segment load commands
// (LC_SEGMENT and LC_SEGMENT_64), which describe the parts of the file that
// are mapped into the task's address space.
// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
// segment_command_64 correspond to the memory address, memory size and file
// offset of the current memory segment.
// Because these fields are taken from the image as is, one needs to add
// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
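//
// For illustration, a minimal walk over one image's load commands looks
// roughly like this (a sketch only, assuming a 64-bit image; the real
// iteration in Next() below also tracks per-image state and handles both
// 32- and 64-bit headers):
//
//   const mach_header_64 *hdr =
//       (const mach_header_64 *)_dyld_get_image_header(image_index);
//   const load_command *lc =
//       (const load_command *)((const char *)hdr + sizeof(mach_header_64));
//   for (uint32_t i = 0; i < hdr->ncmds; i++) {
//     if (lc->cmd == LC_SEGMENT_64) {
//       const segment_command_64 *sc = (const segment_command_64 *)lc;
//       // sc->vmaddr, sc->vmsize and sc->fileoff describe the segment.
//     }
//     lc = (const load_command *)((const char *)lc + lc->cmdsize);
//   }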

void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}

// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static mach_header *dyld_hdr = 0;
static const char kDyldPath[] = "/usr/lib/dyld";
static const int kDyldImageIdx = -1;

// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}

void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}

static bool IsDyldHdr(const mach_header *hdr) {
  return (hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
         hdr->filetype == MH_DYLINKER;
}

// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually walking the memory map until we hit a
// Mach header matching dyld. These vm_region_recurse_64() calls are
// expensive, but the first memory map generation occurs early in the
// process, when dyld is one of the only images loaded, so its header is
// found after only a few iterations. These assumptions no longer hold on
// macOS 13+, where dyld itself has moved into the shared cache.
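// Note that vm_region_recurse_64() treats |address| as an in/out parameter:
// on success it is updated to the start of the region found at or after the
// requested address, so advancing |address| by |size| after each call visits
// every mapped region in turn.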
static mach_header *GetDyldImageHeaderViaVMRegion() {
  vm_address_t address = 0;

  while (true) {
    vm_size_t size = 0;
    unsigned depth = 1;
    struct vm_region_submap_info_64 info;
    mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
    kern_return_t err =
        vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                             (vm_region_info_t)&info, &count);
    if (err != KERN_SUCCESS) return nullptr;

    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      if (IsDyldHdr(hdr)) {
        return hdr;
      }
    }
    address += size;
  }
}

extern "C" {
struct dyld_shared_cache_dylib_text_info {
  uint64_t version;  // current version 2
  // following fields all exist in version 1
  uint64_t loadAddressUnslid;
  uint64_t textSegmentSize;
  uuid_t dylibUuid;
  const char *path;  // pointer invalid at end of iterations
  // following fields all exist in version 2
  uint64_t textSegmentOffset;  // offset from start of cache
};
typedef struct dyld_shared_cache_dylib_text_info
    dyld_shared_cache_dylib_text_info;

extern bool _dyld_get_shared_cache_uuid(uuid_t uuid);
extern const void *_dyld_get_shared_cache_range(size_t *length);
extern int dyld_shared_cache_iterate_text(
    const uuid_t cacheUuid,
    void (^callback)(const dyld_shared_cache_dylib_text_info *info));
}  // extern "C"

static mach_header *GetDyldImageHeaderViaSharedCache() {
  uuid_t uuid;
  bool hasCache = _dyld_get_shared_cache_uuid(uuid);
  if (!hasCache)
    return nullptr;

  size_t cacheLength;
  __block uptr cacheStart = (uptr)_dyld_get_shared_cache_range(&cacheLength);
  CHECK(cacheStart && cacheLength);

  __block mach_header *dyldHdr = nullptr;
  int res = dyld_shared_cache_iterate_text(
      uuid, ^(const dyld_shared_cache_dylib_text_info *info) {
        CHECK_GE(info->version, 2);
        mach_header *hdr =
            (mach_header *)(cacheStart + info->textSegmentOffset);
        if (IsDyldHdr(hdr))
          dyldHdr = hdr;
      });
  CHECK_EQ(res, 0);

  return dyldHdr;
}

const mach_header *get_dyld_hdr() {
  if (!dyld_hdr) {
    // On macOS 13+, dyld itself has moved into the shared cache. Looking it up
    // via vm_region_recurse_64() causes spins/hangs/crashes.
    if (GetMacosAlignedVersion() >= MacosVersion(13, 0)) {
      dyld_hdr = GetDyldImageHeaderViaSharedCache();
      if (!dyld_hdr) {
        VReport(1,
                "Failed to lookup the dyld image header in the shared cache on "
                "macOS 13+ (or no shared cache in use). Falling back to "
                "lookup via vm_region_recurse_64().\n");
        dyld_hdr = GetDyldImageHeaderViaVMRegion();
      }
    } else {
      dyld_hdr = GetDyldImageHeaderViaVMRegion();
    }
    CHECK(dyld_hdr);
  }

  return dyld_hdr;
}

// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and returns the start and end addresses and file offset of the
// corresponding segment.
// Note that the segment addresses are not necessarily sorted.
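//
// For a regular (non-dyld) image, the runtime address of a segment is its
// link-time vmaddr plus the image's ASLR slide. For example (illustrative
// numbers only), a segment with vmaddr 0x100000000 in an image slid by
// 0x4000 starts at runtime address 0x100004000:
//   uptr start = sc->vmaddr + _dyld_get_image_vmaddr_slide(image_index);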
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData *layout_data) {
  const char *lc = layout_data->current_load_cmd_addr;

  layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  layout_data->current_load_cmd_count--;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand *sc = (const SegmentCommand *)lc;
    uptr base_virt_addr, addr_mask;
    if (layout_data->current_image == kDyldImageIdx) {
      base_virt_addr = (uptr)get_dyld_hdr();
      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
      // it contains an absolute address rather than an offset for dyld.
      // That absolute address is not the true segment address either, but
      // its offset portion is accurate when combined with the dyld base
      // address, and the mask extracts exactly that offset.
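      // For example (illustrative value), a dyld vmaddr of 0x8fe2d000 masks
      // down to the offset 0x2d000, which is then rebased onto
      // get_dyld_hdr() via base_virt_addr.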
      addr_mask = 0xfffff;
    } else {
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
      addr_mask = ~0;
    }

    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      seg_data->addr_mask = addr_mask;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset = (layout_data->current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data->current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data->current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
    }
    segment->arch = layout_data->current_arch;
    internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}

ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
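  // Strip the capability bits (CPU_SUBTYPE_MASK) before comparing: e.g. a
  // reported cpusubtype of 0x80000008 (CPU_SUBTYPE_LIB64 | 8) masks down to
  // CPU_SUBTYPE_X86_64_H (8).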
  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
  switch (cputype) {
    case CPU_TYPE_I386:
      return kModuleArchI386;
    case CPU_TYPE_X86_64:
      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
      CHECK(0 && "Invalid subtype of x86_64");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM:
      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
      CHECK(0 && "Invalid subtype of ARM");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM64:
      return kModuleArchARM64;
    default:
      CHECK(0 && "Invalid CPU type");
      return kModuleArchUnknown;
  }
}

static const load_command *NextCommand(const load_command *lc) {
  return (const load_command *)((const char *)lc + lc->cmdsize);
}

#ifdef MH_MAGIC_64
static constexpr size_t header_size = sizeof(mach_header_64);
#else
static constexpr size_t header_size = sizeof(mach_header);
#endif

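// Scans the image's load commands for LC_UUID and copies the module UUID
// into |uuid_output|; the output is left untouched if no LC_UUID command is
// present.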
static void FindUUID(const load_command *first_lc, const mach_header *hdr,
                     u8 *uuid_output) {
  uint32_t curcmd = 0;
  for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
       curcmd++, lc = NextCommand(lc)) {
    CHECK_LT((const char *)lc,
             (const char *)hdr + header_size + hdr->sizeofcmds);

    if (lc->cmd != LC_UUID)
      continue;

    const uuid_command *uuid_lc = (const uuid_command *)lc;
    const uint8_t *uuid = &uuid_lc->uuid[0];
    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
    return;
  }
}

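// A module is considered instrumented if it links against a sanitizer
// runtime, i.e. one of its LC_LOAD_DYLIB commands names a libclang_rt.*
// dylib.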
static bool IsModuleInstrumented(const load_command *first_lc,
                                 const mach_header *hdr) {
  uint32_t curcmd = 0;
  for (const load_command *lc = first_lc; curcmd < hdr->ncmds;
       curcmd++, lc = NextCommand(lc)) {
    CHECK_LT((const char *)lc,
             (const char *)hdr + header_size + hdr->sizeofcmds);

    if (lc->cmd != LC_LOAD_DYLIB)
      continue;

    const dylib_command *dylib_lc = (const dylib_command *)lc;
    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
    dylib_name = StripModuleName(dylib_name);
    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
      return true;
    }
  }
  return false;
}

const ImageHeader *MemoryMappingLayout::CurrentImageHeader() {
  const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                               ? get_dyld_hdr()
                               : _dyld_get_image_header(data_.current_image);
  return (const ImageHeader *)hdr;
}

bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (const mach_header *)CurrentImageHeader();
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr, hdr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr, hdr);
    }

    while (data_.current_load_cmd_count > 0) {
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
      }
    }
    // If we get here, no more load_cmd's in this image talk about
    // segments. Go on to the next image.
    data_.current_load_cmd_count = -1;  // This will trigger loading next image
  }
  return false;
}

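// Builds the list of loaded modules by iterating over all segments and
// merging consecutive segments that belong to the same image into a single
// LoadedModule.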
void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalMmapVector<char> module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), module_name.size());
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    // Skip segments with no filename, and the __PAGEZERO segment, whose
    // vmsize is 0 (start == end).
    if (segment.filename[0] == '\0' || (segment.start == segment.end))
      continue;
    LoadedModule *cur_module = nullptr;
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}

}  // namespace __sanitizer

#endif  // SANITIZER_APPLE