ICF.cpp source code [llvm_projects/lld/MachO/ICF.cpp]

1	//===- ICF.cpp ------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "ICF.h"
10	#include "ConcatOutputSection.h"
11	#include "Config.h"
12	#include "InputSection.h"
13	#include "SymbolTable.h"
14	#include "Symbols.h"
15
16	#include "lld/Common/CommonLinkerContext.h"
17	#include "llvm/Support/Parallel.h"
18	#include "llvm/Support/TimeProfiler.h"
19	#include "llvm/Support/xxhash.h"
20
21	#include <atomic>
22
23	using namespace llvm;
24	using namespace lld;
25	using namespace lld::macho;
26
27	static constexpr bool verboseDiagnostics = false;
28	// This counter is used to generate unique thunk names.
29	static uint64_t icfThunkCounter = `0`;
30
31	class ICF {
32	public:
33	ICF(std::vector<ConcatInputSection *> &inputs);
34	void run();
35
36	using EqualsFn = bool (ICF::)(const* ConcatInputSection *,
37	const ConcatInputSection *);
38	void segregate(size_t begin, size_t end, EqualsFn);
39	size_t findBoundary(size_t begin, size_t end);
40	void forEachClassRange(size_t begin, size_t end,
41	llvm::function_ref<void(size_t, size_t)> func);
42	void forEachClass(llvm::function_ref<void(size_t, size_t)> func);
43
44	bool equalsConstant(const ConcatInputSection *ia,
45	const ConcatInputSection *ib);
46	bool equalsVariable(const ConcatInputSection *ia,
47	const ConcatInputSection *ib);
48	void applySafeThunksToRange(size_t begin, size_t end);
49
50	// ICF needs a copy of the inputs vector because its equivalence-class
51	// segregation algorithm destroys the proper sequence.
52	std::vector<ConcatInputSection *> icfInputs;
53
54	unsigned icfPass = `0`;
55	std::atomic<bool> icfRepeat{false};
56	std::atomic<uint64_t> equalsConstantCount{`0`};
57	std::atomic<uint64_t> equalsVariableCount{`0`};
58	};
59
60	ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
61	icfInputs.assign(first: inputs.begin(), last: inputs.end());
62	}
63
64	// ICF = Identical Code Folding
65	//
66	// We only fold __TEXT,__text, so this is really "code" folding, and not
67	// "COMDAT" folding. String and scalar constant literals are deduplicated
68	// elsewhere.
69	//
70	// Summary of segments & sections:
71	//
72	// The __TEXT segment is readonly at the MMU. Some sections are already
73	// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal) and some are*
74	// synthetic and inherently free of duplicates (__TEXT,__stubs &
75	// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
76	// because doing so induces many test failures.
77	//
78	// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
79	// thus ineligible for ICF.
80	//
81	// The __DATA_CONST segment is read/write at the MMU, but is logically const to
82	// the application after dyld applies fixups to pointer data. We currently
83	// fold only the __DATA_CONST,__cfstring section.
84	//
85	// The __DATA segment is read/write at the MMU, and as application-writeable
86	// data, none of its sections are eligible for ICF.
87	//
88	// Please see the large block comment in lld/ELF/ICF.cpp for an explanation
89	// of the segregation algorithm.
90	//
91	// FIXME(gkm): implement keep-unique attributes
92	// FIXME(gkm): implement address-significance tables for MachO object files
93
94	// Compare "non-moving" parts of two ConcatInputSections, namely everything
95	// except references to other ConcatInputSections.
96	bool ICF::equalsConstant(const ConcatInputSection *ia,
97	const ConcatInputSection *ib) {
98	if (verboseDiagnostics)
99	++equalsConstantCount;
100	// We can only fold within the same OutputSection.
101	if (ia->parent != ib->parent)
102	return false;
103	if (ia->data.size() != ib->data.size())
104	return false;
105	if (ia->data != ib->data)
106	return false;
107	if (ia->relocs.size() != ib->relocs.size())
108	return false;
109	auto f = [](const Reloc &ra, const Reloc &rb) {
110	if (ra.type != rb.type)
111	return false;
112	if (ra.pcrel != rb.pcrel)
113	return false;
114	if (ra.length != rb.length)
115	return false;
116	if (ra.offset != rb.offset)
117	return false;
118	if (isa<Symbol >(Val: ra.referent) != isa<Symbol >(Val: rb.referent))
119	return false;
120
121	InputSection isecA, isecB;
122
123	uint64_t valueA = `0`;
124	uint64_t valueB = `0`;
125	if (isa<Symbol *>(Val: ra.referent)) {
126	const auto sa = cast<Symbol >(Val: ra.referent);
127	const auto sb = cast<Symbol >(Val: rb.referent);
128	if (sa->kind() != sb->kind())
129	return false;
130	// ICF runs before Undefineds are treated (and potentially converted into
131	// DylibSymbols).
132	if (isa<DylibSymbol>(Val: sa) \|\| isa<Undefined>(Val: sa))
133	return sa == sb && ra.addend == rb.addend;
134	assert(isa<Defined>(sa));
135	const auto *da = cast<Defined>(Val: sa);
136	const auto *db = cast<Defined>(Val: sb);
137	if (!da->isec() \|\| !db->isec()) {
138	assert(da->isAbsolute() && db->isAbsolute());
139	return da->value + ra.addend == db->value + rb.addend;
140	}
141	isecA = da->isec();
142	valueA = da->value;
143	isecB = db->isec();
144	valueB = db->value;
145	} else {
146	isecA = cast<InputSection *>(Val: ra.referent);
147	isecB = cast<InputSection *>(Val: rb.referent);
148	}
149
150	// Typically, we should not encounter sections marked with `keepUnique` at
151	// this point as they would have resulted in different hashes and therefore
152	// no need for a full comparison.
153	// However, in `safe_thunks` mode, it's possible for two different
154	// relocations to reference identical `keepUnique` functions that will be
155	// distinguished later via thunks - so we need to handle this case
156	// explicitly.
157	if ((isecA != isecB) && ((isecA->keepUnique && isCodeSection(isecA)) \|\|
158	(isecB->keepUnique && isCodeSection(isecB))))
159	return false;
160
161	if (isecA->parent != isecB->parent)
162	return false;
163	// Sections with identical parents should be of the same kind.
164	assert(isecA->kind() == isecB->kind());
165	// We will compare ConcatInputSection contents in equalsVariable.
166	if (isa<ConcatInputSection>(Val: isecA))
167	return ra.addend == rb.addend;
168	// Else we have two literal sections. References to them are equal iff their
169	// offsets in the output section are equal.
170	if (isa<Symbol *>(Val: ra.referent))
171	// For symbol relocs, we compare the contents at the symbol address. We
172	// don't do `getOffset(value + addend)` because value + addend may not be
173	// a valid offset in the literal section.
174	return isecA->getOffset(off: valueA) == isecB->getOffset(off: valueB) &&
175	ra.addend == rb.addend;
176	else {
177	assert(valueA == `0` && valueB == `0`);
178	// For section relocs, we compare the content at the section offset.
179	return isecA->getOffset(off: ra.addend) == isecB->getOffset(off: rb.addend);
180	}
181	};
182	return std::equal(first1: ia->relocs.begin(), last1: ia->relocs.end(), first2: ib->relocs.begin(),
183	binary_pred: f);
184	}
185
186	// Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
187	// handled by equalsConstant().
188	bool ICF::equalsVariable(const ConcatInputSection *ia,
189	const ConcatInputSection *ib) {
190	if (verboseDiagnostics)
191	++equalsVariableCount;
192	assert(ia->relocs.size() == ib->relocs.size());
193	auto f = [this](const Reloc &ra, const Reloc &rb) {
194	// We already filtered out mismatching values/addends in equalsConstant.
195	if (ra.referent == rb.referent)
196	return true;
197	const ConcatInputSection isecA, isecB;
198	if (isa<Symbol *>(Val: ra.referent)) {
199	// Matching DylibSymbols are already filtered out by the
200	// identical-referent check above. Non-matching DylibSymbols were filtered
201	// out in equalsConstant(). So we can safely cast to Defined here.
202	const auto da = cast<Defined>(Val: cast<Symbol >(Val: ra.referent));
203	const auto db = cast<Defined>(Val: cast<Symbol >(Val: rb.referent));
204	if (da->isAbsolute())
205	return true;
206	isecA = dyn_cast<ConcatInputSection>(Val: da->isec());
207	if (!isecA)
208	return true; // literal sections were checked in equalsConstant.
209	isecB = cast<ConcatInputSection>(Val: db->isec());
210	} else {
211	const auto sa = cast<InputSection >(Val: ra.referent);
212	const auto sb = cast<InputSection >(Val: rb.referent);
213	isecA = dyn_cast<ConcatInputSection>(Val: sa);
214	if (!isecA)
215	return true;
216	isecB = cast<ConcatInputSection>(Val: sb);
217	}
218	return isecA->icfEqClass[icfPass % `2`] == isecB->icfEqClass[icfPass % `2`];
219	};
220	if (!std::equal(first1: ia->relocs.begin(), last1: ia->relocs.end(), first2: ib->relocs.begin(), binary_pred: f))
221	return false;
222
223	// If there are symbols with associated unwind info, check that the unwind
224	// info matches. For simplicity, we only handle the case where there are only
225	// symbols at offset zero within the section (which is typically the case with
226	// .subsections_via_symbols.)
227	auto hasUnwind = [](Defined d) { return* d->unwindEntry() != nullptr; };
228	const auto *itA = llvm::find_if(Range: ia->symbols, P: hasUnwind);
229	const auto *itB = llvm::find_if(Range: ib->symbols, P: hasUnwind);
230	if (itA == ia->symbols.end())
231	return itB == ib->symbols.end();
232	if (itB == ib->symbols.end())
233	return false;
234	const Defined da = itA;
235	const Defined db = itB;
236	if (da->unwindEntry()->icfEqClass[icfPass % `2`] !=
237	db->unwindEntry()->icfEqClass[icfPass % `2`] \|\|
238	da->value != `0` \|\| db->value != `0`)
239	return false;
240	auto isZero = [](Defined d) { return* d->value == `0`; };
241	return std::find_if_not(first: std::next(x: itA), last: ia->symbols.end(), pred: isZero) ==
242	ia->symbols.end() &&
243	std::find_if_not(first: std::next(x: itB), last: ib->symbols.end(), pred: isZero) ==
244	ib->symbols.end();
245	}
246
247	// Find the first InputSection after BEGIN whose equivalence class differs
248	size_t ICF::findBoundary(size_t begin, size_t end) {
249	uint64_t beginHash = icfInputs [begin]->icfEqClass[icfPass % `2`];
250	for (size_t i = begin + `1`; i < end; ++i)
251	if (beginHash != icfInputs [i]->icfEqClass[icfPass % `2`])
252	return i;
253	return end;
254	}
255
256	// Invoke FUNC on subranges with matching equivalence class
257	void ICF::forEachClassRange(size_t begin, size_t end,
258	llvm::function_ref<void(size_t, size_t)> func) {
259	while (begin < end) {
260	size_t mid = findBoundary(begin, end);
261	func (begin, mid);
262	begin = mid;
263	}
264	}
265
266	// Find or create a symbol at offset 0 in the given section
267	static Symbol getThunkTargetSymbol(ConcatInputSection isec) {
268	for (Symbol *sym : isec->symbols)
269	if (auto *d = dyn_cast<Defined>(Val: sym))
270	if (d->value == `0`)
271	return sym;
272
273	std::string thunkName;
274	if (isec->symbols.size() == `0`)
275	thunkName = isec->getName().str() + ".icf.0";
276	else
277	thunkName = isec->getName().str() + "icf.thunk.target" +
278	std::to_string(val: icfThunkCounter++);
279
280	// If no symbol found at offset 0, create one
281	auto sym = make<Defined>(args&: thunkName, /file=/args: nullptr*, args&: isec,
282	/value=/args: `0`, /size=/args: isec->getSize(),
283	/isWeakDef=/args: false, /isExternal=/args: false,
284	/isPrivateExtern=/args: false, /isThumb=/args: false,
285	/isReferencedDynamically=/args: false,
286	/noDeadStrip=/args: false);
287	isec->symbols.push_back(NewVal: sym);
288	return sym;
289	}
290
291	// Given a range of identical icfInputs, replace address significant functions
292	// with a thunk that is just a direct branch to the first function in the
293	// series. This way we keep only one main body of the function but we still
294	// retain the address uniqueness of relevant functions by having them be a
295	// direct branch thunk rather than containing a full copy of the actual function
296	// body.
297	void ICF::applySafeThunksToRange(size_t begin, size_t end) {
298	// When creating a unique ICF thunk, use the first section as the section that
299	// all thunks will branch to.
300	ConcatInputSection *masterIsec = icfInputs [begin];
301
302	// If the first section is not address significant, sorting guarantees that
303	// there are no address significant functions. So we can skip this range.
304	if (!masterIsec->keepUnique)
305	return;
306
307	// Skip anything that is not a code section.
308	if (!isCodeSection(masterIsec))
309	return;
310
311	// If the functions we're dealing with are smaller than the thunk size, then
312	// just leave them all as-is - creating thunks would be a net loss.
313	uint32_t thunkSize = target->getICFSafeThunkSize();
314	if (masterIsec->data.size() <= thunkSize)
315	return;
316
317	// Get the symbol that all thunks will branch to.
318	Symbol *masterSym = getThunkTargetSymbol(isec: masterIsec);
319
320	for (size_t i = begin + `1`; i < end; ++i) {
321	ConcatInputSection *isec = icfInputs [i];
322	// When we're done processing keepUnique entries, we can stop. Sorting
323	// guaratees that all keepUnique will be at the front.
324	if (!isec->keepUnique)
325	break;
326
327	ConcatInputSection *thunk =
328	makeSyntheticInputSection(segName: isec->getSegName(), sectName: isec->getName());
329	addInputSection(inputSection: thunk);
330
331	target->initICFSafeThunkBody(thunk, targetSym: masterSym);
332	thunk->foldIdentical(redundant: isec, foldKind: Symbol::ICFFoldKind::Thunk);
333
334	// Since we're folding the target function into a thunk, we need to adjust
335	// the symbols that now got relocated from the target function to the thunk.
336	// Since the thunk is only one branch, we move all symbols to offset 0 and
337	// make sure that the size of all non-zero-size symbols is equal to the size
338	// of the branch.
339	for (auto *sym : thunk->symbols) {
340	sym->value = `0`;
341	if (sym->size != `0`)
342	sym->size = thunkSize;
343	}
344	}
345	}
346
347	// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
348	// with matching equivalence class
349	void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
350	// Only use threads when the benefits outweigh the overhead.
351	const size_t threadingThreshold = `1024`;
352	if (icfInputs.size() < threadingThreshold) {
353	forEachClassRange(begin: `0`, end: icfInputs.size(), func);
354	++icfPass;
355	return;
356	}
357
358	// Shard into non-overlapping intervals, and call FUNC in parallel. The
359	// sharding must be completed before any calls to FUNC are made so that FUNC
360	// can modify the InputSection in its shard without causing data races.
361	const size_t shards = `256`;
362	size_t step = icfInputs.size() / shards;
363	size_t boundaries[shards + `1`];
364	boundaries[`0`] = `0`;
365	boundaries[shards] = icfInputs.size();
366	parallelFor(Begin: `1`, End: shards, Fn: [&](size_t i) {
367	boundaries[i] = findBoundary(begin: (i - `1`) * step, end: icfInputs.size());
368	});
369	parallelFor(Begin: `1`, End: shards + `1`, Fn: [&](size_t i) {
370	if (boundaries[i - `1`] < boundaries[i]) {
371	forEachClassRange(begin: boundaries[i - `1`], end: boundaries[i], func);
372	}
373	});
374	++icfPass;
375	}
376
377	void ICF::run() {
378	// Into each origin-section hash, combine all reloc referent section hashes.
379	for (icfPass = `0`; icfPass < `2`; ++icfPass) {
380	parallelForEach(R&: icfInputs, Fn: [&](ConcatInputSection *isec) {
381	uint32_t hash = isec->icfEqClass[icfPass % `2`];
382	for (const Reloc &r : isec->relocs) {
383	if (auto sym = r.referent.dyn_cast<Symbol >()) {
384	if (auto *defined = dyn_cast<Defined>(Val: sym)) {
385	if (defined->isec()) {
386	if (auto *referentIsec =
387	dyn_cast<ConcatInputSection>(Val: defined->isec()))
388	hash += defined->value + referentIsec->icfEqClass[icfPass % `2`];
389	else
390	hash += defined->isec()->kind() +
391	defined->isec()->getOffset(off: defined->value);
392	} else {
393	hash += defined->value;
394	}
395	} else {
396	// ICF runs before Undefined diags
397	assert(isa<Undefined>(sym) \|\| isa<DylibSymbol>(sym));
398	}
399	}
400	}
401	// Set MSB to 1 to avoid collisions with non-hashed classes.
402	isec->icfEqClass[(icfPass + `1`) % `2`] = hash \| (`1ull` << `31`);
403	});
404	}
405
406	llvm::stable_sort(
407	Range&: icfInputs, C: [](const ConcatInputSection a, const* ConcatInputSection *b) {
408	// When using safe_thunks, ensure that we first sort by icfEqClass and
409	// then by keepUnique (descending). This guarantees that within an
410	// equivalence class, the keepUnique inputs are always first.
411	if (config ->icfLevel == ICFLevel::safe_thunks)
412	if (a->icfEqClass[`0`] == b->icfEqClass[`0`])
413	return a->keepUnique > b->keepUnique;
414	return a->icfEqClass[`0`] < b->icfEqClass[`0`];
415	});
416	forEachClass(func: [&](size_t begin, size_t end) {
417	segregate(begin, end, &ICF::equalsConstant);
418	});
419
420	// Split equivalence groups by comparing relocations until convergence
421	do {
422	icfRepeat = false;
423	forEachClass(func: [&](size_t begin, size_t end) {
424	segregate(begin, end, &ICF::equalsVariable);
425	});
426	} while (icfRepeat);
427	log(msg: "ICF needed " + Twine (icfPass) + " iterations");
428	if (verboseDiagnostics) {
429	log(msg: "equalsConstant() called " + Twine (equalsConstantCount) + " times");
430	log(msg: "equalsVariable() called " + Twine (equalsVariableCount) + " times");
431	}
432
433	// When using safe_thunks, we need to create thunks for all keepUnique
434	// functions that can be deduplicated. Since we're creating / adding new
435	// InputSections, we can't paralellize this.
436	if (config ->icfLevel == ICFLevel::safe_thunks)
437	forEachClassRange(begin: `0`, end: icfInputs.size(), func: [&](size_t begin, size_t end) {
438	applySafeThunksToRange(begin, end);
439	});
440
441	// Fold sections within equivalence classes
442	forEachClass(func: [&](size_t begin, size_t end) {
443	if (end - begin < `2`)
444	return;
445	bool useSafeThunks = config ->icfLevel == ICFLevel::safe_thunks;
446
447	// For ICF level safe_thunks, replace keepUnique function bodies with
448	// thunks. For all other ICF levles, directly merge the functions.
449
450	ConcatInputSection *beginIsec = icfInputs [begin];
451	for (size_t i = begin + `1`; i < end; ++i) {
452	// Skip keepUnique inputs when using safe_thunks (already handeled above)
453	if (useSafeThunks && icfInputs [i]->keepUnique) {
454	// Assert keepUnique sections are either small or replaced with thunks.
455	assert(!icfInputs[i]->live \|\|
456	icfInputs[i]->data.size() <= target->getICFSafeThunkSize());
457	assert(!icfInputs[i]->replacement \|\|
458	icfInputs[i]->replacement->data.size() ==
459	target->getICFSafeThunkSize());
460	continue;
461	}
462	beginIsec->foldIdentical(redundant: icfInputs [i]);
463	}
464	});
465	}
466
467	// Split an equivalence class into smaller classes.
468	void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
469	while (begin < end) {
470	// Divide [begin, end) into two. Let mid be the start index of the
471	// second group.
472	auto bound = std::stable_partition(
473	first: icfInputs.begin() + begin + `1`, last: icfInputs.begin() + end,
474	pred: [&](ConcatInputSection *isec) {
475	return (this->*equals)(icfInputs [begin], isec);
476	});
477	size_t mid = bound - icfInputs.begin();
478
479	// Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
480	// equivalence class ID because every group ends with a unique index.
481	for (size_t i = begin; i < mid; ++i)
482	icfInputs [i]->icfEqClass[(icfPass + `1`) % `2`] = mid;
483
484	// If we created a group, we need to iterate the main loop again.
485	if (mid != end)
486	icfRepeat = true;
487
488	begin = mid;
489	}
490	}
491
492	void macho::markSymAsAddrSig(Symbol *s) {
493	if (auto *d = dyn_cast_or_null<Defined>(Val: s))
494	if (d->isec())
495	d->isec()->keepUnique = true;
496	}
497
498	void macho::markAddrSigSymbols() {
499	TimeTraceScope timeScope("Mark addrsig symbols");
500	for (InputFile *file : inputFiles) {
501	ObjFile *obj = dyn_cast<ObjFile>(Val: file);
502	if (!obj)
503	continue;
504
505	Section *addrSigSection = obj->addrSigSection;
506	if (!addrSigSection)
507	continue;
508	assert(addrSigSection->subsections.size() == `1`);
509
510	const InputSection *isec = addrSigSection->subsections [`0`].isec;
511
512	for (const Reloc &r : isec->relocs) {
513	if (auto sym = r.referent.dyn_cast<Symbol >())
514	markSymAsAddrSig(s: sym);
515	else
516	error(msg: toString(isec) + ": unexpected section relocation");
517	}
518	}
519	}
520
521	// Given a symbol that was folded into a thunk, return the symbol pointing to
522	// the actual body of the function. We use this approach rather than storing the
523	// needed info in the Defined itself in order to minimize memory usage.
524	Defined macho::getBodyForThunkFoldedSym(Defined foldedSym) {
525	assert(isa<ConcatInputSection>(foldedSym->originalIsec) &&
526	"thunk-folded ICF symbol expected to be on a ConcatInputSection");
527	// foldedSec is the InputSection that was marked as deleted upon fold
528	ConcatInputSection *foldedSec =
529	cast<ConcatInputSection>(Val: foldedSym->originalIsec);
530
531	// thunkBody is the actual live thunk, containing the code that branches to
532	// the actual body of the function.
533	InputSection *thunkBody = foldedSec->replacement;
534
535	// The symbol of the merged body of the function that the thunk jumps to. This
536	// will end up in the final binary.
537	Symbol *targetSym = target->getThunkBranchTarget(thunk: thunkBody);
538
539	return cast<Defined>(Val: targetSym);
540	}
541	void macho::foldIdenticalSections(bool onlyCfStrings) {
542	TimeTraceScope timeScope("Fold Identical Code Sections");
543	// The ICF equivalence-class segregation algorithm relies on pre-computed
544	// hashes of InputSection::data for the ConcatOutputSection::inputs and all
545	// sections referenced by their relocs. We could recursively traverse the
546	// relocs to find every referenced InputSection, but that precludes easy
547	// parallelization. Therefore, we hash every InputSection here where we have
548	// them all accessible as simple vectors.
549
550	// If an InputSection is ineligible for ICF, we give it a unique ID to force
551	// it into an unfoldable singleton equivalence class. Begin the unique-ID
552	// space at inputSections.size(), so that it will never intersect with
553	// equivalence-class IDs which begin at 0. Since hashes & unique IDs never
554	// coexist with equivalence-class IDs, this is not necessary, but might help
555	// someone keep the numbers straight in case we ever need to debug the
556	// ICF::segregate()
557	std::vector<ConcatInputSection *> foldable;
558	uint64_t icfUniqueID = inputSections.size();
559	// Reset the thunk counter for each run of ICF.
560	icfThunkCounter = `0`;
561	for (ConcatInputSection *isec : inputSections) {
562	bool isFoldableWithAddendsRemoved = isCfStringSection(isec) \|\|
563	isClassRefsSection(isec) \|\|
564	isSelRefsSection(isec);
565	// NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we
566	// can still fold it.
567	bool hasFoldableFlags = (isSelRefsSection(isec) \|\|
568	sectionType(flags: isec->getFlags()) == MachO::S_REGULAR);
569
570	bool isCodeSec = isCodeSection(isec);
571
572	// When keepUnique is true, the section is not foldable. Unless we are at
573	// icf level safe_thunks, in which case we still want to fold code sections.
574	// When using safe_thunks we'll apply the safe_thunks logic at merge time
575	// based on the 'keepUnique' flag.
576	bool noUniqueRequirement =
577	!isec->keepUnique \|\|
578	((config ->icfLevel == ICFLevel::safe_thunks) && isCodeSec);
579
580	// FIXME: consider non-code __text sections as foldable?
581	bool isFoldable = (!onlyCfStrings \|\| isCfStringSection(isec)) &&
582	(isCodeSec \|\| isFoldableWithAddendsRemoved \|\|
583	isGccExceptTabSection(isec)) &&
584	noUniqueRequirement && !isec->hasAltEntry &&
585	!isec->shouldOmitFromOutput() && hasFoldableFlags;
586	if (isFoldable) {
587	foldable.push_back(x: isec);
588	for (Defined *d : isec->symbols)
589	if (d->unwindEntry())
590	foldable.push_back(x: d->unwindEntry());
591
592	// Some sections have embedded addends that foil ICF's hashing / equality
593	// checks. (We can ignore embedded addends when doing ICF because the same
594	// information gets recorded in our Reloc structs.) We therefore create a
595	// mutable copy of the section data and zero out the embedded addends
596	// before performing any hashing / equality checks.
597	if (isFoldableWithAddendsRemoved) {
598	// We have to do this copying serially as the BumpPtrAllocator is not
599	// thread-safe. FIXME: Make a thread-safe allocator.
600	MutableArrayRef<uint8_t> copy = isec->data.copy(A&: bAlloc());
601	for (const Reloc &r : isec->relocs)
602	target->relocateOne(loc: copy.data() + r.offset, r, /va=/`0`,
603	/relocVA=/`0`);
604	isec->data = copy;
605	}
606	} else if (!isEhFrameSection(isec)) {
607	// EH frames are gathered as foldables from unwindEntry above; give a
608	// unique ID to everything else.
609	isec->icfEqClass[`0`] = ++icfUniqueID;
610	}
611	}
612	parallelForEach(R&: foldable, Fn: [](ConcatInputSection *isec) {
613	assert(isec->icfEqClass[`0`] == `0`); // don't overwrite a unique ID!
614	// Turn-on the top bit to guarantee that valid hashes have no collisions
615	// with the small-integer unique IDs for ICF-ineligible sections
616	isec->icfEqClass[`0`] = xxh3_64bits(data: isec->data) \| (`1ull` << `31`);
617	});
618	// Now that every input section is either hashed or marked as unique, run the
619	// segregation algorithm to detect foldable subsections.
620	ICF (foldable).run();
621	}
622

Browse the source code of llvm_projects/lld/MachO/ICF.cpp