1//===- ObjC.cpp -----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ObjC.h"
10#include "ConcatOutputSection.h"
11#include "InputFiles.h"
12#include "InputSection.h"
13#include "Layout.h"
14#include "OutputSegment.h"
15#include "SyntheticSections.h"
16#include "Target.h"
17
18#include "lld/Common/ErrorHandler.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/BinaryFormat/MachO.h"
21#include "llvm/Bitcode/BitcodeReader.h"
22#include "llvm/Support/TimeProfiler.h"
23
24using namespace llvm;
25using namespace llvm::MachO;
26using namespace lld;
27using namespace lld::macho;
28
29template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
30 using SectionHeader = typename LP::section;
31
32 auto *hdr =
33 reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
34 if (hdr->magic != LP::magic)
35 return false;
36
37 if (const auto *c =
38 findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
39 auto sectionHeaders = ArrayRef<SectionHeader>{
40 reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
41 for (const SectionHeader &secHead : sectionHeaders) {
42 StringRef sectname(secHead.sectname,
43 strnlen(secHead.sectname, sizeof(secHead.sectname)));
44 StringRef segname(secHead.segname,
45 strnlen(secHead.segname, sizeof(secHead.segname)));
46 if ((segname == segment_names::data &&
47 sectname == section_names::objcCatList) ||
48 (segname == segment_names::text &&
49 sectname.starts_with(Prefix: section_names::swift))) {
50 return true;
51 }
52 }
53 }
54 return false;
55}
56
57static bool objectHasObjCSection(MemoryBufferRef mb) {
58 if (target->wordSize == 8)
59 return ::objectHasObjCSection<LP64>(mb);
60 else
61 return ::objectHasObjCSection<ILP32>(mb);
62}
63
64bool macho::hasObjCSection(MemoryBufferRef mb) {
65 switch (identify_magic(magic: mb.getBuffer())) {
66 case file_magic::macho_object:
67 return objectHasObjCSection(mb);
68 case file_magic::bitcode:
69 return check(e: isBitcodeContainingObjCCategory(Buffer: mb));
70 default:
71 return false;
72 }
73}
74
namespace {

// Each FOR_EACH_* macro below is a field list: one DO(type, name) entry per
// field, in order. CREATE_LAYOUT_CLASS (defined outside this file) consumes
// the list and produces a <Name>Layout type; the code in this file uses the
// resulting `<field>Offset` members and `totalSize`, and constructs each
// layout from target->wordSize.

// Fields of a category record (-> CategoryLayout).
#define FOR_EACH_CATEGORY_FIELD(DO)                                            \
  DO(Ptr, name)                                                                \
  DO(Ptr, klass)                                                               \
  DO(Ptr, instanceMethods)                                                     \
  DO(Ptr, classMethods)                                                        \
  DO(Ptr, protocols)                                                           \
  DO(Ptr, instanceProps)                                                       \
  DO(Ptr, classProps)                                                          \
  DO(uint32_t, size)

CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);

#undef FOR_EACH_CATEGORY_FIELD

// Fields of a class record (-> ClassLayout).
#define FOR_EACH_CLASS_FIELD(DO)                                               \
  DO(Ptr, metaClass)                                                           \
  DO(Ptr, superClass)                                                          \
  DO(Ptr, methodCache)                                                         \
  DO(Ptr, vtable)                                                              \
  DO(Ptr, roData)

CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);

#undef FOR_EACH_CLASS_FIELD

// Fields of the read-only class data reached through ClassLayout's roData
// field (-> ROClassLayout).
// NOTE(review): instanceSize is declared pointer-sized here; presumably this
// accounts for padding on 64-bit targets -- confirm against the runtime's
// struct definition.
#define FOR_EACH_RO_CLASS_FIELD(DO)                                            \
  DO(uint32_t, flags)                                                          \
  DO(uint32_t, instanceStart)                                                  \
  DO(Ptr, instanceSize)                                                        \
  DO(Ptr, ivarLayout)                                                          \
  DO(Ptr, name)                                                                \
  DO(Ptr, baseMethods)                                                         \
  DO(Ptr, baseProtocols)                                                       \
  DO(Ptr, ivars)                                                               \
  DO(Ptr, weakIvarLayout)                                                      \
  DO(Ptr, baseProperties)

CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);

#undef FOR_EACH_RO_CLASS_FIELD

// Header that precedes the entries of a method/property list: the size of
// one entry followed by the number of entries (-> ListHeaderLayout).
#define FOR_EACH_LIST_HEADER(DO)                                               \
  DO(uint32_t, structSize)                                                     \
  DO(uint32_t, structCount)

CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);

#undef FOR_EACH_LIST_HEADER

// Header of a protocol list: a single pointer-sized count
// (-> ProtocolListHeaderLayout).
#define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)

CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);

#undef FOR_EACH_PROTOCOL_LIST_HEADER

// One method list entry: three pointers (-> MethodLayout).
#define FOR_EACH_METHOD(DO)                                                    \
  DO(Ptr, name)                                                                \
  DO(Ptr, type)                                                                \
  DO(Ptr, impl)

CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);

#undef FOR_EACH_METHOD

// Whether a method was found in a class definition or in a category.
enum MethodContainerKind {
  MCK_Class,
  MCK_Category,
};

// Records where a method was first seen: the kind of container and the input
// section holding that container's record.
struct MethodContainer {
  MethodContainerKind kind;
  const ConcatInputSection *isec;
};

// Instance ("-") versus class ("+") method.
enum MethodKind {
  MK_Instance,
  MK_Static,
};

// Per-class bookkeeping used for duplicate detection: method name -> the
// container that first defined it, kept separately for instance and class
// methods.
struct ObjcClass {
  DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
  DenseMap<CachedHashStringRef, MethodContainer> classMethods;
};

} // namespace
162
163class ObjcCategoryChecker {
164public:
165 ObjcCategoryChecker();
166 void parseCategory(const ConcatInputSection *catListIsec);
167
168private:
169 void parseClass(const Defined *classSym);
170 void parseMethods(const ConcatInputSection *methodsIsec,
171 const Symbol *methodContainer,
172 const ConcatInputSection *containerIsec,
173 MethodContainerKind, MethodKind);
174
175 CategoryLayout catLayout;
176 ClassLayout classLayout;
177 ROClassLayout roClassLayout;
178 ListHeaderLayout listHeaderLayout;
179 MethodLayout methodLayout;
180
181 DenseMap<const Symbol *, ObjcClass> classMap;
182};
183
// Constructs every struct layout from the current target's word size
// (target->wordSize).
ObjcCategoryChecker::ObjcCategoryChecker()
    : catLayout(target->wordSize), classLayout(target->wordSize),
      roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
      methodLayout(target->wordSize) {}
188
189// \p r must point to an offset within a CStringInputSection or a
190// ConcatInputSection
191static StringRef getReferentString(const Reloc &r) {
192 if (auto *isec = r.referent.dyn_cast<InputSection *>())
193 return cast<CStringInputSection>(Val: isec)->getStringRefAtOffset(off: r.addend);
194
195 auto *sym = cast<Defined>(Val: r.referent.get<Symbol *>());
196 auto *symIsec = sym->isec();
197 auto symOffset = sym->value + r.addend;
198
199 if (auto *s = dyn_cast_or_null<CStringInputSection>(Val: symIsec))
200 return s->getStringRefAtOffset(off: symOffset);
201
202 if (isa<ConcatInputSection>(Val: symIsec)) {
203 auto strData = symIsec->data.slice(N: symOffset);
204 const char *pszData = reinterpret_cast<const char *>(strData.data());
205 return StringRef(pszData, strnlen(string: pszData, maxlen: strData.size()));
206 }
207
208 llvm_unreachable("unknown reference section in getReferentString");
209}
210
211void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
212 const Symbol *methodContainerSym,
213 const ConcatInputSection *containerIsec,
214 MethodContainerKind mcKind,
215 MethodKind mKind) {
216 ObjcClass &klass = classMap[methodContainerSym];
217 for (const Reloc &r : methodsIsec->relocs) {
218 if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
219 methodLayout.nameOffset)
220 continue;
221
222 CachedHashStringRef methodName(getReferentString(r));
223 // +load methods are special: all implementations are called by the runtime
224 // even if they are part of the same class. Thus there is no need to check
225 // for duplicates.
226 // NOTE: Instead of specifically checking for this method name, ld64 simply
227 // checks whether a class / category is present in __objc_nlclslist /
228 // __objc_nlcatlist respectively. This will be the case if the class /
229 // category has a +load method. It skips optimizing the categories if there
230 // are multiple +load methods. Since it does dupe checking as part of the
231 // optimization process, this avoids spurious dupe messages around +load,
232 // but it also means that legit dupe issues for other methods are ignored.
233 if (mKind == MK_Static && methodName.val() == "load")
234 continue;
235
236 auto &methodMap =
237 mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
238 if (methodMap
239 .try_emplace(Key: methodName, Args: MethodContainer{.kind: mcKind, .isec: containerIsec})
240 .second)
241 continue;
242
243 // We have a duplicate; generate a warning message.
244 const auto &mc = methodMap.lookup(Val: methodName);
245 const Reloc *nameReloc = nullptr;
246 if (mc.kind == MCK_Category) {
247 nameReloc = mc.isec->getRelocAt(off: catLayout.nameOffset);
248 } else {
249 assert(mc.kind == MCK_Class);
250 const auto *roIsec = mc.isec->getRelocAt(off: classLayout.roDataOffset)
251 ->getReferentInputSection();
252 nameReloc = roIsec->getRelocAt(off: roClassLayout.nameOffset);
253 }
254 StringRef containerName = getReferentString(r: *nameReloc);
255 StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
256
257 // We should only ever encounter collisions when parsing category methods
258 // (since the Class struct is parsed before any of its categories).
259 assert(mcKind == MCK_Category);
260 StringRef newCatName =
261 getReferentString(r: *containerIsec->getRelocAt(off: catLayout.nameOffset));
262
263 auto formatObjAndSrcFileName = [](const InputSection *section) {
264 lld::macho::InputFile *inputFile = section->getFile();
265 std::string result = toString(file: inputFile);
266
267 auto objFile = dyn_cast_or_null<ObjFile>(Val: inputFile);
268 if (objFile && objFile->compileUnit)
269 result += " (" + objFile->sourceFile() + ")";
270
271 return result;
272 };
273
274 StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
275 warn(msg: "method '" + methPrefix + methodName.val() +
276 "' has conflicting definitions:\n>>> defined in category " +
277 newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
278 "\n>>> defined in " + containerType + " " + containerName + " from " +
279 formatObjAndSrcFileName(mc.isec));
280 }
281}
282
283void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
284 auto *classReloc = catIsec->getRelocAt(off: catLayout.klassOffset);
285 if (!classReloc)
286 return;
287
288 auto *classSym = classReloc->referent.get<Symbol *>();
289 if (auto *d = dyn_cast<Defined>(Val: classSym))
290 if (!classMap.count(Val: d))
291 parseClass(classSym: d);
292
293 if (const auto *r = catIsec->getRelocAt(off: catLayout.classMethodsOffset)) {
294 parseMethods(methodsIsec: cast<ConcatInputSection>(Val: r->getReferentInputSection()),
295 methodContainerSym: classSym, containerIsec: catIsec, mcKind: MCK_Category, mKind: MK_Static);
296 }
297
298 if (const auto *r = catIsec->getRelocAt(off: catLayout.instanceMethodsOffset)) {
299 parseMethods(methodsIsec: cast<ConcatInputSection>(Val: r->getReferentInputSection()),
300 methodContainerSym: classSym, containerIsec: catIsec, mcKind: MCK_Category, mKind: MK_Instance);
301 }
302}
303
304void ObjcCategoryChecker::parseClass(const Defined *classSym) {
305 // Given a Class struct, get its corresponding Methods struct
306 auto getMethodsIsec =
307 [&](const InputSection *classIsec) -> ConcatInputSection * {
308 if (const auto *r = classIsec->getRelocAt(off: classLayout.roDataOffset)) {
309 if (const auto *roIsec =
310 cast_or_null<ConcatInputSection>(Val: r->getReferentInputSection())) {
311 if (const auto *r =
312 roIsec->getRelocAt(off: roClassLayout.baseMethodsOffset)) {
313 if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
314 Val: r->getReferentInputSection()))
315 return methodsIsec;
316 }
317 }
318 }
319 return nullptr;
320 };
321
322 const auto *classIsec = cast<ConcatInputSection>(Val: classSym->isec());
323
324 // Parse instance methods.
325 if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
326 parseMethods(methodsIsec: instanceMethodsIsec, methodContainerSym: classSym, containerIsec: classIsec, mcKind: MCK_Class,
327 mKind: MK_Instance);
328
329 // Class methods are contained in the metaclass.
330 if (const auto *r = classSym->isec()->getRelocAt(off: classLayout.metaClassOffset))
331 if (const auto *classMethodsIsec = getMethodsIsec(
332 cast<ConcatInputSection>(Val: r->getReferentInputSection())))
333 parseMethods(methodsIsec: classMethodsIsec, methodContainerSym: classSym, containerIsec: classIsec, mcKind: MCK_Class, mKind: MK_Static);
334}
335
336void objc::checkCategories() {
337 TimeTraceScope timeScope("ObjcCategoryChecker");
338
339 ObjcCategoryChecker checker;
340 for (const InputSection *isec : inputSections) {
341 if (isec->getName() == section_names::objcCatList)
342 for (const Reloc &r : isec->relocs) {
343 auto *catIsec = cast<ConcatInputSection>(Val: r.getReferentInputSection());
344 checker.parseCategory(catIsec);
345 }
346 }
347}
348
349namespace {
350
351class ObjcCategoryMerger {
352 // In which language was a particular construct originally defined
353 enum SourceLanguage { Unknown, ObjC, Swift };
354
355 // Information about an input category
356 struct InfoInputCategory {
357 ConcatInputSection *catListIsec;
358 ConcatInputSection *catBodyIsec;
359 uint32_t offCatListIsec = 0;
360 SourceLanguage sourceLanguage = SourceLanguage::Unknown;
361
362 bool wasMerged = false;
363 };
364
365 // To write new (merged) categories or classes, we will try make limited
366 // assumptions about the alignment and the sections the various class/category
367 // info are stored in and . So we'll just reuse the same sections and
368 // alignment as already used in existing (input) categories. To do this we
369 // have InfoCategoryWriter which contains the various sections that the
370 // generated categories will be written to.
371 struct InfoWriteSection {
372 bool valid = false; // Data has been successfully collected from input
373 uint32_t align = 0;
374 Section *inputSection;
375 Reloc relocTemplate;
376 OutputSection *outputSection;
377 };
378
379 struct InfoCategoryWriter {
380 InfoWriteSection catListInfo;
381 InfoWriteSection catBodyInfo;
382 InfoWriteSection catNameInfo;
383 InfoWriteSection catPtrListInfo;
384 };
385
386 // Information about a pointer list in the original categories or class(method
387 // lists, protocol lists, etc)
388 struct PointerListInfo {
389 PointerListInfo() = default;
390 PointerListInfo(const PointerListInfo &) = default;
391 PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
392 : categoryPrefix(_categoryPrefix),
393 pointersPerStruct(_pointersPerStruct) {}
394
395 inline bool operator==(const PointerListInfo &cmp) const {
396 return pointersPerStruct == cmp.pointersPerStruct &&
397 structSize == cmp.structSize && structCount == cmp.structCount &&
398 allPtrs == cmp.allPtrs;
399 }
400
401 const char *categoryPrefix;
402
403 uint32_t pointersPerStruct = 0;
404
405 uint32_t structSize = 0;
406 uint32_t structCount = 0;
407
408 std::vector<Symbol *> allPtrs;
409 };
410
411 // Full information describing an ObjC class . This will include all the
412 // additional methods, protocols, and properties that are contained in the
413 // class and all the categories that extend a particular class.
414 struct ClassExtensionInfo {
415 ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
416
417 // Merged names of containers. Ex: base|firstCategory|secondCategory|...
418 std::string mergedContainerName;
419 std::string baseClassName;
420 const Symbol *baseClass = nullptr;
421 SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
422
423 CategoryLayout &catLayout;
424
425 // In case we generate new data, mark the new data as belonging to this file
426 ObjFile *objFileForMergeData = nullptr;
427
428 PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
429 /*pointersPerStruct=*/3};
430 PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
431 /*pointersPerStruct=*/3};
432 PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
433 /*pointersPerStruct=*/0};
434 PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
435 /*pointersPerStruct=*/2};
436 PointerListInfo classProps = {objc::symbol_names::klassPropList,
437 /*pointersPerStruct=*/2};
438 };
439
440public:
441 ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
442 void doMerge();
443 static void doCleanup();
444
445private:
446 DenseSet<const Symbol *> collectNlCategories();
447 void collectAndValidateCategoriesData();
448 void
449 mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
450
451 void eraseISec(ConcatInputSection *isec);
452 void eraseMergedCategories();
453
454 void generateCatListForNonErasedCategories(
455 MapVector<ConcatInputSection *, std::set<uint64_t>>
456 catListToErasedOffsets);
457 void collectSectionWriteInfoFromIsec(const InputSection *isec,
458 InfoWriteSection &catWriteInfo);
459 void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
460 void parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
461 ClassExtensionInfo &extInfo);
462
463 void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
464 PointerListInfo &ptrList,
465 SourceLanguage sourceLang);
466
467 PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
468 uint32_t secOffset,
469 SourceLanguage sourceLang);
470
471 void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
472 PointerListInfo &ptrList);
473
474 void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
475 const ClassExtensionInfo &extInfo,
476 const PointerListInfo &ptrList);
477
478 Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
479 const ClassExtensionInfo &extInfo,
480 const PointerListInfo &ptrList);
481
482 Defined *emitCategory(const ClassExtensionInfo &extInfo);
483 Defined *emitCatListEntrySec(const std::string &forCategoryName,
484 const std::string &forBaseClassName,
485 ObjFile *objFile);
486 Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
487 const Symbol *baseClassSym,
488 const std::string &baseClassName, ObjFile *objFile);
489 Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
490 void createSymbolReference(Defined *refFrom, const Symbol *refTo,
491 uint32_t offset, const Reloc &relocTemplate);
492 Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
493 Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
494 uint32_t offset);
495 Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
496 uint32_t offset);
497 Defined *getClassRo(const Defined *classSym, bool getMetaRo);
498 SourceLanguage getClassSymSourceLang(const Defined *classSym);
499 void mergeCategoriesIntoBaseClass(const Defined *baseClass,
500 std::vector<InfoInputCategory> &categories);
501 void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
502 void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
503 uint32_t offset);
504
505 // Allocate a null-terminated StringRef backed by generatedSectionData
506 StringRef newStringData(const char *str);
507 // Allocate section data, backed by generatedSectionData
508 SmallVector<uint8_t> &newSectionData(uint32_t size);
509
510 CategoryLayout catLayout;
511 ClassLayout classLayout;
512 ROClassLayout roClassLayout;
513 ListHeaderLayout listHeaderLayout;
514 MethodLayout methodLayout;
515 ProtocolListHeaderLayout protocolListHeaderLayout;
516
517 InfoCategoryWriter infoCategoryWriter;
518 std::vector<ConcatInputSection *> &allInputSections;
519 // Map of base class Symbol to list of InfoInputCategory's for it
520 MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
521
522 // Normally, the binary data comes from the input files, but since we're
523 // generating binary data ourselves, we use the below array to store it in.
524 // Need this to be 'static' so the data survives past the ObjcCategoryMerger
525 // object, as the data will be read by the Writer when the final binary is
526 // generated.
527 static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
528 generatedSectionData;
529};
530
// Out-of-class definition of the static storage declared in
// ObjcCategoryMerger; it holds the buffers backing generated section data.
SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
    ObjcCategoryMerger::generatedSectionData;
533
// Constructs every struct layout from the current target's word size
// (target->wordSize) and keeps a reference to the caller's list of input
// sections; the list itself is not copied.
ObjcCategoryMerger::ObjcCategoryMerger(
    std::vector<ConcatInputSection *> &_allInputSections)
    : catLayout(target->wordSize), classLayout(target->wordSize),
      roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
      methodLayout(target->wordSize),
      protocolListHeaderLayout(target->wordSize),
      allInputSections(_allInputSections) {}
541
542void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
543 const InputSection *isec, InfoWriteSection &catWriteInfo) {
544
545 catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
546 catWriteInfo.align = isec->align;
547 catWriteInfo.outputSection = isec->parent;
548
549 assert(catWriteInfo.outputSection &&
550 "outputSection may not be null in collectSectionWriteInfoFromIsec.");
551
552 if (isec->relocs.size())
553 catWriteInfo.relocTemplate = isec->relocs[0];
554
555 catWriteInfo.valid = true;
556}
557
558Symbol *
559ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
560 uint32_t offset) {
561 if (!isec)
562 return nullptr;
563 const Reloc *reloc = isec->getRelocAt(off: offset);
564
565 if (!reloc)
566 return nullptr;
567
568 Symbol *sym = reloc->referent.get<Symbol *>();
569
570 if (reloc->addend) {
571 assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
572 Defined *definedSym = cast<Defined>(Val: sym);
573 sym = tryFindDefinedOnIsec(isec: definedSym->isec(),
574 offset: definedSym->value + reloc->addend);
575 }
576
577 return sym;
578}
579
580Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
581 uint32_t offset) {
582 for (Defined *sym : isec->symbols)
583 if ((sym->value <= offset) && (sym->value + sym->size > offset))
584 return sym;
585
586 return nullptr;
587}
588
589Defined *
590ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
591 uint32_t offset) {
592 Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
593 return dyn_cast_or_null<Defined>(Val: sym);
594}
595
596// Get the class's ro_data symbol. If getMetaRo is true, then we will return
597// the meta-class's ro_data symbol. Otherwise, we will return the class
598// (instance) ro_data symbol.
599Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
600 bool getMetaRo) {
601 ConcatInputSection *isec = dyn_cast<ConcatInputSection>(Val: classSym->isec());
602 if (!isec)
603 return nullptr;
604
605 if (!getMetaRo)
606 return tryGetDefinedAtIsecOffset(isec, offset: classLayout.roDataOffset +
607 classSym->value);
608
609 Defined *metaClass = tryGetDefinedAtIsecOffset(
610 isec, offset: classLayout.metaClassOffset + classSym->value);
611 if (!metaClass)
612 return nullptr;
613
614 return tryGetDefinedAtIsecOffset(
615 isec: dyn_cast<ConcatInputSection>(Val: metaClass->isec()),
616 offset: classLayout.roDataOffset);
617}
618
619// Given an ConcatInputSection or CStringInputSection and an offset, if there is
620// a symbol(Defined) at that offset, then erase the symbol (mark it not live)
621void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
622 const ConcatInputSection *isec, uint32_t offset) {
623 const Reloc *reloc = isec->getRelocAt(off: offset);
624
625 if (!reloc)
626 return;
627
628 Defined *sym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
629 if (!sym)
630 return;
631
632 if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(Val: sym->isec()))
633 eraseISec(isec: cisec);
634 else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(Val: sym->isec())) {
635 uint32_t totalOffset = sym->value + reloc->addend;
636 StringPiece &piece = csisec->getStringPiece(off: totalOffset);
637 piece.live = false;
638 } else {
639 llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
640 }
641}
642
643void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
644 const InfoInputCategory &catInfo) {
645
646 if (!infoCategoryWriter.catListInfo.valid)
647 collectSectionWriteInfoFromIsec(isec: catInfo.catListIsec,
648 catWriteInfo&: infoCategoryWriter.catListInfo);
649 if (!infoCategoryWriter.catBodyInfo.valid)
650 collectSectionWriteInfoFromIsec(isec: catInfo.catBodyIsec,
651 catWriteInfo&: infoCategoryWriter.catBodyInfo);
652
653 if (!infoCategoryWriter.catNameInfo.valid) {
654 lld::macho::Defined *catNameSym =
655 tryGetDefinedAtIsecOffset(isec: catInfo.catBodyIsec, offset: catLayout.nameOffset);
656 assert(catNameSym && "Category does not have a valid name Symbol");
657
658 collectSectionWriteInfoFromIsec(isec: catNameSym->isec(),
659 catWriteInfo&: infoCategoryWriter.catNameInfo);
660 }
661
662 // Collect writer info from all the category lists (we're assuming they all
663 // would provide the same info)
664 if (!infoCategoryWriter.catPtrListInfo.valid) {
665 for (uint32_t off = catLayout.instanceMethodsOffset;
666 off <= catLayout.classPropsOffset; off += target->wordSize) {
667 if (Defined *ptrList =
668 tryGetDefinedAtIsecOffset(isec: catInfo.catBodyIsec, offset: off)) {
669 collectSectionWriteInfoFromIsec(isec: ptrList->isec(),
670 catWriteInfo&: infoCategoryWriter.catPtrListInfo);
671 // we've successfully collected data, so we can break
672 break;
673 }
674 }
675 }
676}
677
678// Parse a protocol list that might be linked to ConcatInputSection at a given
679// offset. The format of the protocol list is different than other lists (prop
680// lists, method lists) so we need to parse it differently
681void ObjcCategoryMerger::parseProtocolListInfo(
682 const ConcatInputSection *isec, uint32_t secOffset,
683 PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
684 assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
685 "Tried to read pointer list beyond protocol section end");
686
687 const Reloc *reloc = isec->getRelocAt(off: secOffset);
688 if (!reloc)
689 return;
690
691 auto *ptrListSym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
692 assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
693
694 // Theoretically protocol count can be either 32b or 64b, depending on
695 // platform pointer size, but to simplify implementation we always just read
696 // the lower 32b which should be good enough.
697 uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
698 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
699
700 ptrList.structCount += protocolCount;
701 ptrList.structSize = target->wordSize;
702
703 [[maybe_unused]] uint32_t expectedListSize =
704 (protocolCount * target->wordSize) +
705 /*header(count)*/ protocolListHeaderLayout.totalSize +
706 /*extra null value*/ target->wordSize;
707
708 // On Swift, the protocol list does not have the extra (unnecessary) null
709 [[maybe_unused]] uint32_t expectedListSizeSwift =
710 expectedListSize - target->wordSize;
711
712 assert(((expectedListSize == ptrListSym->isec()->data.size() &&
713 sourceLang == SourceLanguage::ObjC) ||
714 (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
715 sourceLang == SourceLanguage::Swift)) &&
716 "Protocol list does not match expected size");
717
718 uint32_t off = protocolListHeaderLayout.totalSize;
719 for (uint32_t inx = 0; inx < protocolCount; ++inx) {
720 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
721 assert(reloc && "No reloc found at protocol list offset");
722
723 auto *listSym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
724 assert(listSym && "Protocol list reloc does not have a valid Defined");
725
726 ptrList.allPtrs.push_back(x: listSym);
727 off += target->wordSize;
728 }
729 assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
730 "expected null terminating protocol");
731 assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
732 "Protocol list end offset does not match expected size");
733}
734
735// Parse a protocol list and return the PointerListInfo for it
736ObjcCategoryMerger::PointerListInfo
737ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
738 uint32_t secOffset,
739 SourceLanguage sourceLang) {
740 PointerListInfo ptrList;
741 parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
742 return ptrList;
743}
744
745// Parse a pointer list that might be linked to ConcatInputSection at a given
746// offset. This can be used for instance methods, class methods, instance props
747// and class props since they have the same format.
748void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
749 uint32_t secOffset,
750 PointerListInfo &ptrList) {
751 assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
752 assert(isec && "Trying to parse pointer list from null isec");
753 assert(secOffset + target->wordSize <= isec->data.size() &&
754 "Trying to read pointer list beyond section end");
755
756 const Reloc *reloc = isec->getRelocAt(off: secOffset);
757 if (!reloc)
758 return;
759
760 auto *ptrListSym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
761 assert(ptrListSym && "Reloc does not have a valid Defined");
762
763 uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
764 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
765 uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
766 ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
767 assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
768
769 assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
770
771 ptrList.structCount += thisStructCount;
772 ptrList.structSize = thisStructSize;
773
774 uint32_t expectedListSize =
775 listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
776 assert(expectedListSize == ptrListSym->isec()->data.size() &&
777 "Pointer list does not match expected size");
778
779 for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
780 off += target->wordSize) {
781 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
782 assert(reloc && "No reloc found at pointer list offset");
783
784 auto *listSym = dyn_cast_or_null<Defined>(Val: reloc->referent.get<Symbol *>());
785 assert(listSym && "Reloc does not have a valid Defined");
786
787 ptrList.allPtrs.push_back(x: listSym);
788 }
789}
790
791// Here we parse all the information of an input category (catInfo) and
792// append the parsed info into the structure which will contain all the
793// information about how a class is extended (extInfo)
794void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
795 ClassExtensionInfo &extInfo) {
796 const Reloc *catNameReloc =
797 catInfo.catBodyIsec->getRelocAt(off: catLayout.nameOffset);
798
799 // Parse name
800 assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
801
802 // is this the first category we are parsing?
803 if (extInfo.mergedContainerName.empty())
804 extInfo.objFileForMergeData =
805 dyn_cast_or_null<ObjFile>(Val: catInfo.catBodyIsec->getFile());
806 else
807 extInfo.mergedContainerName += "|";
808
809 assert(extInfo.objFileForMergeData &&
810 "Expected to already have valid objextInfo.objFileForMergeData");
811
812 StringRef catName = getReferentString(r: *catNameReloc);
813 extInfo.mergedContainerName += catName.str();
814
815 // Parse base class
816 if (!extInfo.baseClass) {
817 Symbol *classSym =
818 tryGetSymbolAtIsecOffset(isec: catInfo.catBodyIsec, offset: catLayout.klassOffset);
819 assert(extInfo.baseClassName.empty());
820 extInfo.baseClass = classSym;
821 llvm::StringRef classPrefix(objc::symbol_names::klass);
822 assert(classSym->getName().starts_with(classPrefix) &&
823 "Base class symbol does not start with expected prefix");
824 extInfo.baseClassName = classSym->getName().substr(Start: classPrefix.size());
825 } else {
826 assert((extInfo.baseClass ==
827 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
828 catLayout.klassOffset)) &&
829 "Trying to parse category info into container with different base "
830 "class");
831 }
832
833 parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.instanceMethodsOffset,
834 ptrList&: extInfo.instanceMethods);
835
836 parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.classMethodsOffset,
837 ptrList&: extInfo.classMethods);
838
839 parseProtocolListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.protocolsOffset,
840 ptrList&: extInfo.protocols, sourceLang: catInfo.sourceLanguage);
841
842 parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.instancePropsOffset,
843 ptrList&: extInfo.instanceProps);
844
845 parsePointerListInfo(isec: catInfo.catBodyIsec, secOffset: catLayout.classPropsOffset,
846 ptrList&: extInfo.classProps);
847}
848
849// Generate a protocol list (including header) and link it into the parent at
850// the specified offset.
851Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
852 Defined *parentSym, uint32_t linkAtOffset,
853 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
854 if (ptrList.allPtrs.empty())
855 return nullptr;
856
857 assert(ptrList.allPtrs.size() == ptrList.structCount);
858
859 uint32_t bodySize = (ptrList.structCount * target->wordSize) +
860 /*header(count)*/ protocolListHeaderLayout.totalSize +
861 /*extra null value*/ target->wordSize;
862 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: bodySize);
863
864 // This theoretically can be either 32b or 64b, but writing just the first 32b
865 // is good enough
866 const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
867 bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
868
869 *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
870
871 ConcatInputSection *listSec = make<ConcatInputSection>(
872 args&: *infoCategoryWriter.catPtrListInfo.inputSection, args&: bodyData,
873 args&: infoCategoryWriter.catPtrListInfo.align);
874 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
875 listSec->live = true;
876
877 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
878
879 std::string symName = ptrList.categoryPrefix;
880 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
881
882 Defined *ptrListSym = make<Defined>(
883 args: newStringData(str: symName.c_str()), /*file=*/args: parentSym->getObjectFile(),
884 args&: listSec, /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false,
885 /*isExternal=*/args: false, /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: true,
886 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
887 /*isWeakDefCanBeHidden=*/args: false);
888
889 ptrListSym->used = true;
890 parentSym->getObjectFile()->symbols.push_back(x: ptrListSym);
891 addInputSection(inputSection: listSec);
892
893 createSymbolReference(refFrom: parentSym, refTo: ptrListSym, offset: linkAtOffset,
894 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
895
896 uint32_t offset = protocolListHeaderLayout.totalSize;
897 for (Symbol *symbol : ptrList.allPtrs) {
898 createSymbolReference(refFrom: ptrListSym, refTo: symbol, offset,
899 relocTemplate: infoCategoryWriter.catPtrListInfo.relocTemplate);
900 offset += target->wordSize;
901 }
902
903 return ptrListSym;
904}
905
906// Generate a pointer list (including header) and link it into the parent at the
907// specified offset. This is used for instance and class methods and
908// proprieties.
909void ObjcCategoryMerger::emitAndLinkPointerList(
910 Defined *parentSym, uint32_t linkAtOffset,
911 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
912 if (ptrList.allPtrs.empty())
913 return;
914
915 assert(ptrList.allPtrs.size() * target->wordSize ==
916 ptrList.structCount * ptrList.structSize);
917
918 // Generate body
919 uint32_t bodySize =
920 listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
921 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: bodySize);
922
923 const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
924 bodyData.data() + listHeaderLayout.structSizeOffset);
925 const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
926 bodyData.data() + listHeaderLayout.structCountOffset);
927
928 *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
929 *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
930
931 ConcatInputSection *listSec = make<ConcatInputSection>(
932 args&: *infoCategoryWriter.catPtrListInfo.inputSection, args&: bodyData,
933 args&: infoCategoryWriter.catPtrListInfo.align);
934 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
935 listSec->live = true;
936
937 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
938
939 std::string symName = ptrList.categoryPrefix;
940 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
941
942 Defined *ptrListSym = make<Defined>(
943 args: newStringData(str: symName.c_str()), /*file=*/args: parentSym->getObjectFile(),
944 args&: listSec, /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false,
945 /*isExternal=*/args: false, /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: true,
946 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
947 /*isWeakDefCanBeHidden=*/args: false);
948
949 ptrListSym->used = true;
950 parentSym->getObjectFile()->symbols.push_back(x: ptrListSym);
951 addInputSection(inputSection: listSec);
952
953 createSymbolReference(refFrom: parentSym, refTo: ptrListSym, offset: linkAtOffset,
954 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
955
956 uint32_t offset = listHeaderLayout.totalSize;
957 for (Symbol *symbol : ptrList.allPtrs) {
958 createSymbolReference(refFrom: ptrListSym, refTo: symbol, offset,
959 relocTemplate: infoCategoryWriter.catPtrListInfo.relocTemplate);
960 offset += target->wordSize;
961 }
962}
963
964// This method creates an __objc_catlist ConcatInputSection with a single slot
965Defined *
966ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
967 const std::string &forBaseClassName,
968 ObjFile *objFile) {
969 uint32_t sectionSize = target->wordSize;
970 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: sectionSize);
971
972 ConcatInputSection *newCatList =
973 make<ConcatInputSection>(args&: *infoCategoryWriter.catListInfo.inputSection,
974 args&: bodyData, args&: infoCategoryWriter.catListInfo.align);
975 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
976 newCatList->live = true;
977
978 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
979
980 std::string catSymName = "<__objc_catlist slot for merged category ";
981 catSymName += forBaseClassName + "(" + forCategoryName + ")>";
982
983 Defined *catListSym = make<Defined>(
984 args: newStringData(str: catSymName.c_str()), /*file=*/args&: objFile, args&: newCatList,
985 /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false, /*isExternal=*/args: false,
986 /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: false,
987 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
988 /*isWeakDefCanBeHidden=*/args: false);
989
990 catListSym->used = true;
991 objFile->symbols.push_back(x: catListSym);
992 addInputSection(inputSection: newCatList);
993 return catListSym;
994}
995
996// Here we generate the main category body and link the name and base class into
997// it. We don't link any other info yet like the protocol and class/instance
998// methods/props.
999Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
1000 const Defined *nameSym,
1001 const Symbol *baseClassSym,
1002 const std::string &baseClassName,
1003 ObjFile *objFile) {
1004 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: catLayout.totalSize);
1005
1006 uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
1007 catLayout.sizeOffset);
1008 *ptrSize = catLayout.totalSize;
1009
1010 ConcatInputSection *newBodySec =
1011 make<ConcatInputSection>(args&: *infoCategoryWriter.catBodyInfo.inputSection,
1012 args&: bodyData, args&: infoCategoryWriter.catBodyInfo.align);
1013 newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
1014 newBodySec->live = true;
1015
1016 std::string symName =
1017 objc::symbol_names::category + baseClassName + "(" + name + ")";
1018 Defined *catBodySym = make<Defined>(
1019 args: newStringData(str: symName.c_str()), /*file=*/args&: objFile, args&: newBodySec,
1020 /*value=*/args: 0, args: bodyData.size(), /*isWeakDef=*/args: false, /*isExternal=*/args: false,
1021 /*isPrivateExtern=*/args: false, /*includeInSymtab=*/args: true,
1022 /*isReferencedDynamically=*/args: false, /*noDeadStrip=*/args: false,
1023 /*isWeakDefCanBeHidden=*/args: false);
1024
1025 catBodySym->used = true;
1026 objFile->symbols.push_back(x: catBodySym);
1027 addInputSection(inputSection: newBodySec);
1028
1029 createSymbolReference(refFrom: catBodySym, refTo: nameSym, offset: catLayout.nameOffset,
1030 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
1031
1032 // Create a reloc to the base class (either external or internal)
1033 createSymbolReference(refFrom: catBodySym, refTo: baseClassSym, offset: catLayout.klassOffset,
1034 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
1035
1036 return catBodySym;
1037}
1038
1039// This writes the new category name (for the merged category) into the binary
1040// and returns the sybmol for it.
1041Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
1042 ObjFile *objFile) {
1043 StringRef nameStrData = newStringData(str: name.c_str());
1044 // We use +1 below to include the null terminator
1045 llvm::ArrayRef<uint8_t> nameData(
1046 reinterpret_cast<const uint8_t *>(nameStrData.data()),
1047 nameStrData.size() + 1);
1048
1049 auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
1050 CStringInputSection *newStringSec = make<CStringInputSection>(
1051 args&: *infoCategoryWriter.catNameInfo.inputSection, args&: nameData,
1052 args&: infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/args: true);
1053
1054 parentSection->subsections.push_back(x: {.offset: 0, .isec: newStringSec});
1055
1056 newStringSec->splitIntoPieces();
1057 newStringSec->pieces[0].live = true;
1058 newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
1059 in.cStringSection->addInput(newStringSec);
1060 assert(newStringSec->pieces.size() == 1);
1061
1062 Defined *catNameSym = make<Defined>(
1063 args: "<merged category name>", /*file=*/args&: objFile, args&: newStringSec,
1064 /*value=*/args: 0, args: nameData.size(),
1065 /*isWeakDef=*/args: false, /*isExternal=*/args: false, /*isPrivateExtern=*/args: false,
1066 /*includeInSymtab=*/args: false, /*isReferencedDynamically=*/args: false,
1067 /*noDeadStrip=*/args: false, /*isWeakDefCanBeHidden=*/args: false);
1068
1069 catNameSym->used = true;
1070 objFile->symbols.push_back(x: catNameSym);
1071 return catNameSym;
1072}
1073
1074// This method fully creates a new category from the given ClassExtensionInfo.
1075// It creates the category name, body and method/protocol/prop lists and links
1076// them all together. Then it creates a new __objc_catlist entry and adds the
1077// category to it. Calling this method will fully generate a category which will
1078// be available in the final binary.
1079Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
1080 Defined *catNameSym = emitCategoryName(name: extInfo.mergedContainerName,
1081 objFile: extInfo.objFileForMergeData);
1082
1083 Defined *catBodySym = emitCategoryBody(
1084 name: extInfo.mergedContainerName, nameSym: catNameSym, baseClassSym: extInfo.baseClass,
1085 baseClassName: extInfo.baseClassName, objFile: extInfo.objFileForMergeData);
1086
1087 Defined *catListSym =
1088 emitCatListEntrySec(forCategoryName: extInfo.mergedContainerName, forBaseClassName: extInfo.baseClassName,
1089 objFile: extInfo.objFileForMergeData);
1090
1091 // Add the single category body to the category list at the offset 0.
1092 createSymbolReference(refFrom: catListSym, refTo: catBodySym, /*offset=*/0,
1093 relocTemplate: infoCategoryWriter.catListInfo.relocTemplate);
1094
1095 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.instanceMethodsOffset, extInfo,
1096 ptrList: extInfo.instanceMethods);
1097
1098 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.classMethodsOffset, extInfo,
1099 ptrList: extInfo.classMethods);
1100
1101 emitAndLinkProtocolList(parentSym: catBodySym, linkAtOffset: catLayout.protocolsOffset, extInfo,
1102 ptrList: extInfo.protocols);
1103
1104 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.instancePropsOffset, extInfo,
1105 ptrList: extInfo.instanceProps);
1106
1107 emitAndLinkPointerList(parentSym: catBodySym, linkAtOffset: catLayout.classPropsOffset, extInfo,
1108 ptrList: extInfo.classProps);
1109
1110 return catBodySym;
1111}
1112
1113// This method merges all the categories (sharing a base class) into a single
1114// category.
1115void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1116 std::vector<InfoInputCategory> &categories) {
1117 assert(categories.size() > 1 && "Expected at least 2 categories");
1118
1119 ClassExtensionInfo extInfo(catLayout);
1120
1121 for (auto &catInfo : categories)
1122 parseCatInfoToExtInfo(catInfo, extInfo);
1123
1124 Defined *newCatDef = emitCategory(extInfo);
1125 assert(newCatDef && "Failed to create a new category");
1126
1127 // Suppress unsuded var warning
1128 (void)newCatDef;
1129
1130 for (auto &catInfo : categories)
1131 catInfo.wasMerged = true;
1132}
1133
1134void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1135 const Symbol *refTo,
1136 uint32_t offset,
1137 const Reloc &relocTemplate) {
1138 Reloc r = relocTemplate;
1139 r.offset = offset;
1140 r.addend = 0;
1141 r.referent = const_cast<Symbol *>(refTo);
1142 refFrom->isec()->relocs.push_back(x: r);
1143}
1144
1145// Get the list of categories in the '__objc_nlcatlist' section. We can't
1146// optimize these as they have a '+load' method that has to be called at
1147// runtime.
1148DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
1149 DenseSet<const Symbol *> nlCategories;
1150
1151 for (InputSection *sec : allInputSections) {
1152 if (sec->getName() != section_names::objcNonLazyCatList)
1153 continue;
1154
1155 for (auto &r : sec->relocs) {
1156 const Symbol *sym = r.referent.dyn_cast<Symbol *>();
1157 nlCategories.insert(V: sym);
1158 }
1159 }
1160 return nlCategories;
1161}
1162
1163void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1164 auto nlCategories = collectNlCategories();
1165
1166 for (InputSection *sec : allInputSections) {
1167 if (sec->getName() != section_names::objcCatList)
1168 continue;
1169 ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(Val: sec);
1170 assert(catListCisec &&
1171 "__objc_catList InputSection is not a ConcatInputSection");
1172
1173 for (uint32_t off = 0; off < catListCisec->getSize();
1174 off += target->wordSize) {
1175 Defined *categorySym = tryGetDefinedAtIsecOffset(isec: catListCisec, offset: off);
1176 assert(categorySym &&
1177 "Failed to get a valid category at __objc_catlit offset");
1178
1179 if (nlCategories.count(V: categorySym))
1180 continue;
1181
1182 auto *catBodyIsec = dyn_cast<ConcatInputSection>(Val: categorySym->isec());
1183 assert(catBodyIsec &&
1184 "Category data section is not an ConcatInputSection");
1185
1186 SourceLanguage eLang = SourceLanguage::Unknown;
1187 if (categorySym->getName().starts_with(Prefix: objc::symbol_names::category))
1188 eLang = SourceLanguage::ObjC;
1189 else if (categorySym->getName().starts_with(
1190 Prefix: objc::symbol_names::swift_objc_category))
1191 eLang = SourceLanguage::Swift;
1192 else
1193 llvm_unreachable("Unexpected category symbol name");
1194
1195 InfoInputCategory catInputInfo{.catListIsec: catListCisec, .catBodyIsec: catBodyIsec, .offCatListIsec: off, .sourceLanguage: eLang};
1196
1197 // Check that the category has a reloc at 'klassOffset' (which is
1198 // a pointer to the class symbol)
1199
1200 Symbol *classSym =
1201 tryGetSymbolAtIsecOffset(isec: catBodyIsec, offset: catLayout.klassOffset);
1202 assert(classSym && "Category does not have a valid base class");
1203
1204 categoryMap[classSym].push_back(x: catInputInfo);
1205
1206 collectCategoryWriterInfoFromCategory(catInfo: catInputInfo);
1207 }
1208 }
1209}
1210
1211// In the input we have multiple __objc_catlist InputSection, each of which may
1212// contain links to multiple categories. Of these categories, we will merge (and
1213// erase) only some. There will be some categories that will remain untouched
1214// (not erased). For these not erased categories, we generate new __objc_catlist
1215// entries since the parent __objc_catlist entry will be erased
1216void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1217 const MapVector<ConcatInputSection *, std::set<uint64_t>>
1218 catListToErasedOffsets) {
1219
1220 // Go through all offsets of all __objc_catlist's that we process and if there
1221 // are categories that we didn't process - generate a new __objc_catlist for
1222 // each.
1223 for (auto &mapEntry : catListToErasedOffsets) {
1224 ConcatInputSection *catListIsec = mapEntry.first;
1225 for (uint32_t catListIsecOffset = 0;
1226 catListIsecOffset < catListIsec->data.size();
1227 catListIsecOffset += target->wordSize) {
1228 // This slot was erased, we can just skip it
1229 if (mapEntry.second.count(x: catListIsecOffset))
1230 continue;
1231
1232 Defined *nonErasedCatBody =
1233 tryGetDefinedAtIsecOffset(isec: catListIsec, offset: catListIsecOffset);
1234 assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1235
1236 // Allocate data for the new __objc_catlist slot
1237 llvm::ArrayRef<uint8_t> bodyData = newSectionData(size: target->wordSize);
1238
1239 // We mark the __objc_catlist slot as belonging to the same file as the
1240 // category
1241 ObjFile *objFile = dyn_cast<ObjFile>(Val: nonErasedCatBody->getFile());
1242
1243 ConcatInputSection *listSec = make<ConcatInputSection>(
1244 args&: *infoCategoryWriter.catListInfo.inputSection, args&: bodyData,
1245 args&: infoCategoryWriter.catListInfo.align);
1246 listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1247 listSec->live = true;
1248
1249 std::string slotSymName = "<__objc_catlist slot for category ";
1250 slotSymName += nonErasedCatBody->getName();
1251 slotSymName += ">";
1252
1253 Defined *catListSlotSym = make<Defined>(
1254 args: newStringData(str: slotSymName.c_str()), /*file=*/args&: objFile, args&: listSec,
1255 /*value=*/args: 0, args: bodyData.size(),
1256 /*isWeakDef=*/args: false, /*isExternal=*/args: false, /*isPrivateExtern=*/args: false,
1257 /*includeInSymtab=*/args: false, /*isReferencedDynamically=*/args: false,
1258 /*noDeadStrip=*/args: false, /*isWeakDefCanBeHidden=*/args: false);
1259
1260 catListSlotSym->used = true;
1261 objFile->symbols.push_back(x: catListSlotSym);
1262 addInputSection(inputSection: listSec);
1263
1264 // Now link the category body into the newly created slot
1265 createSymbolReference(refFrom: catListSlotSym, refTo: nonErasedCatBody, offset: 0,
1266 relocTemplate: infoCategoryWriter.catListInfo.relocTemplate);
1267 }
1268 }
1269}
1270
1271void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
1272 isec->live = false;
1273 for (auto &sym : isec->symbols)
1274 sym->used = false;
1275}
1276
1277// This fully erases the merged categories, including their body, their names,
1278// their method/protocol/prop lists and the __objc_catlist entries that link to
1279// them.
1280void ObjcCategoryMerger::eraseMergedCategories() {
1281 // Map of InputSection to a set of offsets of the categories that were merged
1282 MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1283
1284 for (auto &mapEntry : categoryMap) {
1285 for (InfoInputCategory &catInfo : mapEntry.second) {
1286 if (catInfo.wasMerged) {
1287 eraseISec(isec: catInfo.catListIsec);
1288 catListToErasedOffsets[catInfo.catListIsec].insert(
1289 x: catInfo.offCatListIsec);
1290 }
1291 }
1292 }
1293
1294 // If there were categories that we did not erase, we need to generate a new
1295 // __objc_catList that contains only the un-merged categories, and get rid of
1296 // the references to the ones we merged.
1297 generateCatListForNonErasedCategories(catListToErasedOffsets);
1298
1299 // Erase the old method lists & names of the categories that were merged
1300 for (auto &mapEntry : categoryMap) {
1301 for (InfoInputCategory &catInfo : mapEntry.second) {
1302 if (!catInfo.wasMerged)
1303 continue;
1304
1305 eraseISec(isec: catInfo.catBodyIsec);
1306
1307 // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1308 // categories because the name will sometimes also be used for other
1309 // purposes.
1310 // For Swift, see usages of 'l_.str.11.SimpleClass' in
1311 // objc-category-merging-swift.s
1312 // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1313 // objc-category-merging-erase-objc-name-test.s
1314 // TODO: handle the above in a smarter way
1315
1316 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1317 offset: catLayout.instanceMethodsOffset);
1318 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1319 offset: catLayout.classMethodsOffset);
1320 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1321 offset: catLayout.protocolsOffset);
1322 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1323 offset: catLayout.classPropsOffset);
1324 tryEraseDefinedAtIsecOffset(isec: catInfo.catBodyIsec,
1325 offset: catLayout.instancePropsOffset);
1326 }
1327 }
1328}
1329
1330void ObjcCategoryMerger::doMerge() {
1331 collectAndValidateCategoriesData();
1332
1333 for (auto &[baseClass, catInfos] : categoryMap) {
1334 if (auto *baseClassDef = dyn_cast<Defined>(Val: baseClass)) {
1335 // Merge all categories into the base class
1336 mergeCategoriesIntoBaseClass(baseClass: baseClassDef, categories&: catInfos);
1337 } else if (catInfos.size() > 1) {
1338 // Merge all categories into a new, single category
1339 mergeCategoriesIntoSingleCategory(categories&: catInfos);
1340 }
1341 }
1342
1343 // Erase all categories that were merged
1344 eraseMergedCategories();
1345}
1346
1347void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
1348
1349StringRef ObjcCategoryMerger::newStringData(const char *str) {
1350 uint32_t len = strlen(s: str);
1351 uint32_t bufSize = len + 1;
1352 SmallVector<uint8_t> &data = newSectionData(size: bufSize);
1353 char *strData = reinterpret_cast<char *>(data.data());
1354 // Copy the string chars and null-terminator
1355 memcpy(dest: strData, src: str, n: bufSize);
1356 return StringRef(strData, len);
1357}
1358
1359SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
1360 generatedSectionData.push_back(
1361 Elt: std::make_unique<SmallVector<uint8_t>>(args&: size, args: 0));
1362 return *generatedSectionData.back();
1363}
1364
1365} // namespace
1366
1367void objc::mergeCategories() {
1368 TimeTraceScope timeScope("ObjcCategoryMerger");
1369
1370 ObjcCategoryMerger merger(inputSections);
1371 merger.doMerge();
1372}
1373
1374void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1375
1376ObjcCategoryMerger::SourceLanguage
1377ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
1378 if (classSym->getName().starts_with(Prefix: objc::symbol_names::swift_objc_klass))
1379 return SourceLanguage::Swift;
1380
1381 // If the symbol name matches the ObjC prefix, we don't necessarely know this
1382 // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
1383 // classes. Ex:
1384 // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1385 // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1386 // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
1387 //
1388 // So we scan for symbols with the same address and check for the Swift class
1389 if (classSym->getName().starts_with(Prefix: objc::symbol_names::klass)) {
1390 for (auto &sym : classSym->originalIsec->symbols)
1391 if (sym->value == classSym->value)
1392 if (sym->getName().starts_with(Prefix: objc::symbol_names::swift_objc_klass))
1393 return SourceLanguage::Swift;
1394 return SourceLanguage::ObjC;
1395 }
1396
1397 llvm_unreachable("Unexpected class symbol name during category merging");
1398}
1399void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1400 const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1401 assert(categories.size() >= 1 && "Expected at least one category to merge");
1402
1403 // Collect all the info from the categories
1404 ClassExtensionInfo extInfo(catLayout);
1405 extInfo.baseClass = baseClass;
1406 extInfo.baseClassSourceLanguage = getClassSymSourceLang(classSym: baseClass);
1407
1408 for (auto &catInfo : categories) {
1409 parseCatInfoToExtInfo(catInfo, extInfo);
1410 }
1411
1412 // Get metadata for the base class
1413 Defined *metaRo = getClassRo(classSym: baseClass, /*getMetaRo=*/true);
1414 ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(Val: metaRo->isec());
1415 Defined *classRo = getClassRo(classSym: baseClass, /*getMetaRo=*/false);
1416 ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(Val: classRo->isec());
1417
1418 // Now collect the info from the base class from the various lists in the
1419 // class metadata
1420
1421 // Protocol lists are a special case - the same protocol list is in classRo
1422 // and metaRo, so we only need to parse it once
1423 parseProtocolListInfo(isec: classIsec, secOffset: roClassLayout.baseProtocolsOffset,
1424 ptrList&: extInfo.protocols, sourceLang: extInfo.baseClassSourceLanguage);
1425
1426 // Check that the classRo and metaRo protocol lists are identical
1427 assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1428 extInfo.baseClassSourceLanguage) ==
1429 parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1430 extInfo.baseClassSourceLanguage) &&
1431 "Category merger expects classRo and metaRo to have the same protocol "
1432 "list");
1433
1434 parsePointerListInfo(isec: metaIsec, secOffset: roClassLayout.baseMethodsOffset,
1435 ptrList&: extInfo.classMethods);
1436 parsePointerListInfo(isec: classIsec, secOffset: roClassLayout.baseMethodsOffset,
1437 ptrList&: extInfo.instanceMethods);
1438
1439 parsePointerListInfo(isec: metaIsec, secOffset: roClassLayout.basePropertiesOffset,
1440 ptrList&: extInfo.classProps);
1441 parsePointerListInfo(isec: classIsec, secOffset: roClassLayout.basePropertiesOffset,
1442 ptrList&: extInfo.instanceProps);
1443
1444 // Erase the old lists - these will be generated and replaced
1445 eraseSymbolAtIsecOffset(isec: metaIsec, offset: roClassLayout.baseMethodsOffset);
1446 eraseSymbolAtIsecOffset(isec: metaIsec, offset: roClassLayout.baseProtocolsOffset);
1447 eraseSymbolAtIsecOffset(isec: metaIsec, offset: roClassLayout.basePropertiesOffset);
1448 eraseSymbolAtIsecOffset(isec: classIsec, offset: roClassLayout.baseMethodsOffset);
1449 eraseSymbolAtIsecOffset(isec: classIsec, offset: roClassLayout.baseProtocolsOffset);
1450 eraseSymbolAtIsecOffset(isec: classIsec, offset: roClassLayout.basePropertiesOffset);
1451
1452 // Emit the newly merged lists - first into the meta RO then into the class RO
1453 // First we emit and link the protocol list into the meta RO. Then we link it
1454 // in the classRo as well (they're supposed to be identical)
1455 if (Defined *protoListSym =
1456 emitAndLinkProtocolList(parentSym: metaRo, linkAtOffset: roClassLayout.baseProtocolsOffset,
1457 extInfo, ptrList: extInfo.protocols)) {
1458 createSymbolReference(refFrom: classRo, refTo: protoListSym,
1459 offset: roClassLayout.baseProtocolsOffset,
1460 relocTemplate: infoCategoryWriter.catBodyInfo.relocTemplate);
1461 }
1462
1463 emitAndLinkPointerList(parentSym: metaRo, linkAtOffset: roClassLayout.baseMethodsOffset, extInfo,
1464 ptrList: extInfo.classMethods);
1465 emitAndLinkPointerList(parentSym: classRo, linkAtOffset: roClassLayout.baseMethodsOffset, extInfo,
1466 ptrList: extInfo.instanceMethods);
1467
1468 emitAndLinkPointerList(parentSym: metaRo, linkAtOffset: roClassLayout.basePropertiesOffset, extInfo,
1469 ptrList: extInfo.classProps);
1470
1471 emitAndLinkPointerList(parentSym: classRo, linkAtOffset: roClassLayout.basePropertiesOffset, extInfo,
1472 ptrList: extInfo.instanceProps);
1473
1474 // Mark all the categories as merged - this will be used to erase them later
1475 for (auto &catInfo : categories)
1476 catInfo.wasMerged = true;
1477}
1478
1479// Erase the symbol at a given offset in an InputSection
1480void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1481 uint32_t offset) {
1482 Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1483 if (!sym)
1484 return;
1485
1486 // Remove the symbol from isec->symbols
1487 assert(isa<Defined>(sym) && "Can only erase a Defined");
1488 llvm::erase(C&: isec->symbols, V: sym);
1489
1490 // Remove the relocs that refer to this symbol
1491 auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1492 llvm::erase_if(C&: isec->relocs, P: removeAtOff);
1493
1494 // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1495 // the whole ConcatInputSection
1496 if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(Val: sym->isec()))
1497 if (cisec->data.size() == sym->size)
1498 eraseISec(isec: cisec);
1499}
1500