1//===- MCMachOStreamer.cpp - MachO Streamer -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/DenseMap.h"
10#include "llvm/ADT/SmallVector.h"
11#include "llvm/ADT/StringRef.h"
12#include "llvm/BinaryFormat/MachO.h"
13#include "llvm/MC/MCAsmBackend.h"
14#include "llvm/MC/MCAssembler.h"
15#include "llvm/MC/MCCodeEmitter.h"
16#include "llvm/MC/MCContext.h"
17#include "llvm/MC/MCDirectives.h"
18#include "llvm/MC/MCExpr.h"
19#include "llvm/MC/MCFixup.h"
20#include "llvm/MC/MCLinkerOptimizationHint.h"
21#include "llvm/MC/MCMachObjectWriter.h"
22#include "llvm/MC/MCObjectFileInfo.h"
23#include "llvm/MC/MCObjectStreamer.h"
24#include "llvm/MC/MCObjectWriter.h"
25#include "llvm/MC/MCSection.h"
26#include "llvm/MC/MCSectionMachO.h"
27#include "llvm/MC/MCSymbol.h"
28#include "llvm/MC/MCSymbolMachO.h"
29#include "llvm/MC/MCValue.h"
30#include "llvm/MC/SectionKind.h"
31#include "llvm/MC/TargetRegistry.h"
32#include "llvm/Support/Casting.h"
33#include "llvm/Support/ErrorHandling.h"
34#include <cassert>
35#include <vector>
36
37namespace llvm {
38class MCInst;
39class MCStreamer;
40class MCSubtargetInfo;
41class Triple;
42} // namespace llvm
43
44using namespace llvm;
45
46namespace {
47
48class MCMachOStreamer : public MCObjectStreamer {
49private:
50 /// LabelSections - true if each section change should emit a linker local
51 /// label for use in relocations for assembler local references. Obviates the
52 /// need for local relocations. False by default.
53 bool LabelSections;
54
55 /// HasSectionLabel - map of which sections have already had a non-local
56 /// label emitted to them. Used so we don't emit extraneous linker local
57 /// labels in the middle of the section.
58 DenseMap<const MCSection*, bool> HasSectionLabel;
59
60 void emitDataRegion(MachO::DataRegionType Kind);
61 void emitDataRegionEnd();
62
63public:
64 MCMachOStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
65 std::unique_ptr<MCObjectWriter> OW,
66 std::unique_ptr<MCCodeEmitter> Emitter, bool label)
67 : MCObjectStreamer(Context, std::move(MAB), std::move(OW),
68 std::move(Emitter)),
69 LabelSections(label) {}
70
71 /// state management
72 void reset() override {
73 HasSectionLabel.clear();
74 MCObjectStreamer::reset();
75 }
76
77 MachObjectWriter &getWriter() {
78 return static_cast<MachObjectWriter &>(getAssembler().getWriter());
79 }
80
81 /// @name MCStreamer Interface
82 /// @{
83
84 void changeSection(MCSection *Sect, uint32_t Subsection = 0) override;
85 void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
86 void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
87 void emitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) override;
88 void emitSubsectionsViaSymbols() override;
89 void emitLinkerOptions(ArrayRef<std::string> Options) override;
90 void emitDataRegion(MCDataRegionType Kind) override;
91 void emitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor,
92 unsigned Update, VersionTuple SDKVersion) override;
93 void emitBuildVersion(unsigned Platform, unsigned Major, unsigned Minor,
94 unsigned Update, VersionTuple SDKVersion) override;
95 void emitDarwinTargetVariantBuildVersion(unsigned Platform, unsigned Major,
96 unsigned Minor, unsigned Update,
97 VersionTuple SDKVersion) override;
98 bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
99 void emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
100 void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
101 Align ByteAlignment) override;
102
103 void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
104 Align ByteAlignment) override;
105 void emitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
106 uint64_t Size = 0, Align ByteAlignment = Align(1),
107 SMLoc Loc = SMLoc()) override;
108 void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
109 Align ByteAlignment = Align(1)) override;
110
111 void emitIdent(StringRef IdentString) override {
112 llvm_unreachable("macho doesn't support this directive");
113 }
114
115 void emitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) override {
116 getWriter().getLOHContainer().addDirective(Kind, Args);
117 }
118 void emitCGProfileEntry(const MCSymbolRefExpr *From,
119 const MCSymbolRefExpr *To, uint64_t Count) override {
120 if (!From->getSymbol().isTemporary() && !To->getSymbol().isTemporary())
121 getWriter().getCGProfile().push_back(Elt: {.From: From, .To: To, .Count: Count});
122 }
123
124 void finishImpl() override;
125
126 void finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE);
127 void finalizeCGProfile();
128 void createAddrSigSection();
129};
130
131} // end anonymous namespace.
132
133void MCMachOStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
134 MCObjectStreamer::changeSection(Section, Subsection);
135
136 // Output a linker-local symbol so we don't need section-relative local
137 // relocations. The linker hates us when we do that.
138 if (LabelSections && !HasSectionLabel[Section] &&
139 !Section->getBeginSymbol()) {
140 MCSymbol *Label = getContext().createLinkerPrivateTempSymbol();
141 Section->setBeginSymbol(Label);
142 HasSectionLabel[Section] = true;
143 if (!Label->isInSection())
144 emitLabel(Symbol: Label);
145 }
146}
147
148void MCMachOStreamer::emitEHSymAttributes(const MCSymbol *Symbol,
149 MCSymbol *EHSymbol) {
150 auto *Sym = static_cast<const MCSymbolMachO *>(Symbol);
151 getAssembler().registerSymbol(Symbol: *Symbol);
152 if (Sym->isExternal())
153 emitSymbolAttribute(Symbol: EHSymbol, Attribute: MCSA_Global);
154 if (Sym->isWeakDefinition())
155 emitSymbolAttribute(Symbol: EHSymbol, Attribute: MCSA_WeakDefinition);
156 if (Sym->isPrivateExtern())
157 emitSymbolAttribute(Symbol: EHSymbol, Attribute: MCSA_PrivateExtern);
158}
159
160void MCMachOStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
161 // We have to create a new fragment if this is an atom defining symbol,
162 // fragments cannot span atoms.
163 if (static_cast<MCSymbolMachO *>(Symbol)->isSymbolLinkerVisible())
164 newFragment();
165
166 MCObjectStreamer::emitLabel(Symbol, Loc);
167
168 // This causes the reference type flag to be cleared. Darwin 'as' was "trying"
169 // to clear the weak reference and weak definition bits too, but the
170 // implementation was buggy. For now we just try to match 'as', for
171 // diffability.
172 //
173 // FIXME: Cleanup this code, these bits should be emitted based on semantic
174 // properties, not on the order of definition, etc.
175 static_cast<MCSymbolMachO *>(Symbol)->clearReferenceType();
176}
177
178void MCMachOStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
179 MCValue Res;
180
181 if (Value->evaluateAsRelocatable(Res, Asm: nullptr)) {
182 if (const auto *SymA = Res.getAddSym()) {
183 if (!Res.getSubSym() &&
184 (SymA->getName().empty() || Res.getConstant() != 0))
185 static_cast<MCSymbolMachO *>(Symbol)->setAltEntry();
186 }
187 }
188 MCObjectStreamer::emitAssignment(Symbol, Value);
189}
190
191void MCMachOStreamer::emitDataRegion(MachO::DataRegionType Kind) {
192 // Create a temporary label to mark the start of the data region.
193 MCSymbol *Start = getContext().createTempSymbol();
194 emitLabel(Symbol: Start);
195 // Record the region for the object writer to use.
196 getWriter().getDataRegions().push_back(x: {.Kind: Kind, .Start: Start, .End: nullptr});
197}
198
199void MCMachOStreamer::emitDataRegionEnd() {
200 auto &Regions = getWriter().getDataRegions();
201 assert(!Regions.empty() && "Mismatched .end_data_region!");
202 auto &Data = Regions.back();
203 assert(!Data.End && "Mismatched .end_data_region!");
204 // Create a temporary label to mark the end of the data region.
205 Data.End = getContext().createTempSymbol();
206 emitLabel(Symbol: Data.End);
207}
208
209void MCMachOStreamer::emitSubsectionsViaSymbols() {
210 getWriter().setSubsectionsViaSymbols(true);
211}
212
213void MCMachOStreamer::emitLinkerOptions(ArrayRef<std::string> Options) {
214 getWriter().getLinkerOptions().push_back(x: Options);
215}
216
217void MCMachOStreamer::emitDataRegion(MCDataRegionType Kind) {
218 switch (Kind) {
219 case MCDR_DataRegion:
220 emitDataRegion(Kind: MachO::DataRegionType::DICE_KIND_DATA);
221 return;
222 case MCDR_DataRegionJT8:
223 emitDataRegion(Kind: MachO::DataRegionType::DICE_KIND_JUMP_TABLE8);
224 return;
225 case MCDR_DataRegionJT16:
226 emitDataRegion(Kind: MachO::DataRegionType::DICE_KIND_JUMP_TABLE16);
227 return;
228 case MCDR_DataRegionJT32:
229 emitDataRegion(Kind: MachO::DataRegionType::DICE_KIND_JUMP_TABLE32);
230 return;
231 case MCDR_DataRegionEnd:
232 emitDataRegionEnd();
233 return;
234 }
235}
236
237void MCMachOStreamer::emitVersionMin(MCVersionMinType Kind, unsigned Major,
238 unsigned Minor, unsigned Update,
239 VersionTuple SDKVersion) {
240 getWriter().setVersionMin(Type: Kind, Major, Minor, Update, SDKVersion);
241}
242
243void MCMachOStreamer::emitBuildVersion(unsigned Platform, unsigned Major,
244 unsigned Minor, unsigned Update,
245 VersionTuple SDKVersion) {
246 getWriter().setBuildVersion(Platform: (MachO::PlatformType)Platform, Major, Minor,
247 Update, SDKVersion);
248}
249
250void MCMachOStreamer::emitDarwinTargetVariantBuildVersion(
251 unsigned Platform, unsigned Major, unsigned Minor, unsigned Update,
252 VersionTuple SDKVersion) {
253 getWriter().setTargetVariantBuildVersion(Platform: (MachO::PlatformType)Platform, Major,
254 Minor, Update, SDKVersion);
255}
256
257bool MCMachOStreamer::emitSymbolAttribute(MCSymbol *Sym,
258 MCSymbolAttr Attribute) {
259 auto *Symbol = static_cast<MCSymbolMachO *>(Sym);
260
261 // Indirect symbols are handled differently, to match how 'as' handles
262 // them. This makes writing matching .o files easier.
263 if (Attribute == MCSA_IndirectSymbol) {
264 // Note that we intentionally cannot use the symbol data here; this is
265 // important for matching the string table that 'as' generates.
266 getWriter().getIndirectSymbols().push_back(
267 x: {.Symbol: Symbol, .Section: getCurrentSectionOnly()});
268 return true;
269 }
270
271 // Adding a symbol attribute always introduces the symbol, note that an
272 // important side effect of calling registerSymbol here is to register
273 // the symbol with the assembler.
274 getAssembler().registerSymbol(Symbol: *Symbol);
275
276 // The implementation of symbol attributes is designed to match 'as', but it
277 // leaves much to desired. It doesn't really make sense to arbitrarily add and
278 // remove flags, but 'as' allows this (in particular, see .desc).
279 //
280 // In the future it might be worth trying to make these operations more well
281 // defined.
282 switch (Attribute) {
283 case MCSA_Invalid:
284 case MCSA_ELF_TypeFunction:
285 case MCSA_ELF_TypeIndFunction:
286 case MCSA_ELF_TypeObject:
287 case MCSA_ELF_TypeTLS:
288 case MCSA_ELF_TypeCommon:
289 case MCSA_ELF_TypeNoType:
290 case MCSA_ELF_TypeGnuUniqueObject:
291 case MCSA_Extern:
292 case MCSA_Hidden:
293 case MCSA_IndirectSymbol:
294 case MCSA_Internal:
295 case MCSA_Protected:
296 case MCSA_Weak:
297 case MCSA_Local:
298 case MCSA_LGlobal:
299 case MCSA_Exported:
300 case MCSA_Memtag:
301 case MCSA_WeakAntiDep:
302 case MCSA_OSLinkage:
303 case MCSA_XPLinkage:
304 return false;
305
306 case MCSA_Global:
307 Symbol->setExternal(true);
308 // This effectively clears the undefined lazy bit, in Darwin 'as', although
309 // it isn't very consistent because it implements this as part of symbol
310 // lookup.
311 //
312 // FIXME: Cleanup this code, these bits should be emitted based on semantic
313 // properties, not on the order of definition, etc.
314 Symbol->setReferenceTypeUndefinedLazy(false);
315 break;
316
317 case MCSA_LazyReference:
318 // FIXME: This requires -dynamic.
319 Symbol->setNoDeadStrip();
320 if (Symbol->isUndefined())
321 Symbol->setReferenceTypeUndefinedLazy(true);
322 break;
323
324 // Since .reference sets the no dead strip bit, it is equivalent to
325 // .no_dead_strip in practice.
326 case MCSA_Reference:
327 case MCSA_NoDeadStrip:
328 Symbol->setNoDeadStrip();
329 break;
330
331 case MCSA_SymbolResolver:
332 Symbol->setSymbolResolver();
333 break;
334
335 case MCSA_AltEntry:
336 Symbol->setAltEntry();
337 break;
338
339 case MCSA_PrivateExtern:
340 Symbol->setExternal(true);
341 Symbol->setPrivateExtern(true);
342 break;
343
344 case MCSA_WeakReference:
345 // FIXME: This requires -dynamic.
346 if (Symbol->isUndefined())
347 Symbol->setWeakReference();
348 break;
349
350 case MCSA_WeakDefinition:
351 // FIXME: 'as' enforces that this is defined and global. The manual claims
352 // it has to be in a coalesced section, but this isn't enforced.
353 Symbol->setWeakDefinition();
354 break;
355
356 case MCSA_WeakDefAutoPrivate:
357 Symbol->setWeakDefinition();
358 Symbol->setWeakReference();
359 break;
360
361 case MCSA_Cold:
362 Symbol->setCold();
363 break;
364 }
365
366 return true;
367}
368
369void MCMachOStreamer::emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
370 // Encode the 'desc' value into the lowest implementation defined bits.
371 getAssembler().registerSymbol(Symbol: *Symbol);
372 static_cast<MCSymbolMachO *>(Symbol)->setDesc(DescValue);
373}
374
375void MCMachOStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
376 Align ByteAlignment) {
377 auto &Sym = static_cast<MCSymbolMachO &>(*Symbol);
378 // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
379 assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
380
381 getAssembler().registerSymbol(Symbol: Sym);
382 Sym.setExternal(true);
383 Sym.setCommon(Size, Alignment: ByteAlignment);
384}
385
386void MCMachOStreamer::emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
387 Align ByteAlignment) {
388 // '.lcomm' is equivalent to '.zerofill'.
389 return emitZerofill(Section: getContext().getObjectFileInfo()->getDataBSSSection(),
390 Symbol, Size, ByteAlignment);
391}
392
393void MCMachOStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol,
394 uint64_t Size, Align ByteAlignment,
395 SMLoc Loc) {
396 // On darwin all virtual sections have zerofill type. Disallow the usage of
397 // .zerofill in non-virtual functions. If something similar is needed, use
398 // .space or .zero.
399 if (!Section->isBssSection()) {
400 getContext().reportError(
401 L: Loc, Msg: "The usage of .zerofill is restricted to sections of "
402 "ZEROFILL type. Use .zero or .space instead.");
403 return; // Early returning here shouldn't harm. EmitZeros should work on any
404 // section.
405 }
406
407 pushSection();
408 switchSection(Section);
409
410 // The symbol may not be present, which only creates the section.
411 if (Symbol) {
412 emitValueToAlignment(Alignment: ByteAlignment, Fill: 0, FillLen: 1, MaxBytesToEmit: 0);
413 emitLabel(Symbol);
414 emitZeros(NumBytes: Size);
415 }
416 popSection();
417}
418
419// This should always be called with the thread local bss section. Like the
420// .zerofill directive this doesn't actually switch sections on us.
421void MCMachOStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
422 uint64_t Size, Align ByteAlignment) {
423 emitZerofill(Section, Symbol, Size, ByteAlignment);
424}
425
426void MCMachOStreamer::finishImpl() {
427 emitFrames();
428
429 // We have to set the fragment atom associations so we can relax properly for
430 // Mach-O.
431
432 // First, scan the symbol table to build a lookup table from fragments to
433 // defining symbols.
434 DenseMap<const MCFragment *, const MCSymbol *> DefiningSymbolMap;
435 for (const MCSymbol &Symbol : getAssembler().symbols()) {
436 auto &Sym = static_cast<const MCSymbolMachO &>(Symbol);
437 if (Sym.isSymbolLinkerVisible() && Sym.isInSection() && !Sym.isVariable() &&
438 !Sym.isAltEntry()) {
439 // An atom defining symbol should never be internal to a fragment.
440 assert(Symbol.getOffset() == 0 &&
441 "Invalid offset in atom defining symbol!");
442 DefiningSymbolMap[Symbol.getFragment()] = &Symbol;
443 }
444 }
445
446 // Set the fragment atom associations by tracking the last seen atom defining
447 // symbol.
448 for (MCSection &Sec : getAssembler()) {
449 static_cast<MCSectionMachO &>(Sec).allocAtoms();
450 const MCSymbol *CurrentAtom = nullptr;
451 size_t I = 0;
452 for (MCFragment &Frag : Sec) {
453 if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(Val: &Frag))
454 CurrentAtom = Symbol;
455 static_cast<MCSectionMachO &>(Sec).setAtom(I: I++, Sym: CurrentAtom);
456 }
457 }
458
459 finalizeCGProfile();
460
461 createAddrSigSection();
462 this->MCObjectStreamer::finishImpl();
463}
464
465void MCMachOStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE) {
466 auto *S =
467 static_cast<MCSymbolMachO *>(const_cast<MCSymbol *>(&SRE->getSymbol()));
468 if (getAssembler().registerSymbol(Symbol: *S))
469 S->setExternal(true);
470}
471
472void MCMachOStreamer::finalizeCGProfile() {
473 MCAssembler &Asm = getAssembler();
474 MCObjectWriter &W = getWriter();
475 if (W.getCGProfile().empty())
476 return;
477 for (auto &E : W.getCGProfile()) {
478 finalizeCGProfileEntry(SRE&: E.From);
479 finalizeCGProfileEntry(SRE&: E.To);
480 }
481 // We can't write the section out until symbol indices are finalized which
482 // doesn't happen until after section layout. We need to create the section
483 // and set its size now so that it's accounted for in layout.
484 MCSection *CGProfileSection = Asm.getContext().getMachOSection(
485 Segment: "__LLVM", Section: "__cg_profile", TypeAndAttributes: 0, K: SectionKind::getMetadata());
486 // Call the base class changeSection to omit the linker-local label.
487 MCObjectStreamer::changeSection(Section: CGProfileSection);
488 // For each entry, reserve space for 2 32-bit indices and a 64-bit count.
489 size_t SectionBytes =
490 W.getCGProfile().size() * (2 * sizeof(uint32_t) + sizeof(uint64_t));
491 (*CGProfileSection->begin())
492 .setVarContents(std::vector<char>(SectionBytes, 0));
493}
494
495MCStreamer *llvm::createMachOStreamer(MCContext &Context,
496 std::unique_ptr<MCAsmBackend> &&MAB,
497 std::unique_ptr<MCObjectWriter> &&OW,
498 std::unique_ptr<MCCodeEmitter> &&CE,
499 bool DWARFMustBeAtTheEnd,
500 bool LabelSections) {
501 return new MCMachOStreamer(Context, std::move(MAB), std::move(OW),
502 std::move(CE), LabelSections);
503}
504
505// The AddrSig section uses a series of relocations to refer to the symbols that
506// should be considered address-significant. The only interesting content of
507// these relocations is their symbol; the type, length etc will be ignored by
508// the linker. The reason we are not referring to the symbol indices directly is
509// that those indices will be invalidated by tools that update the symbol table.
510// Symbol relocations OTOH will have their indices updated by e.g. llvm-strip.
511void MCMachOStreamer::createAddrSigSection() {
512 MCAssembler &Asm = getAssembler();
513 MCObjectWriter &writer = Asm.getWriter();
514 if (!writer.getEmitAddrsigSection())
515 return;
516 // Create the AddrSig section and first data fragment here as its layout needs
517 // to be computed immediately after in order for it to be exported correctly.
518 MCSection *AddrSigSection =
519 Asm.getContext().getObjectFileInfo()->getAddrSigSection();
520 // Call the base class changeSection to omit the linker-local label.
521 MCObjectStreamer::changeSection(Section: AddrSigSection);
522 auto *Frag = cast<MCFragment>(Val: AddrSigSection->curFragList()->Head);
523 // We will generate a series of pointer-sized symbol relocations at offset
524 // 0x0. Set the section size to be large enough to contain a single pointer
525 // (instead of emitting a zero-sized section) so these relocations are
526 // technically valid, even though we don't expect these relocations to
527 // actually be applied by the linker.
528 constexpr char zero[8] = {};
529 Frag->setVarContents(zero);
530}
531