1//===- MCMachOStreamer.cpp - MachO Streamer -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/DenseMap.h"
10#include "llvm/ADT/SmallString.h"
11#include "llvm/ADT/SmallVector.h"
12#include "llvm/ADT/StringRef.h"
13#include "llvm/BinaryFormat/MachO.h"
14#include "llvm/MC/MCAsmBackend.h"
15#include "llvm/MC/MCAssembler.h"
16#include "llvm/MC/MCCodeEmitter.h"
17#include "llvm/MC/MCContext.h"
18#include "llvm/MC/MCDirectives.h"
19#include "llvm/MC/MCExpr.h"
20#include "llvm/MC/MCFixup.h"
21#include "llvm/MC/MCLinkerOptimizationHint.h"
22#include "llvm/MC/MCMachObjectWriter.h"
23#include "llvm/MC/MCObjectFileInfo.h"
24#include "llvm/MC/MCObjectStreamer.h"
25#include "llvm/MC/MCObjectWriter.h"
26#include "llvm/MC/MCSection.h"
27#include "llvm/MC/MCSectionMachO.h"
28#include "llvm/MC/MCSymbol.h"
29#include "llvm/MC/MCSymbolMachO.h"
30#include "llvm/MC/MCValue.h"
31#include "llvm/MC/SectionKind.h"
32#include "llvm/MC/TargetRegistry.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Support/ErrorHandling.h"
35#include <cassert>
36#include <vector>
37
38namespace llvm {
39class MCInst;
40class MCStreamer;
41class MCSubtargetInfo;
42class Triple;
43} // namespace llvm
44
45using namespace llvm;
46
47namespace {
48
49class MCMachOStreamer : public MCObjectStreamer {
50private:
51 /// LabelSections - true if each section change should emit a linker local
52 /// label for use in relocations for assembler local references. Obviates the
53 /// need for local relocations. False by default.
54 bool LabelSections;
55
56 /// HasSectionLabel - map of which sections have already had a non-local
57 /// label emitted to them. Used so we don't emit extraneous linker local
58 /// labels in the middle of the section.
59 DenseMap<const MCSection*, bool> HasSectionLabel;
60
61 void emitDataRegion(MachO::DataRegionType Kind);
62 void emitDataRegionEnd();
63
64public:
65 MCMachOStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
66 std::unique_ptr<MCObjectWriter> OW,
67 std::unique_ptr<MCCodeEmitter> Emitter, bool label)
68 : MCObjectStreamer(Context, std::move(MAB), std::move(OW),
69 std::move(Emitter)),
70 LabelSections(label) {}
71
72 /// state management
73 void reset() override {
74 HasSectionLabel.clear();
75 MCObjectStreamer::reset();
76 }
77
78 MachObjectWriter &getWriter() {
79 return static_cast<MachObjectWriter &>(getAssembler().getWriter());
80 }
81
82 /// @name MCStreamer Interface
83 /// @{
84
85 void changeSection(MCSection *Sect, uint32_t Subsection = 0) override;
86 void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
87 void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
88 void emitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) override;
89 void emitSubsectionsViaSymbols() override;
90 void emitLinkerOptions(ArrayRef<std::string> Options) override;
91 void emitDataRegion(MCDataRegionType Kind) override;
92 void emitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor,
93 unsigned Update, VersionTuple SDKVersion) override;
94 void emitBuildVersion(unsigned Platform, unsigned Major, unsigned Minor,
95 unsigned Update, VersionTuple SDKVersion) override;
96 void emitDarwinTargetVariantBuildVersion(unsigned Platform, unsigned Major,
97 unsigned Minor, unsigned Update,
98 VersionTuple SDKVersion) override;
99 bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
100 void emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
101 void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
102 Align ByteAlignment) override;
103
104 void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
105 Align ByteAlignment) override;
106 void emitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
107 uint64_t Size = 0, Align ByteAlignment = Align(1),
108 SMLoc Loc = SMLoc()) override;
109 void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
110 Align ByteAlignment = Align(1)) override;
111
112 void emitIdent(StringRef IdentString) override {
113 llvm_unreachable("macho doesn't support this directive");
114 }
115
116 void emitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) override {
117 getWriter().getLOHContainer().addDirective(Kind, Args);
118 }
119 void emitCGProfileEntry(const MCSymbolRefExpr *From,
120 const MCSymbolRefExpr *To, uint64_t Count) override {
121 if (!From->getSymbol().isTemporary() && !To->getSymbol().isTemporary())
122 getWriter().getCGProfile().push_back(Elt: {.From: From, .To: To, .Count: Count});
123 }
124
125 void finishImpl() override;
126
127 void finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE);
128 void finalizeCGProfile();
129 void createAddrSigSection();
130};
131
132} // end anonymous namespace.
133
134void MCMachOStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
135 // Change the section normally.
136 changeSectionImpl(Section, Subsection);
137
138 // Output a linker-local symbol so we don't need section-relative local
139 // relocations. The linker hates us when we do that.
140 if (LabelSections && !HasSectionLabel[Section] &&
141 !Section->getBeginSymbol()) {
142 MCSymbol *Label = getContext().createLinkerPrivateTempSymbol();
143 Section->setBeginSymbol(Label);
144 HasSectionLabel[Section] = true;
145 }
146}
147
148void MCMachOStreamer::emitEHSymAttributes(const MCSymbol *Symbol,
149 MCSymbol *EHSymbol) {
150 auto *Sym = cast<MCSymbolMachO>(Val: Symbol);
151 getAssembler().registerSymbol(Symbol: *Symbol);
152 if (Symbol->isExternal())
153 emitSymbolAttribute(Symbol: EHSymbol, Attribute: MCSA_Global);
154 if (Sym->isWeakDefinition())
155 emitSymbolAttribute(Symbol: EHSymbol, Attribute: MCSA_WeakDefinition);
156 if (Sym->isPrivateExtern())
157 emitSymbolAttribute(Symbol: EHSymbol, Attribute: MCSA_PrivateExtern);
158}
159
160void MCMachOStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
161 // We have to create a new fragment if this is an atom defining symbol,
162 // fragments cannot span atoms.
163 if (cast<MCSymbolMachO>(Val: Symbol)->isSymbolLinkerVisible())
164 insert(F: getContext().allocFragment<MCDataFragment>());
165
166 MCObjectStreamer::emitLabel(Symbol, Loc);
167
168 // This causes the reference type flag to be cleared. Darwin 'as' was "trying"
169 // to clear the weak reference and weak definition bits too, but the
170 // implementation was buggy. For now we just try to match 'as', for
171 // diffability.
172 //
173 // FIXME: Cleanup this code, these bits should be emitted based on semantic
174 // properties, not on the order of definition, etc.
175 cast<MCSymbolMachO>(Val: Symbol)->clearReferenceType();
176}
177
178void MCMachOStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
179 MCValue Res;
180
181 if (Value->evaluateAsRelocatable(Res, Asm: nullptr)) {
182 if (const auto *SymA = Res.getAddSym()) {
183 if (!Res.getSubSym() &&
184 (SymA->getName().empty() || Res.getConstant() != 0))
185 cast<MCSymbolMachO>(Val: Symbol)->setAltEntry();
186 }
187 }
188 MCObjectStreamer::emitAssignment(Symbol, Value);
189}
190
191void MCMachOStreamer::emitDataRegion(MachO::DataRegionType Kind) {
192 // Create a temporary label to mark the start of the data region.
193 MCSymbol *Start = getContext().createTempSymbol();
194 emitLabel(Symbol: Start);
195 // Record the region for the object writer to use.
196 getWriter().getDataRegions().push_back(x: {.Kind: Kind, .Start: Start, .End: nullptr});
197}
198
199void MCMachOStreamer::emitDataRegionEnd() {
200 auto &Regions = getWriter().getDataRegions();
201 assert(!Regions.empty() && "Mismatched .end_data_region!");
202 auto &Data = Regions.back();
203 assert(!Data.End && "Mismatched .end_data_region!");
204 // Create a temporary label to mark the end of the data region.
205 Data.End = getContext().createTempSymbol();
206 emitLabel(Symbol: Data.End);
207}
208
209void MCMachOStreamer::emitSubsectionsViaSymbols() {
210 getWriter().setSubsectionsViaSymbols(true);
211}
212
213void MCMachOStreamer::emitLinkerOptions(ArrayRef<std::string> Options) {
214 getWriter().getLinkerOptions().push_back(x: Options);
215}
216
217void MCMachOStreamer::emitDataRegion(MCDataRegionType Kind) {
218 switch (Kind) {
219 case MCDR_DataRegion:
220 emitDataRegion(Kind: MachO::DataRegionType::DICE_KIND_DATA);
221 return;
222 case MCDR_DataRegionJT8:
223 emitDataRegion(Kind: MachO::DataRegionType::DICE_KIND_JUMP_TABLE8);
224 return;
225 case MCDR_DataRegionJT16:
226 emitDataRegion(Kind: MachO::DataRegionType::DICE_KIND_JUMP_TABLE16);
227 return;
228 case MCDR_DataRegionJT32:
229 emitDataRegion(Kind: MachO::DataRegionType::DICE_KIND_JUMP_TABLE32);
230 return;
231 case MCDR_DataRegionEnd:
232 emitDataRegionEnd();
233 return;
234 }
235}
236
237void MCMachOStreamer::emitVersionMin(MCVersionMinType Kind, unsigned Major,
238 unsigned Minor, unsigned Update,
239 VersionTuple SDKVersion) {
240 getWriter().setVersionMin(Type: Kind, Major, Minor, Update, SDKVersion);
241}
242
243void MCMachOStreamer::emitBuildVersion(unsigned Platform, unsigned Major,
244 unsigned Minor, unsigned Update,
245 VersionTuple SDKVersion) {
246 getWriter().setBuildVersion(Platform: (MachO::PlatformType)Platform, Major, Minor,
247 Update, SDKVersion);
248}
249
250void MCMachOStreamer::emitDarwinTargetVariantBuildVersion(
251 unsigned Platform, unsigned Major, unsigned Minor, unsigned Update,
252 VersionTuple SDKVersion) {
253 getWriter().setTargetVariantBuildVersion(Platform: (MachO::PlatformType)Platform, Major,
254 Minor, Update, SDKVersion);
255}
256
257bool MCMachOStreamer::emitSymbolAttribute(MCSymbol *Sym,
258 MCSymbolAttr Attribute) {
259 MCSymbolMachO *Symbol = cast<MCSymbolMachO>(Val: Sym);
260
261 // Indirect symbols are handled differently, to match how 'as' handles
262 // them. This makes writing matching .o files easier.
263 if (Attribute == MCSA_IndirectSymbol) {
264 // Note that we intentionally cannot use the symbol data here; this is
265 // important for matching the string table that 'as' generates.
266 getWriter().getIndirectSymbols().push_back(
267 x: {.Symbol: Symbol, .Section: getCurrentSectionOnly()});
268 return true;
269 }
270
271 // Adding a symbol attribute always introduces the symbol, note that an
272 // important side effect of calling registerSymbol here is to register
273 // the symbol with the assembler.
274 getAssembler().registerSymbol(Symbol: *Symbol);
275
276 // The implementation of symbol attributes is designed to match 'as', but it
277 // leaves much to desired. It doesn't really make sense to arbitrarily add and
278 // remove flags, but 'as' allows this (in particular, see .desc).
279 //
280 // In the future it might be worth trying to make these operations more well
281 // defined.
282 switch (Attribute) {
283 case MCSA_Invalid:
284 case MCSA_ELF_TypeFunction:
285 case MCSA_ELF_TypeIndFunction:
286 case MCSA_ELF_TypeObject:
287 case MCSA_ELF_TypeTLS:
288 case MCSA_ELF_TypeCommon:
289 case MCSA_ELF_TypeNoType:
290 case MCSA_ELF_TypeGnuUniqueObject:
291 case MCSA_Extern:
292 case MCSA_Hidden:
293 case MCSA_IndirectSymbol:
294 case MCSA_Internal:
295 case MCSA_Protected:
296 case MCSA_Weak:
297 case MCSA_Local:
298 case MCSA_LGlobal:
299 case MCSA_Exported:
300 case MCSA_Memtag:
301 case MCSA_WeakAntiDep:
302 return false;
303
304 case MCSA_Global:
305 Symbol->setExternal(true);
306 // This effectively clears the undefined lazy bit, in Darwin 'as', although
307 // it isn't very consistent because it implements this as part of symbol
308 // lookup.
309 //
310 // FIXME: Cleanup this code, these bits should be emitted based on semantic
311 // properties, not on the order of definition, etc.
312 Symbol->setReferenceTypeUndefinedLazy(false);
313 break;
314
315 case MCSA_LazyReference:
316 // FIXME: This requires -dynamic.
317 Symbol->setNoDeadStrip();
318 if (Symbol->isUndefined())
319 Symbol->setReferenceTypeUndefinedLazy(true);
320 break;
321
322 // Since .reference sets the no dead strip bit, it is equivalent to
323 // .no_dead_strip in practice.
324 case MCSA_Reference:
325 case MCSA_NoDeadStrip:
326 Symbol->setNoDeadStrip();
327 break;
328
329 case MCSA_SymbolResolver:
330 Symbol->setSymbolResolver();
331 break;
332
333 case MCSA_AltEntry:
334 Symbol->setAltEntry();
335 break;
336
337 case MCSA_PrivateExtern:
338 Symbol->setExternal(true);
339 Symbol->setPrivateExtern(true);
340 break;
341
342 case MCSA_WeakReference:
343 // FIXME: This requires -dynamic.
344 if (Symbol->isUndefined())
345 Symbol->setWeakReference();
346 break;
347
348 case MCSA_WeakDefinition:
349 // FIXME: 'as' enforces that this is defined and global. The manual claims
350 // it has to be in a coalesced section, but this isn't enforced.
351 Symbol->setWeakDefinition();
352 break;
353
354 case MCSA_WeakDefAutoPrivate:
355 Symbol->setWeakDefinition();
356 Symbol->setWeakReference();
357 break;
358
359 case MCSA_Cold:
360 Symbol->setCold();
361 break;
362 }
363
364 return true;
365}
366
367void MCMachOStreamer::emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
368 // Encode the 'desc' value into the lowest implementation defined bits.
369 getAssembler().registerSymbol(Symbol: *Symbol);
370 cast<MCSymbolMachO>(Val: Symbol)->setDesc(DescValue);
371}
372
373void MCMachOStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
374 Align ByteAlignment) {
375 // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
376 assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
377
378 getAssembler().registerSymbol(Symbol: *Symbol);
379 Symbol->setExternal(true);
380 Symbol->setCommon(Size, Alignment: ByteAlignment);
381}
382
383void MCMachOStreamer::emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
384 Align ByteAlignment) {
385 // '.lcomm' is equivalent to '.zerofill'.
386 return emitZerofill(Section: getContext().getObjectFileInfo()->getDataBSSSection(),
387 Symbol, Size, ByteAlignment);
388}
389
390void MCMachOStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol,
391 uint64_t Size, Align ByteAlignment,
392 SMLoc Loc) {
393 // On darwin all virtual sections have zerofill type. Disallow the usage of
394 // .zerofill in non-virtual functions. If something similar is needed, use
395 // .space or .zero.
396 if (!Section->isVirtualSection()) {
397 getContext().reportError(
398 L: Loc, Msg: "The usage of .zerofill is restricted to sections of "
399 "ZEROFILL type. Use .zero or .space instead.");
400 return; // Early returning here shouldn't harm. EmitZeros should work on any
401 // section.
402 }
403
404 pushSection();
405 switchSection(Section);
406
407 // The symbol may not be present, which only creates the section.
408 if (Symbol) {
409 emitValueToAlignment(Alignment: ByteAlignment, Value: 0, ValueSize: 1, MaxBytesToEmit: 0);
410 emitLabel(Symbol);
411 emitZeros(NumBytes: Size);
412 }
413 popSection();
414}
415
416// This should always be called with the thread local bss section. Like the
417// .zerofill directive this doesn't actually switch sections on us.
418void MCMachOStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
419 uint64_t Size, Align ByteAlignment) {
420 emitZerofill(Section, Symbol, Size, ByteAlignment);
421}
422
423void MCMachOStreamer::finishImpl() {
424 emitFrames(MAB: &getAssembler().getBackend());
425
426 // We have to set the fragment atom associations so we can relax properly for
427 // Mach-O.
428
429 // First, scan the symbol table to build a lookup table from fragments to
430 // defining symbols.
431 DenseMap<const MCFragment *, const MCSymbol *> DefiningSymbolMap;
432 for (const MCSymbol &Symbol : getAssembler().symbols()) {
433 auto &Sym = cast<MCSymbolMachO>(Val: Symbol);
434 if (Sym.isSymbolLinkerVisible() && Sym.isInSection() && !Sym.isVariable() &&
435 !Sym.isAltEntry()) {
436 // An atom defining symbol should never be internal to a fragment.
437 assert(Symbol.getOffset() == 0 &&
438 "Invalid offset in atom defining symbol!");
439 DefiningSymbolMap[Symbol.getFragment()] = &Symbol;
440 }
441 }
442
443 // Set the fragment atom associations by tracking the last seen atom defining
444 // symbol.
445 for (MCSection &Sec : getAssembler()) {
446 cast<MCSectionMachO>(Val&: Sec).allocAtoms();
447 const MCSymbol *CurrentAtom = nullptr;
448 size_t I = 0;
449 for (MCFragment &Frag : Sec) {
450 if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(Val: &Frag))
451 CurrentAtom = Symbol;
452 cast<MCSectionMachO>(Val&: Sec).setAtom(I: I++, Sym: CurrentAtom);
453 }
454 }
455
456 finalizeCGProfile();
457
458 createAddrSigSection();
459 this->MCObjectStreamer::finishImpl();
460}
461
462void MCMachOStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE) {
463 const MCSymbol *S = &SRE->getSymbol();
464 if (getAssembler().registerSymbol(Symbol: *S))
465 S->setExternal(true);
466}
467
468void MCMachOStreamer::finalizeCGProfile() {
469 MCAssembler &Asm = getAssembler();
470 MCObjectWriter &W = getWriter();
471 if (W.getCGProfile().empty())
472 return;
473 for (auto &E : W.getCGProfile()) {
474 finalizeCGProfileEntry(SRE&: E.From);
475 finalizeCGProfileEntry(SRE&: E.To);
476 }
477 // We can't write the section out until symbol indices are finalized which
478 // doesn't happen until after section layout. We need to create the section
479 // and set its size now so that it's accounted for in layout.
480 MCSection *CGProfileSection = Asm.getContext().getMachOSection(
481 Segment: "__LLVM", Section: "__cg_profile", TypeAndAttributes: 0, K: SectionKind::getMetadata());
482 changeSection(Section: CGProfileSection);
483 // For each entry, reserve space for 2 32-bit indices and a 64-bit count.
484 size_t SectionBytes =
485 W.getCGProfile().size() * (2 * sizeof(uint32_t) + sizeof(uint64_t));
486 cast<MCDataFragment>(Val&: *CGProfileSection->begin())
487 .appendContents(Num: SectionBytes, Elt: 0);
488}
489
490MCStreamer *llvm::createMachOStreamer(MCContext &Context,
491 std::unique_ptr<MCAsmBackend> &&MAB,
492 std::unique_ptr<MCObjectWriter> &&OW,
493 std::unique_ptr<MCCodeEmitter> &&CE,
494 bool DWARFMustBeAtTheEnd,
495 bool LabelSections) {
496 return new MCMachOStreamer(Context, std::move(MAB), std::move(OW),
497 std::move(CE), LabelSections);
498}
499
500// The AddrSig section uses a series of relocations to refer to the symbols that
501// should be considered address-significant. The only interesting content of
502// these relocations is their symbol; the type, length etc will be ignored by
503// the linker. The reason we are not referring to the symbol indices directly is
504// that those indices will be invalidated by tools that update the symbol table.
505// Symbol relocations OTOH will have their indices updated by e.g. llvm-strip.
506void MCMachOStreamer::createAddrSigSection() {
507 MCAssembler &Asm = getAssembler();
508 MCObjectWriter &writer = Asm.getWriter();
509 if (!writer.getEmitAddrsigSection())
510 return;
511 // Create the AddrSig section and first data fragment here as its layout needs
512 // to be computed immediately after in order for it to be exported correctly.
513 MCSection *AddrSigSection =
514 Asm.getContext().getObjectFileInfo()->getAddrSigSection();
515 changeSection(Section: AddrSigSection);
516 auto *Frag = cast<MCDataFragment>(Val: AddrSigSection->curFragList()->Head);
517 // We will generate a series of pointer-sized symbol relocations at offset
518 // 0x0. Set the section size to be large enough to contain a single pointer
519 // (instead of emitting a zero-sized section) so these relocations are
520 // technically valid, even though we don't expect these relocations to
521 // actually be applied by the linker.
522 Frag->appendContents(Num: 8, Elt: 0);
523}
524