1//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MCTargetDesc/X86BaseInfo.h"
10#include "MCTargetDesc/X86EncodingOptimization.h"
11#include "MCTargetDesc/X86FixupKinds.h"
12#include "llvm/ADT/StringSwitch.h"
13#include "llvm/BinaryFormat/ELF.h"
14#include "llvm/BinaryFormat/MachO.h"
15#include "llvm/MC/MCAsmBackend.h"
16#include "llvm/MC/MCAssembler.h"
17#include "llvm/MC/MCCodeEmitter.h"
18#include "llvm/MC/MCContext.h"
19#include "llvm/MC/MCDwarf.h"
20#include "llvm/MC/MCELFObjectWriter.h"
21#include "llvm/MC/MCELFStreamer.h"
22#include "llvm/MC/MCExpr.h"
23#include "llvm/MC/MCFixupKindInfo.h"
24#include "llvm/MC/MCInst.h"
25#include "llvm/MC/MCInstrInfo.h"
26#include "llvm/MC/MCMachObjectWriter.h"
27#include "llvm/MC/MCObjectStreamer.h"
28#include "llvm/MC/MCObjectWriter.h"
29#include "llvm/MC/MCRegisterInfo.h"
30#include "llvm/MC/MCSectionMachO.h"
31#include "llvm/MC/MCSubtargetInfo.h"
32#include "llvm/MC/MCValue.h"
33#include "llvm/MC/TargetRegistry.h"
34#include "llvm/Support/CommandLine.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/raw_ostream.h"
37
38using namespace llvm;
39
40namespace {
41/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42class X86AlignBranchKind {
43private:
44 uint8_t AlignBranchKind = 0;
45
46public:
47 void operator=(const std::string &Val) {
48 if (Val.empty())
49 return;
50 SmallVector<StringRef, 6> BranchTypes;
51 StringRef(Val).split(A&: BranchTypes, Separator: '+', MaxSplit: -1, KeepEmpty: false);
52 for (auto BranchType : BranchTypes) {
53 if (BranchType == "fused")
54 addKind(Value: X86::AlignBranchFused);
55 else if (BranchType == "jcc")
56 addKind(Value: X86::AlignBranchJcc);
57 else if (BranchType == "jmp")
58 addKind(Value: X86::AlignBranchJmp);
59 else if (BranchType == "call")
60 addKind(Value: X86::AlignBranchCall);
61 else if (BranchType == "ret")
62 addKind(Value: X86::AlignBranchRet);
63 else if (BranchType == "indirect")
64 addKind(Value: X86::AlignBranchIndirect);
65 else {
66 errs() << "invalid argument " << BranchType.str()
67 << " to -x86-align-branch=; each element must be one of: fused, "
68 "jcc, jmp, call, ret, indirect.(plus separated)\n";
69 }
70 }
71 }
72
73 operator uint8_t() const { return AlignBranchKind; }
74 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75};
76
// Backing storage for the -x86-align-branch option below (bound through
// cl::location). The X86AsmBackend constructor copies it when the option was
// given on the command line.
77X86AlignBranchKind X86AlignBranchKindLoc;
78
// -x86-align-branch-boundary: size of the boundary branches are aligned
// against. The default of 0 leaves branch alignment off.
79cl::opt<unsigned> X86AlignBranchBoundary(
80 "x86-align-branch-boundary", cl::init(Val: 0),
81 cl::desc(
82 "Control how the assembler should align branches with NOP. If the "
83 "boundary's size is not 0, it should be a power of 2 and no less "
84 "than 32. Branches will be aligned to prevent from being across or "
85 "against the boundary of specified size. The default value 0 does not "
86 "align branches."));
87
// -x86-align-branch: '+'-separated list of branch kinds to align. The value is
// parsed as a std::string and stored into X86AlignBranchKindLoc, whose
// operator=(const std::string &) turns it into a kind mask.
88cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
89 "x86-align-branch",
90 cl::desc(
91 "Specify types of branches to align (plus separated list of types):"
92 "\njcc indicates conditional jumps"
93 "\nfused indicates fused conditional jumps"
94 "\njmp indicates direct unconditional jumps"
95 "\ncall indicates direct and indirect calls"
96 "\nret indicates rets"
97 "\nindirect indicates indirect unconditional jumps"),
98 cl::location(L&: X86AlignBranchKindLoc));
99
// -x86-branches-within-32B-boundaries: convenience switch. When set, the
// backend constructor selects a 32-byte boundary and the fused, jcc and jmp
// kinds; the two options above can still override those defaults.
100cl::opt<bool> X86AlignBranchWithin32BBoundaries(
101 "x86-branches-within-32B-boundaries", cl::init(Val: false),
102 cl::desc(
103 "Align selected instructions to mitigate negative performance impact "
104 "of Intel's micro code update for errata skx102. May break "
105 "assumptions about labels corresponding to particular instructions, "
106 "and should be used with caution."));
107
// -x86-pad-max-prefix-size: upper bound on the number of prefix bytes an
// instruction may carry after prefix padding (see padInstructionViaPrefix).
108cl::opt<unsigned> X86PadMaxPrefixSize(
109 "x86-pad-max-prefix-size", cl::init(Val: 0),
110 cl::desc("Maximum number of prefixes to use for padding"));
111
// -x86-pad-for-align (hidden, off by default): one of the two switches checked
// by finishLayout before it attempts any instruction padding.
112cl::opt<bool> X86PadForAlign(
113 "x86-pad-for-align", cl::init(Val: false), cl::Hidden,
114 cl::desc("Pad previous instructions to implement align directives"));
115
// -x86-pad-for-branch-align (hidden, on by default): required by
// allowEnhancedRelaxation and also checked by finishLayout.
116cl::opt<bool> X86PadForBranchAlign(
117 "x86-pad-for-branch-align", cl::init(Val: true), cl::Hidden,
118 cl::desc("Pad previous instructions to implement branch alignment"));
119
/// MCAsmBackend implementation for X86. Besides fixup application and
/// instruction relaxation it implements the optional branch-alignment
/// machinery: BoundaryAlign fragments are inserted around selected branches
/// (emitInstructionBegin / emitInstructionEnd), and instructions may be padded
/// by relaxation or extra prefixes.
120class X86AsmBackend : public MCAsmBackend {
// Subtarget handed to the constructor; consulted for the 16/32/64-bit mode
// features.
121 const MCSubtargetInfo &STI;
// Instruction descriptions, created from the Target in the constructor.
122 std::unique_ptr<const MCInstrInfo> MCII;
// Mask of branch kinds selected for alignment.
123 X86AlignBranchKind AlignBranchType;
// Boundary that selected branches are aligned against.
124 Align AlignBoundary;
// Cap on the number of prefix bytes per instruction used by prefix padding;
// taken from -x86-pad-max-prefix-size when that option is given.
125 unsigned TargetPrefixMax = 0;
126
// State carried from one emitted instruction to the next.
// PrevInst: copy of the last instruction; only updated (in emitInstructionEnd)
// while canPadBranches() holds.
127 MCInst PrevInst;
// Opcode of the last emitted instruction; read by canPadInst().
128 unsigned PrevInstOpcode = 0;
// BoundaryAlign fragment inserted by emitInstructionBegin that has not yet
// been given its last fragment by emitInstructionEnd.
129 MCBoundaryAlignFragment *PendingBA = nullptr;
// Fragment that was current after the previous instruction, paired with the
// size reported for it by getSizeForInstFragment at that time.
130 std::pair<MCFragment *, size_t> PrevInstPosition;
// Computed in emitInstructionBegin; true when the instruction being emitted
// directly follows data rather than another instruction.
131 bool IsRightAfterData = false;
132
133 uint8_t determinePaddingPrefix(const MCInst &Inst) const;
134 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
135 bool needAlign(const MCInst &Inst) const;
136 bool canPadBranches(MCObjectStreamer &OS) const;
137 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
138
139public:
// Builds the backend and derives the branch-alignment configuration from the
// command-line options declared earlier in this file.
140 X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
141 : MCAsmBackend(llvm::endianness::little), STI(STI),
142 MCII(T.createMCInstrInfo()) {
143 if (X86AlignBranchWithin32BBoundaries) {
144 // At the moment, this defaults to aligning fused branches, unconditional
145 // jumps, and (unfused) conditional jumps with nops. Both the
146 // instructions aligned and the alignment method (nop vs prefix) may
147 // change in the future.
148 AlignBoundary = assumeAligned(Value: 32);
149 AlignBranchType.addKind(Value: X86::AlignBranchFused);
150 AlignBranchType.addKind(Value: X86::AlignBranchJcc);
151 AlignBranchType.addKind(Value: X86::AlignBranchJmp);
152 }
153 // Allow overriding the defaults set by the main flag. Each option only takes
// effect when it actually appeared on the command line.
154 if (X86AlignBranchBoundary.getNumOccurrences())
155 AlignBoundary = assumeAligned(Value: X86AlignBranchBoundary);
156 if (X86AlignBranch.getNumOccurrences())
157 AlignBranchType = X86AlignBranchKindLoc;
158 if (X86PadMaxPrefixSize.getNumOccurrences())
159 TargetPrefixMax = X86PadMaxPrefixSize;
160 }
161
162 bool allowAutoPadding() const override;
163 bool allowEnhancedRelaxation() const override;
// Hooks called around the emission of each instruction; they are not marked
// override in this class.
164 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
165 const MCSubtargetInfo &STI);
166 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);
167
168 unsigned getNumFixupKinds() const override {
169 return X86::NumTargetFixupKinds;
170 }
171
172 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
173
174 const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
175
176 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
177 const MCValue &Target,
178 const MCSubtargetInfo *STI) override;
179
180 void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
181 const MCValue &Target, MutableArrayRef<char> Data,
182 uint64_t Value, bool IsResolved,
183 const MCSubtargetInfo *STI) const override;
184
185 bool mayNeedRelaxation(const MCInst &Inst,
186 const MCSubtargetInfo &STI) const override;
187
188 bool fixupNeedsRelaxation(const MCFixup &Fixup,
189 uint64_t Value) const override;
190
191 void relaxInstruction(MCInst &Inst,
192 const MCSubtargetInfo &STI) const override;
193
// Padding helpers. Each returns true when it changed the fragment and
// decreases RemainingSize by the number of bytes it added.
194 bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
195 MCCodeEmitter &Emitter,
196 unsigned &RemainingSize) const;
197
198 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
199 unsigned &RemainingSize) const;
200
201 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
202 unsigned &RemainingSize) const;
203
204 void finishLayout(const MCAssembler &Asm) const override;
205
206 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
207
208 bool writeNopData(raw_ostream &OS, uint64_t Count,
209 const MCSubtargetInfo *STI) const override;
210};
211} // end anonymous namespace
212
213static bool isRelaxableBranch(unsigned Opcode) {
214 return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
215}
216
217static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
218 bool Is16BitMode = false) {
219 switch (Opcode) {
220 default:
221 llvm_unreachable("invalid opcode for branch");
222 case X86::JCC_1:
223 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
224 case X86::JMP_1:
225 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
226 }
227}
228
229static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
230 unsigned Opcode = MI.getOpcode();
231 return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
232 : X86::getOpcodeForLongImmediateForm(Opcode);
233}
234
235static X86::CondCode getCondFromBranch(const MCInst &MI,
236 const MCInstrInfo &MCII) {
237 unsigned Opcode = MI.getOpcode();
238 switch (Opcode) {
239 default:
240 return X86::COND_INVALID;
241 case X86::JCC_1: {
242 const MCInstrDesc &Desc = MCII.get(Opcode);
243 return static_cast<X86::CondCode>(
244 MI.getOperand(i: Desc.getNumOperands() - 1).getImm());
245 }
246 }
247}
248
249static X86::SecondMacroFusionInstKind
250classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
251 X86::CondCode CC = getCondFromBranch(MI, MCII);
252 return classifySecondCondCodeInMacroFusion(CC);
253}
254
255/// Check if the instruction uses RIP relative addressing.
256static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
257 unsigned Opcode = MI.getOpcode();
258 const MCInstrDesc &Desc = MCII.get(Opcode);
259 uint64_t TSFlags = Desc.TSFlags;
260 unsigned CurOp = X86II::getOperandBias(Desc);
261 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
262 if (MemoryOperand < 0)
263 return false;
264 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
265 unsigned BaseReg = MI.getOperand(i: BaseRegNum).getReg();
266 return (BaseReg == X86::RIP);
267}
268
269/// Check if the instruction is a prefix.
270static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
271 return X86II::isPrefix(TSFlags: MCII.get(Opcode).TSFlags);
272}
273
274/// Check if the instruction is valid as the first instruction in macro fusion.
275static bool isFirstMacroFusibleInst(const MCInst &Inst,
276 const MCInstrInfo &MCII) {
277 // An Intel instruction with RIP relative addressing is not macro fusible.
278 if (isRIPRelative(MI: Inst, MCII))
279 return false;
280 X86::FirstMacroFusionInstKind FIK =
281 X86::classifyFirstOpcodeInMacroFusion(Opcode: Inst.getOpcode());
282 return FIK != X86::FirstMacroFusionInstKind::Invalid;
283}
284
285/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
286/// get a better peformance in some cases. Here, we determine which prefix is
287/// the most suitable.
288///
289/// If the instruction has a segment override prefix, use the existing one.
290/// If the target is 64-bit, use the CS.
291/// If the target is 32-bit,
292/// - If the instruction has a ESP/EBP base register, use SS.
293/// - Otherwise use DS.
294uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
295 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
296 "Prefixes can be added only in 32-bit or 64-bit mode.");
297 const MCInstrDesc &Desc = MCII->get(Opcode: Inst.getOpcode());
298 uint64_t TSFlags = Desc.TSFlags;
299
300 // Determine where the memory operand starts, if present.
301 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
302 if (MemoryOperand != -1)
303 MemoryOperand += X86II::getOperandBias(Desc);
304
305 unsigned SegmentReg = 0;
306 if (MemoryOperand >= 0) {
307 // Check for explicit segment override on memory operand.
308 SegmentReg = Inst.getOperand(i: MemoryOperand + X86::AddrSegmentReg).getReg();
309 }
310
311 switch (TSFlags & X86II::FormMask) {
312 default:
313 break;
314 case X86II::RawFrmDstSrc: {
315 // Check segment override opcode prefix as needed (not for %ds).
316 if (Inst.getOperand(i: 2).getReg() != X86::DS)
317 SegmentReg = Inst.getOperand(i: 2).getReg();
318 break;
319 }
320 case X86II::RawFrmSrc: {
321 // Check segment override opcode prefix as needed (not for %ds).
322 if (Inst.getOperand(i: 1).getReg() != X86::DS)
323 SegmentReg = Inst.getOperand(i: 1).getReg();
324 break;
325 }
326 case X86II::RawFrmMemOffs: {
327 // Check segment override opcode prefix as needed.
328 SegmentReg = Inst.getOperand(i: 1).getReg();
329 break;
330 }
331 }
332
333 if (SegmentReg != 0)
334 return X86::getSegmentOverridePrefixForReg(Reg: SegmentReg);
335
336 if (STI.hasFeature(Feature: X86::Is64Bit))
337 return X86::CS_Encoding;
338
339 if (MemoryOperand >= 0) {
340 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
341 unsigned BaseReg = Inst.getOperand(i: BaseRegNum).getReg();
342 if (BaseReg == X86::ESP || BaseReg == X86::EBP)
343 return X86::SS_Encoding;
344 }
345 return X86::DS_Encoding;
346}
347
348/// Check if the two instructions will be macro-fused on the target cpu.
349bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
350 const MCInstrDesc &InstDesc = MCII->get(Opcode: Jcc.getOpcode());
351 if (!InstDesc.isConditionalBranch())
352 return false;
353 if (!isFirstMacroFusibleInst(Inst: Cmp, MCII: *MCII))
354 return false;
355 const X86::FirstMacroFusionInstKind CmpKind =
356 X86::classifyFirstOpcodeInMacroFusion(Opcode: Cmp.getOpcode());
357 const X86::SecondMacroFusionInstKind BranchKind =
358 classifySecondInstInMacroFusion(MI: Jcc, MCII: *MCII);
359 return X86::isMacroFused(FirstKind: CmpKind, SecondKind: BranchKind);
360}
361
362/// Check if the instruction has a variant symbol operand.
363static bool hasVariantSymbol(const MCInst &MI) {
364 for (auto &Operand : MI) {
365 if (!Operand.isExpr())
366 continue;
367 const MCExpr &Expr = *Operand.getExpr();
368 if (Expr.getKind() == MCExpr::SymbolRef &&
369 cast<MCSymbolRefExpr>(Val: Expr).getKind() != MCSymbolRefExpr::VK_None)
370 return true;
371 }
372 return false;
373}
374
375bool X86AsmBackend::allowAutoPadding() const {
376 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
377}
378
379bool X86AsmBackend::allowEnhancedRelaxation() const {
380 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
381}
382
383/// X86 has certain instructions which enable interrupts exactly one
384/// instruction *after* the instruction which stores to SS. Return true if the
385/// given instruction may have such an interrupt delay slot.
386static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
387 switch (InstOpcode) {
388 case X86::POPSS16:
389 case X86::POPSS32:
390 case X86::STI:
391 return true;
392
393 case X86::MOV16sr:
394 case X86::MOV32sr:
395 case X86::MOV64sr:
396 case X86::MOV16sm:
397 // In fact, this is only the case if the first operand is SS. However, as
398 // segment moves occur extremely rarely, this is just a minor pessimization.
399 return true;
400 }
401 return false;
402}
403
404/// Check if the instruction to be emitted is right after any data.
405static bool
406isRightAfterData(MCFragment *CurrentFragment,
407 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
408 MCFragment *F = CurrentFragment;
409 // Since data is always emitted into a DataFragment, our check strategy is
410 // simple here.
411 // - If the fragment is a DataFragment
412 // - If it's empty (section start or data after align), return false.
413 // - If it's not the fragment where the previous instruction is,
414 // returns true.
415 // - If it's the fragment holding the previous instruction but its
416 // size changed since the previous instruction was emitted into
417 // it, returns true.
418 // - Otherwise returns false.
419 // - If the fragment is not a DataFragment, returns false.
420 if (auto *DF = dyn_cast_or_null<MCDataFragment>(Val: F))
421 return DF->getContents().size() &&
422 (DF != PrevInstPosition.first ||
423 DF->getContents().size() != PrevInstPosition.second);
424
425 return false;
426}
427
428/// \returns the fragment size if it has instructions, otherwise returns 0.
429static size_t getSizeForInstFragment(const MCFragment *F) {
430 if (!F || !F->hasInstructions())
431 return 0;
432 // MCEncodedFragmentWithContents being templated makes this tricky.
433 switch (F->getKind()) {
434 default:
435 llvm_unreachable("Unknown fragment with instructions!");
436 case MCFragment::FT_Data:
437 return cast<MCDataFragment>(Val: *F).getContents().size();
438 case MCFragment::FT_Relaxable:
439 return cast<MCRelaxableFragment>(Val: *F).getContents().size();
440 case MCFragment::FT_CompactEncodedInst:
441 return cast<MCCompactEncodedInstFragment>(Val: *F).getContents().size();
442 }
443}
444
445/// Return true if we can insert NOP or prefixes automatically before the
446/// the instruction to be emitted.
447bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
448 if (hasVariantSymbol(MI: Inst))
449 // Linker may rewrite the instruction with variant symbol operand(e.g.
450 // TLSCALL).
451 return false;
452
453 if (mayHaveInterruptDelaySlot(InstOpcode: PrevInstOpcode))
454 // If this instruction follows an interrupt enabling instruction with a one
455 // instruction delay, inserting a nop would change behavior.
456 return false;
457
458 if (isPrefix(Opcode: PrevInstOpcode, MCII: *MCII))
459 // If this instruction follows a prefix, inserting a nop/prefix would change
460 // semantic.
461 return false;
462
463 if (isPrefix(Opcode: Inst.getOpcode(), MCII: *MCII))
464 // If this instruction is a prefix, inserting a prefix would change
465 // semantic.
466 return false;
467
468 if (IsRightAfterData)
469 // If this instruction follows any data, there is no clear
470 // instruction boundary, inserting a nop/prefix would change semantic.
471 return false;
472
473 return true;
474}
475
476bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
477 if (!OS.getAllowAutoPadding())
478 return false;
479 assert(allowAutoPadding() && "incorrect initialization!");
480
481 // We only pad in text section.
482 if (!OS.getCurrentSectionOnly()->isText())
483 return false;
484
485 // To be Done: Currently don't deal with Bundle cases.
486 if (OS.getAssembler().isBundlingEnabled())
487 return false;
488
489 // Branches only need to be aligned in 32-bit or 64-bit mode.
490 if (!(STI.hasFeature(Feature: X86::Is64Bit) || STI.hasFeature(Feature: X86::Is32Bit)))
491 return false;
492
493 return true;
494}
495
496/// Check if the instruction operand needs to be aligned.
497bool X86AsmBackend::needAlign(const MCInst &Inst) const {
498 const MCInstrDesc &Desc = MCII->get(Opcode: Inst.getOpcode());
499 return (Desc.isConditionalBranch() &&
500 (AlignBranchType & X86::AlignBranchJcc)) ||
501 (Desc.isUnconditionalBranch() &&
502 (AlignBranchType & X86::AlignBranchJmp)) ||
503 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
504 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
505 (Desc.isIndirectBranch() &&
506 (AlignBranchType & X86::AlignBranchIndirect));
507}
508
509/// Insert BoundaryAlignFragment before instructions to align branches.
///
/// Called before \p Inst is emitted. Always refreshes IsRightAfterData; when
/// branch padding is possible it may additionally insert a new
/// MCBoundaryAlignFragment and remember it in PendingBA.
///
/// Note that the \p STI parameter shadows the STI member inside this function;
/// it is the value handed to the newly allocated fragment.
510void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
511 const MCInst &Inst, const MCSubtargetInfo &STI) {
512 // Used by canPadInst. Done here, because in emitInstructionEnd, the current
513 // fragment will have changed.
514 IsRightAfterData =
515 isRightAfterData(CurrentFragment: OS.getCurrentFragment(), PrevInstPosition);
516
517 if (!canPadBranches(OS))
518 return;
519
520 // NB: PrevInst only valid if canPadBranches is true.
521 if (!isMacroFused(Cmp: PrevInst, Jcc: Inst))
522 // Macro fusion does not happen after all, so drop the pending fragment.
523 PendingBA = nullptr;
524
525 // When branch padding is enabled (basically the skx102 erratum => unlikely),
526 // we call canPadInst (not cheap) twice. However, in the common case, we can
527 // avoid unnecessary calls to that, as this is otherwise only used for
528 // relaxable fragments.
529 if (!canPadInst(Inst, OS))
530 return;
531
532 if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
533 // Macro fusion actually happens and there is no other fragment inserted
534 // after the previous instruction.
535 //
536 // Do nothing here since we already inserted a BoundaryAlign fragment when
537 // we met the first instruction in the fused pair and we'll tie them
538 // together in emitInstructionEnd.
539 //
540 // Note: When there is at least one fragment, such as MCAlignFragment,
541 // inserted after the previous instruction, e.g.
542 //
543 // \code
544 // cmp %rax %rcx
545 // .align 16
546 // je .Label0
547 // \endcode
548 //
549 // We will treat the JCC as an unfused branch although it may be fused
550 // with the CMP.
551 return;
552 }
553
554 if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
555 isFirstMacroFusibleInst(Inst, MCII: *MCII))) {
556 // If we meet an unfused branch or the first instruction in a fusible pair,
557 // insert a BoundaryAlign fragment.
558 PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
559 args&: AlignBoundary, args: STI);
560 OS.insert(F: PendingBA);
561 }
562}
563
564/// Set the last fragment to be aligned for the BoundaryAlignFragment.
///
/// Called after \p Inst has been emitted. Records the per-instruction state
/// used when the next instruction is emitted (PrevInstOpcode,
/// PrevInstPosition, PrevInst) and, when \p Inst needs alignment and a
/// BoundaryAlign fragment is pending, closes that fragment over the current
/// fragment.
565void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
566 const MCInst &Inst) {
567 MCFragment *CF = OS.getCurrentFragment();
// Relaxable fragments remember whether they may be padded later on. This must
// run before PrevInstOpcode is overwritten below, because canPadInst() still
// needs the opcode of the instruction preceding Inst.
568 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(Val: CF))
569 F->setAllowAutoPadding(canPadInst(Inst, OS));
570
571 // Update PrevInstOpcode here, canPadInst() reads that.
572 PrevInstOpcode = Inst.getOpcode();
573 PrevInstPosition = std::make_pair(x&: CF, y: getSizeForInstFragment(F: CF));
574
575 if (!canPadBranches(OS))
576 return;
577
578 // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
579 PrevInst = Inst;
580
// Nothing to close unless Inst is a branch selected for alignment and a
// BoundaryAlign fragment is waiting for it.
581 if (!needAlign(Inst) || !PendingBA)
582 return;
583
584 // Tie the aligned instructions into a pending BoundaryAlign.
585 PendingBA->setLastFragment(CF);
586 PendingBA = nullptr;
587
588 // We need to ensure that further data isn't added to the current
589 // DataFragment, so that we can get the size of instructions later in
590 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
591 // DataFragment.
592 if (isa_and_nonnull<MCDataFragment>(Val: CF))
593 OS.insert(F: OS.getContext().allocFragment<MCDataFragment>());
594
595 // Update the maximum alignment on the current section if necessary.
596 MCSection *Sec = OS.getCurrentSectionOnly();
597 Sec->ensureMinAlignment(MinAlignment: AlignBoundary);
598}
599
/// Translate a relocation name into a fixup kind.
///
/// For ELF targets \p Name is matched against the relocation names listed in
/// the x86_64 or i386 ELF relocation .def file (chosen by the triple's
/// architecture) plus a few BFD_RELOC_* aliases. A match is returned as
/// FirstLiteralRelocationKind + relocation type; an unknown name yields
/// std::nullopt. Non-ELF targets defer to MCAsmBackend::getFixupKind.
600std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
601 if (STI.getTargetTriple().isOSBinFormatELF()) {
602 unsigned Type;
603 if (STI.getTargetTriple().getArch() == Triple::x86_64) {
// ELF_RELOC expands every entry of the included .def file into a .Case()
// of the StringSwitch chain.
604 Type = llvm::StringSwitch<unsigned>(Name)
605#define ELF_RELOC(X, Y) .Case(#X, Y)
606#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
607#undef ELF_RELOC
608 .Case(S: "BFD_RELOC_NONE", Value: ELF::R_X86_64_NONE)
609 .Case(S: "BFD_RELOC_8", Value: ELF::R_X86_64_8)
610 .Case(S: "BFD_RELOC_16", Value: ELF::R_X86_64_16)
611 .Case(S: "BFD_RELOC_32", Value: ELF::R_X86_64_32)
612 .Case(S: "BFD_RELOC_64", Value: ELF::R_X86_64_64)
613 .Default(Value: -1u);
614 } else {
// Every other architecture is looked up in the i386 relocation list; there
// is no BFD_RELOC_64 alias in this branch.
615 Type = llvm::StringSwitch<unsigned>(Name)
616#define ELF_RELOC(X, Y) .Case(#X, Y)
617#include "llvm/BinaryFormat/ELFRelocs/i386.def"
618#undef ELF_RELOC
619 .Case(S: "BFD_RELOC_NONE", Value: ELF::R_386_NONE)
620 .Case(S: "BFD_RELOC_8", Value: ELF::R_386_8)
621 .Case(S: "BFD_RELOC_16", Value: ELF::R_386_16)
622 .Case(S: "BFD_RELOC_32", Value: ELF::R_386_32)
623 .Default(Value: -1u);
624 }
// -1u is the "no match" sentinel produced by the .Default() calls above.
625 if (Type == -1u)
626 return std::nullopt;
627 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
628 }
629 return MCAsmBackend::getFixupKind(Name);
630}
631
/// Return the descriptor (name, bit offset, bit size, flags) for \p Kind.
///
/// Literal relocation kinds map to the generic FK_NONE descriptor, generic
/// kinds are answered by MCAsmBackend, and target kinds are looked up in the
/// local table by their distance from FirstTargetFixupKind.
632const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
// NOTE(review): the table is indexed by Kind - FirstTargetFixupKind, so its
// rows presumably have to follow the declaration order of the X86 fixup kind
// enum (not visible here) - confirm when adding or reordering entries.
633 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
634 {.Name: "reloc_riprel_4byte", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
635 {.Name: "reloc_riprel_4byte_movq_load", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
636 {.Name: "reloc_riprel_4byte_relax", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
637 {.Name: "reloc_riprel_4byte_relax_rex", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
638 {.Name: "reloc_signed_4byte", .TargetOffset: 0, .TargetSize: 32, .Flags: 0},
639 {.Name: "reloc_signed_4byte_relax", .TargetOffset: 0, .TargetSize: 32, .Flags: 0},
640 {.Name: "reloc_global_offset_table", .TargetOffset: 0, .TargetSize: 32, .Flags: 0},
641 {.Name: "reloc_global_offset_table8", .TargetOffset: 0, .TargetSize: 64, .Flags: 0},
642 {.Name: "reloc_branch_4byte_pcrel", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
643 };
644
645 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
646 // do not require any extra processing.
647 if (Kind >= FirstLiteralRelocationKind)
648 return MCAsmBackend::getFixupKindInfo(Kind: FK_NONE);
649
// Generic (target independent) kinds are described by the base class.
650 if (Kind < FirstTargetFixupKind)
651 return MCAsmBackend::getFixupKindInfo(Kind);
652
// What remains must be a target kind; the asserts guard the table lookup
// against an out-of-range index and against a row left unfilled.
653 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
654 "Invalid kind!");
655 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
656 return Infos[Kind - FirstTargetFixupKind];
657}
658
659bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
660 const MCFixup &Fixup, const MCValue &,
661 const MCSubtargetInfo *STI) {
662 return Fixup.getKind() >= FirstLiteralRelocationKind;
663}
664
665static unsigned getFixupKindSize(unsigned Kind) {
666 switch (Kind) {
667 default:
668 llvm_unreachable("invalid fixup kind!");
669 case FK_NONE:
670 return 0;
671 case FK_PCRel_1:
672 case FK_SecRel_1:
673 case FK_Data_1:
674 return 1;
675 case FK_PCRel_2:
676 case FK_SecRel_2:
677 case FK_Data_2:
678 return 2;
679 case FK_PCRel_4:
680 case X86::reloc_riprel_4byte:
681 case X86::reloc_riprel_4byte_relax:
682 case X86::reloc_riprel_4byte_relax_rex:
683 case X86::reloc_riprel_4byte_movq_load:
684 case X86::reloc_signed_4byte:
685 case X86::reloc_signed_4byte_relax:
686 case X86::reloc_global_offset_table:
687 case X86::reloc_branch_4byte_pcrel:
688 case FK_SecRel_4:
689 case FK_Data_4:
690 return 4;
691 case FK_PCRel_8:
692 case FK_SecRel_8:
693 case FK_Data_8:
694 case X86::reloc_global_offset_table8:
695 return 8;
696 }
697}
698
699void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
700 const MCValue &Target,
701 MutableArrayRef<char> Data,
702 uint64_t Value, bool IsResolved,
703 const MCSubtargetInfo *STI) const {
704 unsigned Kind = Fixup.getKind();
705 if (Kind >= FirstLiteralRelocationKind)
706 return;
707 unsigned Size = getFixupKindSize(Kind);
708
709 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
710
711 int64_t SignedValue = static_cast<int64_t>(Value);
712 if ((Target.isAbsolute() || IsResolved) &&
713 getFixupKindInfo(Kind: Fixup.getKind()).Flags &
714 MCFixupKindInfo::FKF_IsPCRel) {
715 // check that PC relative fixup fits into the fixup size.
716 if (Size > 0 && !isIntN(N: Size * 8, x: SignedValue))
717 Asm.getContext().reportError(
718 L: Fixup.getLoc(), Msg: "value of " + Twine(SignedValue) +
719 " is too large for field of " + Twine(Size) +
720 ((Size == 1) ? " byte." : " bytes."));
721 } else {
722 // Check that uppper bits are either all zeros or all ones.
723 // Specifically ignore overflow/underflow as long as the leakage is
724 // limited to the lower bits. This is to remain compatible with
725 // other assemblers.
726 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
727 "Value does not fit in the Fixup field");
728 }
729
730 for (unsigned i = 0; i != Size; ++i)
731 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
732}
733
734bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
735 const MCSubtargetInfo &STI) const {
736 unsigned Opcode = MI.getOpcode();
737 unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
738 return isRelaxableBranch(Opcode) ||
739 (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
740 MI.getOperand(i: MI.getNumOperands() - 1 - SkipOperands).isExpr());
741}
742
743bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
744 uint64_t Value) const {
745 // Relax if the value is too big for a (signed) i8.
746 return !isInt<8>(x: Value);
747}
748
749// FIXME: Can tblgen help at all here to verify there aren't other instructions
750// we can relax?
751void X86AsmBackend::relaxInstruction(MCInst &Inst,
752 const MCSubtargetInfo &STI) const {
753 // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
754 bool Is16BitMode = STI.hasFeature(Feature: X86::Is16Bit);
755 unsigned RelaxedOp = getRelaxedOpcode(MI: Inst, Is16BitMode);
756
757 if (RelaxedOp == Inst.getOpcode()) {
758 SmallString<256> Tmp;
759 raw_svector_ostream OS(Tmp);
760 Inst.dump_pretty(OS);
761 OS << "\n";
762 report_fatal_error(reason: "unexpected instruction to relax: " + OS.str());
763 }
764
765 Inst.setOpcode(RelaxedOp);
766}
767
768bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
769 MCCodeEmitter &Emitter,
770 unsigned &RemainingSize) const {
771 if (!RF.getAllowAutoPadding())
772 return false;
773 // If the instruction isn't fully relaxed, shifting it around might require a
774 // larger value for one of the fixups then can be encoded. The outer loop
775 // will also catch this before moving to the next instruction, but we need to
776 // prevent padding this single instruction as well.
777 if (mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
778 return false;
779
780 const unsigned OldSize = RF.getContents().size();
781 if (OldSize == 15)
782 return false;
783
784 const unsigned MaxPossiblePad = std::min(a: 15 - OldSize, b: RemainingSize);
785 const unsigned RemainingPrefixSize = [&]() -> unsigned {
786 SmallString<15> Code;
787 X86_MC::emitPrefix(MCE&: Emitter, MI: RF.getInst(), CB&: Code, STI);
788 assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
789
790 // TODO: It turns out we need a decent amount of plumbing for the target
791 // specific bits to determine number of prefixes its safe to add. Various
792 // targets (older chips mostly, but also Atom family) encounter decoder
793 // stalls with too many prefixes. For testing purposes, we set the value
794 // externally for the moment.
795 unsigned ExistingPrefixSize = Code.size();
796 if (TargetPrefixMax <= ExistingPrefixSize)
797 return 0;
798 return TargetPrefixMax - ExistingPrefixSize;
799 }();
800 const unsigned PrefixBytesToAdd =
801 std::min(a: MaxPossiblePad, b: RemainingPrefixSize);
802 if (PrefixBytesToAdd == 0)
803 return false;
804
805 const uint8_t Prefix = determinePaddingPrefix(Inst: RF.getInst());
806
807 SmallString<256> Code;
808 Code.append(NumInputs: PrefixBytesToAdd, Elt: Prefix);
809 Code.append(in_start: RF.getContents().begin(), in_end: RF.getContents().end());
810 RF.getContents() = Code;
811
812 // Adjust the fixups for the change in offsets
813 for (auto &F : RF.getFixups()) {
814 F.setOffset(F.getOffset() + PrefixBytesToAdd);
815 }
816
817 RemainingSize -= PrefixBytesToAdd;
818 return true;
819}
820
821bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
822 MCCodeEmitter &Emitter,
823 unsigned &RemainingSize) const {
824 if (!mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
825 // TODO: There are lots of other tricks we could apply for increasing
826 // encoding size without impacting performance.
827 return false;
828
829 MCInst Relaxed = RF.getInst();
830 relaxInstruction(Inst&: Relaxed, STI: *RF.getSubtargetInfo());
831
832 SmallVector<MCFixup, 4> Fixups;
833 SmallString<15> Code;
834 Emitter.encodeInstruction(Inst: Relaxed, CB&: Code, Fixups, STI: *RF.getSubtargetInfo());
835 const unsigned OldSize = RF.getContents().size();
836 const unsigned NewSize = Code.size();
837 assert(NewSize >= OldSize && "size decrease during relaxation?");
838 unsigned Delta = NewSize - OldSize;
839 if (Delta > RemainingSize)
840 return false;
841 RF.setInst(Relaxed);
842 RF.getContents() = Code;
843 RF.getFixups() = Fixups;
844 RemainingSize -= Delta;
845 return true;
846}
847
848bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
849 MCCodeEmitter &Emitter,
850 unsigned &RemainingSize) const {
851 bool Changed = false;
852 if (RemainingSize != 0)
853 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
854 if (RemainingSize != 0)
855 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
856 return Changed;
857}
858
859void X86AsmBackend::finishLayout(MCAssembler const &Asm) const {
860 // See if we can further relax some instructions to cut down on the number of
861 // nop bytes required for code alignment. The actual win is in reducing
862 // instruction count, not number of bytes. Modern X86-64 can easily end up
863 // decode limited. It is often better to reduce the number of instructions
864 // (i.e. eliminate nops) even at the cost of increasing the size and
865 // complexity of others.
866 if (!X86PadForAlign && !X86PadForBranchAlign)
867 return;
868
869 // The processed regions are delimitered by LabeledFragments. -g may have more
870 // MCSymbols and therefore different relaxation results. X86PadForAlign is
871 // disabled by default to eliminate the -g vs non -g difference.
872 DenseSet<MCFragment *> LabeledFragments;
873 for (const MCSymbol &S : Asm.symbols())
874 LabeledFragments.insert(V: S.getFragment(SetUsed: false));
875
876 for (MCSection &Sec : Asm) {
877 if (!Sec.isText())
878 continue;
879
880 SmallVector<MCRelaxableFragment *, 4> Relaxable;
881 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
882 MCFragment &F = *I;
883
884 if (LabeledFragments.count(V: &F))
885 Relaxable.clear();
886
887 if (F.getKind() == MCFragment::FT_Data ||
888 F.getKind() == MCFragment::FT_CompactEncodedInst)
889 // Skip and ignore
890 continue;
891
892 if (F.getKind() == MCFragment::FT_Relaxable) {
893 auto &RF = cast<MCRelaxableFragment>(Val&: *I);
894 Relaxable.push_back(Elt: &RF);
895 continue;
896 }
897
898 auto canHandle = [](MCFragment &F) -> bool {
899 switch (F.getKind()) {
900 default:
901 return false;
902 case MCFragment::FT_Align:
903 return X86PadForAlign;
904 case MCFragment::FT_BoundaryAlign:
905 return X86PadForBranchAlign;
906 }
907 };
908 // For any unhandled kind, assume we can't change layout.
909 if (!canHandle(F)) {
910 Relaxable.clear();
911 continue;
912 }
913
914#ifndef NDEBUG
915 const uint64_t OrigOffset = Asm.getFragmentOffset(F);
916#endif
917 const uint64_t OrigSize = Asm.computeFragmentSize(F);
918
919 // To keep the effects local, prefer to relax instructions closest to
920 // the align directive. This is purely about human understandability
921 // of the resulting code. If we later find a reason to expand
922 // particular instructions over others, we can adjust.
923 unsigned RemainingSize = OrigSize;
924 while (!Relaxable.empty() && RemainingSize != 0) {
925 auto &RF = *Relaxable.pop_back_val();
926 // Give the backend a chance to play any tricks it wishes to increase
927 // the encoding size of the given instruction. Target independent code
928 // will try further relaxation, but target's may play further tricks.
929 if (padInstructionEncoding(RF, Emitter&: Asm.getEmitter(), RemainingSize))
930 Sec.setHasLayout(false);
931
932 // If we have an instruction which hasn't been fully relaxed, we can't
933 // skip past it and insert bytes before it. Changing its starting
934 // offset might require a larger negative offset than it can encode.
935 // We don't need to worry about larger positive offsets as none of the
936 // possible offsets between this and our align are visible, and the
937 // ones afterwards aren't changing.
938 if (mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
939 break;
940 }
941 Relaxable.clear();
942
943 // BoundaryAlign explicitly tracks it's size (unlike align)
944 if (F.getKind() == MCFragment::FT_BoundaryAlign)
945 cast<MCBoundaryAlignFragment>(Val&: F).setSize(RemainingSize);
946
947#ifndef NDEBUG
948 const uint64_t FinalOffset = Asm.getFragmentOffset(F);
949 const uint64_t FinalSize = Asm.computeFragmentSize(F);
950 assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
951 "can't move start of next fragment!");
952 assert(FinalSize == RemainingSize && "inconsistent size computation?");
953#endif
954
955 // If we're looking at a boundary align, make sure we don't try to pad
956 // its target instructions for some following directive. Doing so would
957 // break the alignment of the current boundary align.
958 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(Val: &F)) {
959 const MCFragment *LastFragment = BF->getLastFragment();
960 if (!LastFragment)
961 continue;
962 while (&*I != LastFragment)
963 ++I;
964 }
965 }
966 }
967
968 // The layout is done. Mark every fragment as valid.
969 for (MCSection &Section : Asm) {
970 Asm.getFragmentOffset(F: *Section.curFragList()->Tail);
971 Asm.computeFragmentSize(F: *Section.curFragList()->Tail);
972 }
973}
974
975unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
976 if (STI.hasFeature(Feature: X86::Is16Bit))
977 return 4;
978 if (!STI.hasFeature(Feature: X86::FeatureNOPL) && !STI.hasFeature(Feature: X86::Is64Bit))
979 return 1;
980 if (STI.hasFeature(Feature: X86::TuningFast7ByteNOP))
981 return 7;
982 if (STI.hasFeature(Feature: X86::TuningFast15ByteNOP))
983 return 15;
984 if (STI.hasFeature(Feature: X86::TuningFast11ByteNOP))
985 return 11;
986 // FIXME: handle 32-bit mode
987 // 15-bytes is the longest single NOP instruction, but 10-bytes is
988 // commonly the longest that can be efficiently decoded.
989 return 10;
990}
991
992/// Write a sequence of optimal nops to the output, covering \p Count
993/// bytes.
994/// \return - true on success, false on failure
995bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
996 const MCSubtargetInfo *STI) const {
997 static const char Nops32Bit[10][11] = {
998 // nop
999 "\x90",
1000 // xchg %ax,%ax
1001 "\x66\x90",
1002 // nopl (%[re]ax)
1003 "\x0f\x1f\x00",
1004 // nopl 0(%[re]ax)
1005 "\x0f\x1f\x40\x00",
1006 // nopl 0(%[re]ax,%[re]ax,1)
1007 "\x0f\x1f\x44\x00\x00",
1008 // nopw 0(%[re]ax,%[re]ax,1)
1009 "\x66\x0f\x1f\x44\x00\x00",
1010 // nopl 0L(%[re]ax)
1011 "\x0f\x1f\x80\x00\x00\x00\x00",
1012 // nopl 0L(%[re]ax,%[re]ax,1)
1013 "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1014 // nopw 0L(%[re]ax,%[re]ax,1)
1015 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1016 // nopw %cs:0L(%[re]ax,%[re]ax,1)
1017 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1018 };
1019
1020 // 16-bit mode uses different nop patterns than 32-bit.
1021 static const char Nops16Bit[4][11] = {
1022 // nop
1023 "\x90",
1024 // xchg %eax,%eax
1025 "\x66\x90",
1026 // lea 0(%si),%si
1027 "\x8d\x74\x00",
1028 // lea 0w(%si),%si
1029 "\x8d\xb4\x00\x00",
1030 };
1031
1032 const char(*Nops)[11] =
1033 STI->hasFeature(Feature: X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1034
1035 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(STI: *STI);
1036
1037 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1038 // length.
1039 do {
1040 const uint8_t ThisNopLength = (uint8_t) std::min(a: Count, b: MaxNopLength);
1041 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1042 for (uint8_t i = 0; i < Prefixes; i++)
1043 OS << '\x66';
1044 const uint8_t Rest = ThisNopLength - Prefixes;
1045 if (Rest != 0)
1046 OS.write(Ptr: Nops[Rest - 1], Size: Rest);
1047 Count -= ThisNopLength;
1048 } while (Count != 0);
1049
1050 return true;
1051}
1052
1053/* *** */
1054
1055namespace {
1056
1057class ELFX86AsmBackend : public X86AsmBackend {
1058public:
1059 uint8_t OSABI;
1060 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1061 : X86AsmBackend(T, STI), OSABI(OSABI) {}
1062};
1063
1064class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1065public:
1066 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1067 const MCSubtargetInfo &STI)
1068 : ELFX86AsmBackend(T, OSABI, STI) {}
1069
1070 std::unique_ptr<MCObjectTargetWriter>
1071 createObjectTargetWriter() const override {
1072 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, EMachine: ELF::EM_386);
1073 }
1074};
1075
1076class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1077public:
1078 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1079 const MCSubtargetInfo &STI)
1080 : ELFX86AsmBackend(T, OSABI, STI) {}
1081
1082 std::unique_ptr<MCObjectTargetWriter>
1083 createObjectTargetWriter() const override {
1084 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1085 EMachine: ELF::EM_X86_64);
1086 }
1087};
1088
1089class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1090public:
1091 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1092 const MCSubtargetInfo &STI)
1093 : ELFX86AsmBackend(T, OSABI, STI) {}
1094
1095 std::unique_ptr<MCObjectTargetWriter>
1096 createObjectTargetWriter() const override {
1097 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1098 EMachine: ELF::EM_IAMCU);
1099 }
1100};
1101
1102class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1103public:
1104 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1105 const MCSubtargetInfo &STI)
1106 : ELFX86AsmBackend(T, OSABI, STI) {}
1107
1108 std::unique_ptr<MCObjectTargetWriter>
1109 createObjectTargetWriter() const override {
1110 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, EMachine: ELF::EM_X86_64);
1111 }
1112};
1113
1114class WindowsX86AsmBackend : public X86AsmBackend {
1115 bool Is64Bit;
1116
1117public:
1118 WindowsX86AsmBackend(const Target &T, bool is64Bit,
1119 const MCSubtargetInfo &STI)
1120 : X86AsmBackend(T, STI)
1121 , Is64Bit(is64Bit) {
1122 }
1123
1124 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1125 return StringSwitch<std::optional<MCFixupKind>>(Name)
1126 .Case(S: "dir32", Value: FK_Data_4)
1127 .Case(S: "secrel32", Value: FK_SecRel_4)
1128 .Case(S: "secidx", Value: FK_SecRel_2)
1129 .Default(Value: MCAsmBackend::getFixupKind(Name));
1130 }
1131
1132 std::unique_ptr<MCObjectTargetWriter>
1133 createObjectTargetWriter() const override {
1134 return createX86WinCOFFObjectWriter(Is64Bit);
1135 }
1136};
1137
namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.  (0x01000000)
  UNWIND_MODE_BP_FRAME = 1 << 24,

  /// A frameless function with a small constant stack size.  (0x02000000)
  UNWIND_MODE_STACK_IMMD = 2 << 24,

  /// A frameless function with a large constant stack size.  (0x03000000)
  UNWIND_MODE_STACK_IND = 3 << 24,

  /// No compact unwind encoding is available.  (0x04000000)
  UNWIND_MODE_DWARF = 4 << 24,

  /// Mask for encoding the frame registers: the low 15 bits.  (0x00007FFF)
  UNWIND_BP_FRAME_REGISTERS = (1 << 15) - 1,

  /// Mask for encoding the frameless registers: the low 10 bits.
  /// (0x000003FF)
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = (1 << 10) - 1
};

} // namespace CU
1163
1164class DarwinX86AsmBackend : public X86AsmBackend {
1165 const MCRegisterInfo &MRI;
1166
1167 /// Number of registers that can be saved in a compact unwind encoding.
1168 enum { CU_NUM_SAVED_REGS = 6 };
1169
1170 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1171 Triple TT;
1172 bool Is64Bit;
1173
1174 unsigned OffsetSize; ///< Offset of a "push" instruction.
1175 unsigned MoveInstrSize; ///< Size of a "move" instruction.
1176 unsigned StackDivide; ///< Amount to adjust stack size by.
1177protected:
1178 /// Size of a "push" instruction for the given register.
1179 unsigned PushInstrSize(unsigned Reg) const {
1180 switch (Reg) {
1181 case X86::EBX:
1182 case X86::ECX:
1183 case X86::EDX:
1184 case X86::EDI:
1185 case X86::ESI:
1186 case X86::EBP:
1187 case X86::RBX:
1188 case X86::RBP:
1189 return 1;
1190 case X86::R12:
1191 case X86::R13:
1192 case X86::R14:
1193 case X86::R15:
1194 return 2;
1195 }
1196 return 1;
1197 }
1198
1199private:
1200 /// Get the compact unwind number for a given register. The number
1201 /// corresponds to the enum lists in compact_unwind_encoding.h.
1202 int getCompactUnwindRegNum(unsigned Reg) const {
1203 static const MCPhysReg CU32BitRegs[7] = {
1204 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1205 };
1206 static const MCPhysReg CU64BitRegs[] = {
1207 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1208 };
1209 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1210 for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1211 if (*CURegs == Reg)
1212 return Idx;
1213
1214 return -1;
1215 }
1216
1217 /// Return the registers encoded for a compact encoding with a frame
1218 /// pointer.
1219 uint32_t encodeCompactUnwindRegistersWithFrame() const {
1220 // Encode the registers in the order they were saved --- 3-bits per
1221 // register. The list of saved registers is assumed to be in reverse
1222 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1223 uint32_t RegEnc = 0;
1224 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1225 unsigned Reg = SavedRegs[i];
1226 if (Reg == 0) break;
1227
1228 int CURegNum = getCompactUnwindRegNum(Reg);
1229 if (CURegNum == -1) return ~0U;
1230
1231 // Encode the 3-bit register number in order, skipping over 3-bits for
1232 // each register.
1233 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1234 }
1235
1236 assert((RegEnc & 0x3FFFF) == RegEnc &&
1237 "Invalid compact register encoding!");
1238 return RegEnc;
1239 }
1240
1241 /// Create the permutation encoding used with frameless stacks. It is
1242 /// passed the number of registers to be saved and an array of the registers
1243 /// saved.
1244 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1245 // The saved registers are numbered from 1 to 6. In order to encode the
1246 // order in which they were saved, we re-number them according to their
1247 // place in the register order. The re-numbering is relative to the last
1248 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1249 // that order:
1250 //
1251 // Orig Re-Num
1252 // ---- ------
1253 // 6 6
1254 // 2 2
1255 // 4 3
1256 // 5 3
1257 //
1258 for (unsigned i = 0; i < RegCount; ++i) {
1259 int CUReg = getCompactUnwindRegNum(Reg: SavedRegs[i]);
1260 if (CUReg == -1) return ~0U;
1261 SavedRegs[i] = CUReg;
1262 }
1263
1264 // Reverse the list.
1265 std::reverse(first: &SavedRegs[0], last: &SavedRegs[CU_NUM_SAVED_REGS]);
1266
1267 uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1268 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1269 unsigned Countless = 0;
1270 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1271 if (SavedRegs[j] < SavedRegs[i])
1272 ++Countless;
1273
1274 RenumRegs[i] = SavedRegs[i] - Countless - 1;
1275 }
1276
1277 // Take the renumbered values and encode them into a 10-bit number.
1278 uint32_t permutationEncoding = 0;
1279 switch (RegCount) {
1280 case 6:
1281 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1282 + 6 * RenumRegs[2] + 2 * RenumRegs[3]
1283 + RenumRegs[4];
1284 break;
1285 case 5:
1286 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1287 + 6 * RenumRegs[3] + 2 * RenumRegs[4]
1288 + RenumRegs[5];
1289 break;
1290 case 4:
1291 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
1292 + 3 * RenumRegs[4] + RenumRegs[5];
1293 break;
1294 case 3:
1295 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
1296 + RenumRegs[5];
1297 break;
1298 case 2:
1299 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
1300 break;
1301 case 1:
1302 permutationEncoding |= RenumRegs[5];
1303 break;
1304 }
1305
1306 assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1307 "Invalid compact register encoding!");
1308 return permutationEncoding;
1309 }
1310
1311public:
1312 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1313 const MCSubtargetInfo &STI)
1314 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1315 Is64Bit(TT.isArch64Bit()) {
1316 memset(s: SavedRegs, c: 0, n: sizeof(SavedRegs));
1317 OffsetSize = Is64Bit ? 8 : 4;
1318 MoveInstrSize = Is64Bit ? 3 : 2;
1319 StackDivide = Is64Bit ? 8 : 4;
1320 }
1321
1322 std::unique_ptr<MCObjectTargetWriter>
1323 createObjectTargetWriter() const override {
1324 uint32_t CPUType = cantFail(ValOrErr: MachO::getCPUType(T: TT));
1325 uint32_t CPUSubType = cantFail(ValOrErr: MachO::getCPUSubType(T: TT));
1326 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubtype: CPUSubType);
1327 }
1328
1329 /// Implementation of algorithm to generate the compact unwind encoding
1330 /// for the CFI instructions.
1331 uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1332 const MCContext *Ctxt) const override {
1333 ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1334 if (Instrs.empty()) return 0;
1335 if (!isDarwinCanonicalPersonality(Sym: FI->Personality) &&
1336 !Ctxt->emitCompactUnwindNonCanonical())
1337 return CU::UNWIND_MODE_DWARF;
1338
1339 // Reset the saved registers.
1340 unsigned SavedRegIdx = 0;
1341 memset(s: SavedRegs, c: 0, n: sizeof(SavedRegs));
1342
1343 bool HasFP = false;
1344
1345 // Encode that we are using EBP/RBP as the frame pointer.
1346 uint64_t CompactUnwindEncoding = 0;
1347
1348 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1349 unsigned InstrOffset = 0;
1350 unsigned StackAdjust = 0;
1351 uint64_t StackSize = 0;
1352 int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();
1353
1354 for (const MCCFIInstruction &Inst : Instrs) {
1355 switch (Inst.getOperation()) {
1356 default:
1357 // Any other CFI directives indicate a frame that we aren't prepared
1358 // to represent via compact unwind, so just bail out.
1359 return CU::UNWIND_MODE_DWARF;
1360 case MCCFIInstruction::OpDefCfaRegister: {
1361 // Defines a frame pointer. E.g.
1362 //
1363 // movq %rsp, %rbp
1364 // L0:
1365 // .cfi_def_cfa_register %rbp
1366 //
1367 HasFP = true;
1368
1369 // If the frame pointer is other than esp/rsp, we do not have a way to
1370 // generate a compact unwinding representation, so bail out.
1371 if (*MRI.getLLVMRegNum(RegNum: Inst.getRegister(), isEH: true) !=
1372 (Is64Bit ? X86::RBP : X86::EBP))
1373 return CU::UNWIND_MODE_DWARF;
1374
1375 // Reset the counts.
1376 memset(s: SavedRegs, c: 0, n: sizeof(SavedRegs));
1377 StackAdjust = 0;
1378 SavedRegIdx = 0;
1379 MinAbsOffset = std::numeric_limits<int64_t>::max();
1380 InstrOffset += MoveInstrSize;
1381 break;
1382 }
1383 case MCCFIInstruction::OpDefCfaOffset: {
1384 // Defines a new offset for the CFA. E.g.
1385 //
1386 // With frame:
1387 //
1388 // pushq %rbp
1389 // L0:
1390 // .cfi_def_cfa_offset 16
1391 //
1392 // Without frame:
1393 //
1394 // subq $72, %rsp
1395 // L0:
1396 // .cfi_def_cfa_offset 80
1397 //
1398 StackSize = Inst.getOffset() / StackDivide;
1399 break;
1400 }
1401 case MCCFIInstruction::OpOffset: {
1402 // Defines a "push" of a callee-saved register. E.g.
1403 //
1404 // pushq %r15
1405 // pushq %r14
1406 // pushq %rbx
1407 // L0:
1408 // subq $120, %rsp
1409 // L1:
1410 // .cfi_offset %rbx, -40
1411 // .cfi_offset %r14, -32
1412 // .cfi_offset %r15, -24
1413 //
1414 if (SavedRegIdx == CU_NUM_SAVED_REGS)
1415 // If there are too many saved registers, we cannot use a compact
1416 // unwind encoding.
1417 return CU::UNWIND_MODE_DWARF;
1418
1419 unsigned Reg = *MRI.getLLVMRegNum(RegNum: Inst.getRegister(), isEH: true);
1420 SavedRegs[SavedRegIdx++] = Reg;
1421 StackAdjust += OffsetSize;
1422 MinAbsOffset = std::min(a: MinAbsOffset, b: std::abs(i: Inst.getOffset()));
1423 InstrOffset += PushInstrSize(Reg);
1424 break;
1425 }
1426 }
1427 }
1428
1429 StackAdjust /= StackDivide;
1430
1431 if (HasFP) {
1432 if ((StackAdjust & 0xFF) != StackAdjust)
1433 // Offset was too big for a compact unwind encoding.
1434 return CU::UNWIND_MODE_DWARF;
1435
1436 // We don't attempt to track a real StackAdjust, so if the saved registers
1437 // aren't adjacent to rbp we can't cope.
1438 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1439 return CU::UNWIND_MODE_DWARF;
1440
1441 // Get the encoding of the saved registers when we have a frame pointer.
1442 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1443 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1444
1445 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1446 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1447 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1448 } else {
1449 SubtractInstrIdx += InstrOffset;
1450 ++StackAdjust;
1451
1452 if ((StackSize & 0xFF) == StackSize) {
1453 // Frameless stack with a small stack size.
1454 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1455
1456 // Encode the stack size.
1457 CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1458 } else {
1459 if ((StackAdjust & 0x7) != StackAdjust)
1460 // The extra stack adjustments are too big for us to handle.
1461 return CU::UNWIND_MODE_DWARF;
1462
1463 // Frameless stack with an offset too large for us to encode compactly.
1464 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1465
1466 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1467 // instruction.
1468 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1469
1470 // Encode any extra stack adjustments (done via push instructions).
1471 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1472 }
1473
1474 // Encode the number of registers saved. (Reverse the list first.)
1475 std::reverse(first: &SavedRegs[0], last: &SavedRegs[SavedRegIdx]);
1476 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1477
1478 // Get the encoding of the saved registers when we don't have a frame
1479 // pointer.
1480 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(RegCount: SavedRegIdx);
1481 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1482
1483 // Encode the register encoding.
1484 CompactUnwindEncoding |=
1485 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1486 }
1487
1488 return CompactUnwindEncoding;
1489 }
1490};
1491
1492} // end anonymous namespace
1493
1494MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1495 const MCSubtargetInfo &STI,
1496 const MCRegisterInfo &MRI,
1497 const MCTargetOptions &Options) {
1498 const Triple &TheTriple = STI.getTargetTriple();
1499 if (TheTriple.isOSBinFormatMachO())
1500 return new DarwinX86AsmBackend(T, MRI, STI);
1501
1502 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1503 return new WindowsX86AsmBackend(T, false, STI);
1504
1505 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType: TheTriple.getOS());
1506
1507 if (TheTriple.isOSIAMCU())
1508 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1509
1510 return new ELFX86_32AsmBackend(T, OSABI, STI);
1511}
1512
1513MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1514 const MCSubtargetInfo &STI,
1515 const MCRegisterInfo &MRI,
1516 const MCTargetOptions &Options) {
1517 const Triple &TheTriple = STI.getTargetTriple();
1518 if (TheTriple.isOSBinFormatMachO())
1519 return new DarwinX86AsmBackend(T, MRI, STI);
1520
1521 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1522 return new WindowsX86AsmBackend(T, true, STI);
1523
1524 if (TheTriple.isUEFI()) {
1525 assert(TheTriple.isOSBinFormatCOFF() &&
1526 "Only COFF format is supported in UEFI environment.");
1527 return new WindowsX86AsmBackend(T, true, STI);
1528 }
1529
1530 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType: TheTriple.getOS());
1531
1532 if (TheTriple.isX32())
1533 return new ELFX86_X32AsmBackend(T, OSABI, STI);
1534 return new ELFX86_64AsmBackend(T, OSABI, STI);
1535}
1536
1537namespace {
1538class X86ELFStreamer : public MCELFStreamer {
1539public:
1540 X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
1541 std::unique_ptr<MCObjectWriter> OW,
1542 std::unique_ptr<MCCodeEmitter> Emitter)
1543 : MCELFStreamer(Context, std::move(TAB), std::move(OW),
1544 std::move(Emitter)) {}
1545
1546 void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
1547};
1548} // end anonymous namespace
1549
1550void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
1551 const MCSubtargetInfo &STI) {
1552 auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
1553 Backend.emitInstructionBegin(OS&: S, Inst, STI);
1554 S.MCObjectStreamer::emitInstruction(Inst, STI);
1555 Backend.emitInstructionEnd(OS&: S, Inst);
1556}
1557
1558void X86ELFStreamer::emitInstruction(const MCInst &Inst,
1559 const MCSubtargetInfo &STI) {
1560 X86_MC::emitInstruction(S&: *this, Inst, STI);
1561}
1562
1563MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
1564 std::unique_ptr<MCAsmBackend> &&MAB,
1565 std::unique_ptr<MCObjectWriter> &&MOW,
1566 std::unique_ptr<MCCodeEmitter> &&MCE) {
1567 return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
1568 std::move(MCE));
1569}
1570