1//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MCTargetDesc/X86BaseInfo.h"
10#include "MCTargetDesc/X86EncodingOptimization.h"
11#include "MCTargetDesc/X86FixupKinds.h"
12#include "MCTargetDesc/X86MCAsmInfo.h"
13#include "llvm/ADT/StringSwitch.h"
14#include "llvm/BinaryFormat/ELF.h"
15#include "llvm/BinaryFormat/MachO.h"
16#include "llvm/MC/MCAsmBackend.h"
17#include "llvm/MC/MCAssembler.h"
18#include "llvm/MC/MCCodeEmitter.h"
19#include "llvm/MC/MCContext.h"
20#include "llvm/MC/MCDwarf.h"
21#include "llvm/MC/MCELFObjectWriter.h"
22#include "llvm/MC/MCELFStreamer.h"
23#include "llvm/MC/MCExpr.h"
24#include "llvm/MC/MCFixupKindInfo.h"
25#include "llvm/MC/MCInst.h"
26#include "llvm/MC/MCInstrInfo.h"
27#include "llvm/MC/MCObjectStreamer.h"
28#include "llvm/MC/MCObjectWriter.h"
29#include "llvm/MC/MCRegisterInfo.h"
30#include "llvm/MC/MCSubtargetInfo.h"
31#include "llvm/MC/MCValue.h"
32#include "llvm/MC/TargetRegistry.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/raw_ostream.h"
36
37using namespace llvm;
38
39namespace {
40/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
41class X86AlignBranchKind {
42private:
43 uint8_t AlignBranchKind = 0;
44
45public:
46 void operator=(const std::string &Val) {
47 if (Val.empty())
48 return;
49 SmallVector<StringRef, 6> BranchTypes;
50 StringRef(Val).split(A&: BranchTypes, Separator: '+', MaxSplit: -1, KeepEmpty: false);
51 for (auto BranchType : BranchTypes) {
52 if (BranchType == "fused")
53 addKind(Value: X86::AlignBranchFused);
54 else if (BranchType == "jcc")
55 addKind(Value: X86::AlignBranchJcc);
56 else if (BranchType == "jmp")
57 addKind(Value: X86::AlignBranchJmp);
58 else if (BranchType == "call")
59 addKind(Value: X86::AlignBranchCall);
60 else if (BranchType == "ret")
61 addKind(Value: X86::AlignBranchRet);
62 else if (BranchType == "indirect")
63 addKind(Value: X86::AlignBranchIndirect);
64 else {
65 errs() << "invalid argument " << BranchType.str()
66 << " to -x86-align-branch=; each element must be one of: fused, "
67 "jcc, jmp, call, ret, indirect.(plus separated)\n";
68 }
69 }
70 }
71
72 operator uint8_t() const { return AlignBranchKind; }
73 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
74};
75
76X86AlignBranchKind X86AlignBranchKindLoc;
77
78cl::opt<unsigned> X86AlignBranchBoundary(
79 "x86-align-branch-boundary", cl::init(Val: 0),
80 cl::desc(
81 "Control how the assembler should align branches with NOP. If the "
82 "boundary's size is not 0, it should be a power of 2 and no less "
83 "than 32. Branches will be aligned to prevent from being across or "
84 "against the boundary of specified size. The default value 0 does not "
85 "align branches."));
86
87cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
88 "x86-align-branch",
89 cl::desc(
90 "Specify types of branches to align (plus separated list of types):"
91 "\njcc indicates conditional jumps"
92 "\nfused indicates fused conditional jumps"
93 "\njmp indicates direct unconditional jumps"
94 "\ncall indicates direct and indirect calls"
95 "\nret indicates rets"
96 "\nindirect indicates indirect unconditional jumps"),
97 cl::location(L&: X86AlignBranchKindLoc));
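// As an illustrative example (not an exhaustive invocation), passing
//   -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp
// to the assembler requests that fused conditional branches, conditional
// jumps and unconditional jumps be aligned so that they neither cross nor
// end at a 32-byte boundary.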
98
99cl::opt<bool> X86AlignBranchWithin32BBoundaries(
100 "x86-branches-within-32B-boundaries", cl::init(Val: false),
101 cl::desc(
102 "Align selected instructions to mitigate negative performance impact "
103 "of Intel's micro code update for errata skx102. May break "
104 "assumptions about labels corresponding to particular instructions, "
105 "and should be used with caution."));
106
107cl::opt<unsigned> X86PadMaxPrefixSize(
108 "x86-pad-max-prefix-size", cl::init(Val: 0),
109 cl::desc("Maximum number of prefixes to use for padding"));
110
111cl::opt<bool> X86PadForAlign(
112 "x86-pad-for-align", cl::init(Val: false), cl::Hidden,
113 cl::desc("Pad previous instructions to implement align directives"));
114
115cl::opt<bool> X86PadForBranchAlign(
116 "x86-pad-for-branch-align", cl::init(Val: true), cl::Hidden,
117 cl::desc("Pad previous instructions to implement branch alignment"));
118
119class X86AsmBackend : public MCAsmBackend {
120 const MCSubtargetInfo &STI;
121 std::unique_ptr<const MCInstrInfo> MCII;
122 X86AlignBranchKind AlignBranchType;
123 Align AlignBoundary;
124 unsigned TargetPrefixMax = 0;
125
126 MCInst PrevInst;
127 unsigned PrevInstOpcode = 0;
128 MCBoundaryAlignFragment *PendingBA = nullptr;
129 std::pair<MCFragment *, size_t> PrevInstPosition;
130 bool IsRightAfterData = false;
131
132 uint8_t determinePaddingPrefix(const MCInst &Inst) const;
133 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
134 bool needAlign(const MCInst &Inst) const;
135 bool canPadBranches(MCObjectStreamer &OS) const;
136 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
137
138public:
139 X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
140 : MCAsmBackend(llvm::endianness::little), STI(STI),
141 MCII(T.createMCInstrInfo()) {
142 if (X86AlignBranchWithin32BBoundaries) {
143 // At the moment, this defaults to aligning fused branches, unconditional
144 // jumps, and (unfused) conditional jumps with nops. Both the
145 // instructions aligned and the alignment method (nop vs prefix) may
146 // change in the future.
147 AlignBoundary = assumeAligned(Value: 32);
148 AlignBranchType.addKind(Value: X86::AlignBranchFused);
149 AlignBranchType.addKind(Value: X86::AlignBranchJcc);
150 AlignBranchType.addKind(Value: X86::AlignBranchJmp);
151 }
152 // Allow overriding defaults set by main flag
153 if (X86AlignBranchBoundary.getNumOccurrences())
154 AlignBoundary = assumeAligned(Value: X86AlignBranchBoundary);
155 if (X86AlignBranch.getNumOccurrences())
156 AlignBranchType = X86AlignBranchKindLoc;
157 if (X86PadMaxPrefixSize.getNumOccurrences())
158 TargetPrefixMax = X86PadMaxPrefixSize;
159 }
160
161 bool allowAutoPadding() const override;
162 bool allowEnhancedRelaxation() const override;
163 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
164 const MCSubtargetInfo &STI);
165 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);
166
167
168 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
169
170 MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
171
172 bool shouldForceRelocation(const MCFixup &, const MCValue &);
173
174 void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
175 MutableArrayRef<char> Data, uint64_t Value,
176 bool IsResolved) override;
177
178 bool mayNeedRelaxation(const MCInst &Inst,
179 const MCSubtargetInfo &STI) const override;
180
181 bool fixupNeedsRelaxationAdvanced(const MCFixup &, const MCValue &, uint64_t,
182 bool) const override;
183
184 void relaxInstruction(MCInst &Inst,
185 const MCSubtargetInfo &STI) const override;
186
187 bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
188 MCCodeEmitter &Emitter,
189 unsigned &RemainingSize) const;
190
191 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
192 unsigned &RemainingSize) const;
193
194 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
195 unsigned &RemainingSize) const;
196
197 bool finishLayout(const MCAssembler &Asm) const override;
198
199 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
200
201 bool writeNopData(raw_ostream &OS, uint64_t Count,
202 const MCSubtargetInfo *STI) const override;
203};
204} // end anonymous namespace
205
206static bool isRelaxableBranch(unsigned Opcode) {
207 return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
208}
209
210static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
211 bool Is16BitMode = false) {
212 switch (Opcode) {
213 default:
214 llvm_unreachable("invalid opcode for branch");
215 case X86::JCC_1:
216 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
217 case X86::JMP_1:
218 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
219 }
220}
221
222static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
223 unsigned Opcode = MI.getOpcode();
224 return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
225 : X86::getOpcodeForLongImmediateForm(Opcode);
226}
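// As an illustrative sketch of what relaxation does here: a short conditional
// branch such as
//   75 xx                jne .Ltarget     # JCC_1, rel8
// is rewritten into its near form
//   0f 85 xx xx xx xx    jne .Ltarget     # JCC_4, rel32
// when the displacement no longer fits in a signed 8-bit field.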
227
228static X86::CondCode getCondFromBranch(const MCInst &MI,
229 const MCInstrInfo &MCII) {
230 unsigned Opcode = MI.getOpcode();
231 switch (Opcode) {
232 default:
233 return X86::COND_INVALID;
234 case X86::JCC_1: {
235 const MCInstrDesc &Desc = MCII.get(Opcode);
236 return static_cast<X86::CondCode>(
237 MI.getOperand(i: Desc.getNumOperands() - 1).getImm());
238 }
239 }
240}
241
242static X86::SecondMacroFusionInstKind
243classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
244 X86::CondCode CC = getCondFromBranch(MI, MCII);
245 return classifySecondCondCodeInMacroFusion(CC);
246}
247
248/// Check if the instruction uses RIP relative addressing.
249static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
250 unsigned Opcode = MI.getOpcode();
251 const MCInstrDesc &Desc = MCII.get(Opcode);
252 uint64_t TSFlags = Desc.TSFlags;
253 unsigned CurOp = X86II::getOperandBias(Desc);
254 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
255 if (MemoryOperand < 0)
256 return false;
257 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
258 MCRegister BaseReg = MI.getOperand(i: BaseRegNum).getReg();
259 return (BaseReg == X86::RIP);
260}
261
262/// Check if the instruction is a prefix.
263static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
264 return X86II::isPrefix(TSFlags: MCII.get(Opcode).TSFlags);
265}
266
267/// Check if the instruction is valid as the first instruction in macro fusion.
268static bool isFirstMacroFusibleInst(const MCInst &Inst,
269 const MCInstrInfo &MCII) {
270 // An Intel instruction with RIP relative addressing is not macro fusible.
271 if (isRIPRelative(MI: Inst, MCII))
272 return false;
273 X86::FirstMacroFusionInstKind FIK =
274 X86::classifyFirstOpcodeInMacroFusion(Opcode: Inst.getOpcode());
275 return FIK != X86::FirstMacroFusionInstKind::Invalid;
276}
277
/// X86 can reduce the number of NOP bytes needed by padding instructions with
/// segment override prefixes, which gives better performance in some cases.
/// Here, we determine which prefix is the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
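///
/// For example (an illustrative sketch, not verbatim emitted bytes): in
/// 64-bit mode a one-byte pad can be produced by prepending a CS segment
/// override, which the CPU ignores:
/// \code
///    48 01 c3       addq %rax, %rbx
///    2e 48 01 c3    addq %rax, %rbx   # padded with a CS override prefix
/// \endcode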
287uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
288 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
289 "Prefixes can be added only in 32-bit or 64-bit mode.");
290 const MCInstrDesc &Desc = MCII->get(Opcode: Inst.getOpcode());
291 uint64_t TSFlags = Desc.TSFlags;
292
293 // Determine where the memory operand starts, if present.
294 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
295 if (MemoryOperand != -1)
296 MemoryOperand += X86II::getOperandBias(Desc);
297
298 MCRegister SegmentReg;
299 if (MemoryOperand >= 0) {
300 // Check for explicit segment override on memory operand.
301 SegmentReg = Inst.getOperand(i: MemoryOperand + X86::AddrSegmentReg).getReg();
302 }
303
304 switch (TSFlags & X86II::FormMask) {
305 default:
306 break;
307 case X86II::RawFrmDstSrc: {
308 // Check segment override opcode prefix as needed (not for %ds).
309 if (Inst.getOperand(i: 2).getReg() != X86::DS)
310 SegmentReg = Inst.getOperand(i: 2).getReg();
311 break;
312 }
313 case X86II::RawFrmSrc: {
314 // Check segment override opcode prefix as needed (not for %ds).
315 if (Inst.getOperand(i: 1).getReg() != X86::DS)
316 SegmentReg = Inst.getOperand(i: 1).getReg();
317 break;
318 }
319 case X86II::RawFrmMemOffs: {
320 // Check segment override opcode prefix as needed.
321 SegmentReg = Inst.getOperand(i: 1).getReg();
322 break;
323 }
324 }
325
326 if (SegmentReg)
327 return X86::getSegmentOverridePrefixForReg(Reg: SegmentReg);
328
329 if (STI.hasFeature(Feature: X86::Is64Bit))
330 return X86::CS_Encoding;
331
332 if (MemoryOperand >= 0) {
333 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
334 MCRegister BaseReg = Inst.getOperand(i: BaseRegNum).getReg();
335 if (BaseReg == X86::ESP || BaseReg == X86::EBP)
336 return X86::SS_Encoding;
337 }
338 return X86::DS_Encoding;
339}
340
341/// Check if the two instructions will be macro-fused on the target cpu.
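/// For example (illustrative; the exact fusible pairs depend on the
/// subtarget), a compare followed by a dependent conditional branch such as
/// \code
///   cmpq %rsi, %rdi
///   je   .Ltarget
/// \endcode
/// may be macro-fused on recent Intel cores.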
342bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
343 const MCInstrDesc &InstDesc = MCII->get(Opcode: Jcc.getOpcode());
344 if (!InstDesc.isConditionalBranch())
345 return false;
346 if (!isFirstMacroFusibleInst(Inst: Cmp, MCII: *MCII))
347 return false;
348 const X86::FirstMacroFusionInstKind CmpKind =
349 X86::classifyFirstOpcodeInMacroFusion(Opcode: Cmp.getOpcode());
350 const X86::SecondMacroFusionInstKind BranchKind =
351 classifySecondInstInMacroFusion(MI: Jcc, MCII: *MCII);
352 return X86::isMacroFused(FirstKind: CmpKind, SecondKind: BranchKind);
353}
354
355/// Check if the instruction has a variant symbol operand.
356static bool hasVariantSymbol(const MCInst &MI) {
357 for (auto &Operand : MI) {
358 if (!Operand.isExpr())
359 continue;
360 const MCExpr &Expr = *Operand.getExpr();
361 if (Expr.getKind() == MCExpr::SymbolRef &&
362 cast<MCSymbolRefExpr>(Val: &Expr)->getSpecifier())
363 return true;
364 }
365 return false;
366}
367
368bool X86AsmBackend::allowAutoPadding() const {
369 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
370}
371
372bool X86AsmBackend::allowEnhancedRelaxation() const {
373 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
374}
375
376/// X86 has certain instructions which enable interrupts exactly one
377/// instruction *after* the instruction which stores to SS. Return true if the
378/// given instruction may have such an interrupt delay slot.
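///
/// For example (illustrative), in the sequence
/// \code
///   movw %ax, %ss
///   movl %ebx, %esp
/// \endcode
/// no padding may be inserted between the two instructions: a NOP there would
/// sit in the interrupt shadow created by the store to %ss and move the
/// second instruction out of it.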
379static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
380 switch (InstOpcode) {
381 case X86::POPSS16:
382 case X86::POPSS32:
383 case X86::STI:
384 return true;
385
386 case X86::MOV16sr:
387 case X86::MOV32sr:
388 case X86::MOV64sr:
389 case X86::MOV16sm:
390 // In fact, this is only the case if the first operand is SS. However, as
391 // segment moves occur extremely rarely, this is just a minor pessimization.
392 return true;
393 }
394 return false;
395}
396
397/// Check if the instruction to be emitted is right after any data.
398static bool
399isRightAfterData(MCFragment *CurrentFragment,
400 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
401 MCFragment *F = CurrentFragment;
402 // Since data is always emitted into a DataFragment, our check strategy is
403 // simple here.
404 // - If the fragment is a DataFragment
405 // - If it's empty (section start or data after align), return false.
406 // - If it's not the fragment where the previous instruction is,
407 // returns true.
408 // - If it's the fragment holding the previous instruction but its
409 // size changed since the previous instruction was emitted into
410 // it, returns true.
411 // - Otherwise returns false.
412 // - If the fragment is not a DataFragment, returns false.
413 if (auto *DF = dyn_cast_or_null<MCDataFragment>(Val: F))
414 return DF->getContents().size() &&
415 (DF != PrevInstPosition.first ||
416 DF->getContents().size() != PrevInstPosition.second);
417
418 return false;
419}
420
421/// \returns the fragment size if it has instructions, otherwise returns 0.
422static size_t getSizeForInstFragment(const MCFragment *F) {
423 if (!F || !F->hasInstructions())
424 return 0;
425 // MCEncodedFragmentWithContents being templated makes this tricky.
426 switch (F->getKind()) {
427 default:
428 llvm_unreachable("Unknown fragment with instructions!");
429 case MCFragment::FT_Data:
430 return cast<MCDataFragment>(Val: *F).getContents().size();
431 case MCFragment::FT_Relaxable:
432 return cast<MCRelaxableFragment>(Val: *F).getContents().size();
433 }
434}
435
/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
438bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
439 if (hasVariantSymbol(MI: Inst))
    // The linker may rewrite the instruction with a variant symbol operand
    // (e.g. TLSCALL).
442 return false;
443
444 if (mayHaveInterruptDelaySlot(InstOpcode: PrevInstOpcode))
445 // If this instruction follows an interrupt enabling instruction with a one
446 // instruction delay, inserting a nop would change behavior.
447 return false;
448
  if (isPrefix(PrevInstOpcode, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst.getOpcode(), *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (IsRightAfterData)
    // If this instruction follows any data, there is no clear instruction
    // boundary, so inserting a nop/prefix would change semantics.
    return false;
463
464 return true;
465}
466
467bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
468 if (!OS.getAllowAutoPadding())
469 return false;
470 assert(allowAutoPadding() && "incorrect initialization!");
471
  // We only pad in the text section.
473 if (!OS.getCurrentSectionOnly()->isText())
474 return false;
475
  // TODO: Currently we don't deal with bundle cases.
477 if (OS.getAssembler().isBundlingEnabled())
478 return false;
479
480 // Branches only need to be aligned in 32-bit or 64-bit mode.
481 if (!(STI.hasFeature(Feature: X86::Is64Bit) || STI.hasFeature(Feature: X86::Is32Bit)))
482 return false;
483
484 return true;
485}
486
/// Check if the instruction is a branch of a type that should be aligned.
488bool X86AsmBackend::needAlign(const MCInst &Inst) const {
489 const MCInstrDesc &Desc = MCII->get(Opcode: Inst.getOpcode());
490 return (Desc.isConditionalBranch() &&
491 (AlignBranchType & X86::AlignBranchJcc)) ||
492 (Desc.isUnconditionalBranch() &&
493 (AlignBranchType & X86::AlignBranchJmp)) ||
494 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
495 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
496 (Desc.isIndirectBranch() &&
497 (AlignBranchType & X86::AlignBranchIndirect));
498}
499
500/// Insert BoundaryAlignFragment before instructions to align branches.
501void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
502 const MCInst &Inst, const MCSubtargetInfo &STI) {
503 // Used by canPadInst. Done here, because in emitInstructionEnd, the current
504 // fragment will have changed.
505 IsRightAfterData =
506 isRightAfterData(CurrentFragment: OS.getCurrentFragment(), PrevInstPosition);
507
508 if (!canPadBranches(OS))
509 return;
510
511 // NB: PrevInst only valid if canPadBranches is true.
  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen; clear the pending fragment.
    PendingBA = nullptr;
515
  // When branch padding is enabled (basically only for the SKX102 erratum, so
  // unlikely), we call canPadInst (not cheap) twice. However, in the common
  // case, we can avoid unnecessary calls to that, as this is otherwise only
  // used for relaxable fragments.
520 if (!canPadInst(Inst, OS))
521 return;
522
523 if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
524 // Macro fusion actually happens and there is no other fragment inserted
525 // after the previous instruction.
526 //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
528 // we met the first instruction in the fused pair and we'll tie them
529 // together in emitInstructionEnd.
530 //
531 // Note: When there is at least one fragment, such as MCAlignFragment,
532 // inserted after the previous instruction, e.g.
533 //
534 // \code
535 // cmp %rax %rcx
536 // .align 16
537 // je .Label0
538 // \ endcode
539 //
    // We will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
542 return;
543 }
544
  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
    // insert a BoundaryAlign fragment.
549 PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
550 args&: AlignBoundary, args: STI);
551 OS.insert(F: PendingBA);
552 }
553}
554
555/// Set the last fragment to be aligned for the BoundaryAlignFragment.
556void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
557 const MCInst &Inst) {
558 MCFragment *CF = OS.getCurrentFragment();
559 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(Val: CF))
560 F->setAllowAutoPadding(canPadInst(Inst, OS));
561
  // Update PrevInstOpcode here; canPadInst() reads it.
563 PrevInstOpcode = Inst.getOpcode();
564 PrevInstPosition = std::make_pair(x&: CF, y: getSizeForInstFragment(F: CF));
565
566 if (!canPadBranches(OS))
567 return;
568
569 // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
570 PrevInst = Inst;
571
572 if (!needAlign(Inst) || !PendingBA)
573 return;
574
575 // Tie the aligned instructions into a pending BoundaryAlign.
576 PendingBA->setLastFragment(CF);
577 PendingBA = nullptr;
578
579 // We need to ensure that further data isn't added to the current
580 // DataFragment, so that we can get the size of instructions later in
581 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
582 // DataFragment.
583 if (isa_and_nonnull<MCDataFragment>(Val: CF))
584 OS.insert(F: OS.getContext().allocFragment<MCDataFragment>());
585
586 // Update the maximum alignment on the current section if necessary.
587 MCSection *Sec = OS.getCurrentSectionOnly();
588 Sec->ensureMinAlignment(MinAlignment: AlignBoundary);
589}
590
591std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
592 if (STI.getTargetTriple().isOSBinFormatELF()) {
593 unsigned Type;
594 if (STI.getTargetTriple().getArch() == Triple::x86_64) {
595 Type = llvm::StringSwitch<unsigned>(Name)
596#define ELF_RELOC(X, Y) .Case(#X, Y)
597#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
598#undef ELF_RELOC
599 .Case(S: "BFD_RELOC_NONE", Value: ELF::R_X86_64_NONE)
600 .Case(S: "BFD_RELOC_8", Value: ELF::R_X86_64_8)
601 .Case(S: "BFD_RELOC_16", Value: ELF::R_X86_64_16)
602 .Case(S: "BFD_RELOC_32", Value: ELF::R_X86_64_32)
603 .Case(S: "BFD_RELOC_64", Value: ELF::R_X86_64_64)
604 .Default(Value: -1u);
605 } else {
606 Type = llvm::StringSwitch<unsigned>(Name)
607#define ELF_RELOC(X, Y) .Case(#X, Y)
608#include "llvm/BinaryFormat/ELFRelocs/i386.def"
609#undef ELF_RELOC
610 .Case(S: "BFD_RELOC_NONE", Value: ELF::R_386_NONE)
611 .Case(S: "BFD_RELOC_8", Value: ELF::R_386_8)
612 .Case(S: "BFD_RELOC_16", Value: ELF::R_386_16)
613 .Case(S: "BFD_RELOC_32", Value: ELF::R_386_32)
614 .Default(Value: -1u);
615 }
616 if (Type == -1u)
617 return std::nullopt;
618 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
619 }
620 return MCAsmBackend::getFixupKind(Name);
621}
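// As a usage sketch (illustrative, assuming ELF output): the names accepted
// above are the ones the .reloc directive understands, e.g.
//   .reloc ., R_X86_64_NONE, foo
//   .reloc ., BFD_RELOC_32, bar
// which emit the named relocation directly, bypassing normal fixup
// processing (see the FirstLiteralRelocationKind handling above).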
622
623MCFixupKindInfo X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
624 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
625 // clang-format off
626 {.Name: "reloc_riprel_4byte", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
627 {.Name: "reloc_riprel_4byte_movq_load", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
628 {.Name: "reloc_riprel_4byte_movq_load_rex2", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
629 {.Name: "reloc_riprel_4byte_relax", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
630 {.Name: "reloc_riprel_4byte_relax_rex", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
631 {.Name: "reloc_riprel_4byte_relax_rex2", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
632 {.Name: "reloc_riprel_4byte_relax_evex", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
633 {.Name: "reloc_signed_4byte", .TargetOffset: 0, .TargetSize: 32, .Flags: 0},
634 {.Name: "reloc_signed_4byte_relax", .TargetOffset: 0, .TargetSize: 32, .Flags: 0},
635 {.Name: "reloc_global_offset_table", .TargetOffset: 0, .TargetSize: 32, .Flags: 0},
636 {.Name: "reloc_branch_4byte_pcrel", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
637 // clang-format on
638 };
639
640 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
641 // do not require any extra processing.
642 if (mc::isRelocation(FixupKind: Kind))
643 return MCAsmBackend::getFixupKindInfo(Kind: FK_NONE);
644
645 if (Kind < FirstTargetFixupKind)
646 return MCAsmBackend::getFixupKindInfo(Kind);
647
648 assert(unsigned(Kind - FirstTargetFixupKind) < X86::NumTargetFixupKinds &&
649 "Invalid kind!");
650 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
651 return Infos[Kind - FirstTargetFixupKind];
652}
653
654static unsigned getFixupKindSize(unsigned Kind) {
655 switch (Kind) {
656 default:
657 llvm_unreachable("invalid fixup kind!");
658 case FK_NONE:
659 return 0;
660 case FK_PCRel_1:
661 case FK_SecRel_1:
662 case FK_Data_1:
663 return 1;
664 case FK_PCRel_2:
665 case FK_SecRel_2:
666 case FK_Data_2:
667 return 2;
668 case FK_PCRel_4:
669 case X86::reloc_riprel_4byte:
670 case X86::reloc_riprel_4byte_relax:
671 case X86::reloc_riprel_4byte_relax_rex:
672 case X86::reloc_riprel_4byte_relax_rex2:
673 case X86::reloc_riprel_4byte_movq_load:
674 case X86::reloc_riprel_4byte_movq_load_rex2:
675 case X86::reloc_riprel_4byte_relax_evex:
676 case X86::reloc_signed_4byte:
677 case X86::reloc_signed_4byte_relax:
678 case X86::reloc_global_offset_table:
679 case X86::reloc_branch_4byte_pcrel:
680 case FK_SecRel_4:
681 case FK_Data_4:
682 return 4;
683 case FK_PCRel_8:
684 case FK_SecRel_8:
685 case FK_Data_8:
686 return 8;
687 }
688}
689
690void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
691 const MCValue &Target,
692 MutableArrayRef<char> Data, uint64_t Value,
693 bool IsResolved) {
694 // Force relocation when there is a specifier. This might be too conservative
695 // - GAS doesn't emit a relocation for call local@plt; local:.
696 if (Target.getSpecifier())
697 IsResolved = false;
698 maybeAddReloc(F, Fixup, Target, Value, IsResolved);
699
700 auto Kind = Fixup.getKind();
701 if (mc::isRelocation(FixupKind: Kind))
702 return;
703 unsigned Size = getFixupKindSize(Kind);
704
705 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
706
707 int64_t SignedValue = static_cast<int64_t>(Value);
708 if (IsResolved && Fixup.isPCRel()) {
    // Check that the PC-relative fixup fits into the fixup size.
710 if (Size > 0 && !isIntN(N: Size * 8, x: SignedValue))
711 getContext().reportError(L: Fixup.getLoc(),
712 Msg: "value of " + Twine(SignedValue) +
713 " is too large for field of " + Twine(Size) +
714 ((Size == 1) ? " byte." : " bytes."));
715 } else {
    // Check that the upper bits are either all zeros or all ones.
717 // Specifically ignore overflow/underflow as long as the leakage is
718 // limited to the lower bits. This is to remain compatible with
719 // other assemblers.
720 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
721 "Value does not fit in the Fixup field");
722 }
723
724 for (unsigned i = 0; i != Size; ++i)
725 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
726}
727
728bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
729 const MCSubtargetInfo &STI) const {
730 unsigned Opcode = MI.getOpcode();
731 unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
732 return isRelaxableBranch(Opcode) ||
733 (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
734 MI.getOperand(i: MI.getNumOperands() - 1 - SkipOperands).isExpr());
735}
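// As an illustrative example (opcode names here are the usual X86 MC names,
// not taken from this file): a branch like "jne .L" assembled as JCC_1 is
// relaxable, and an arithmetic instruction assembled with a short-immediate
// opcode (e.g. ADD64ri8) whose immediate operand is still a symbolic
// expression may also need to grow into its long-immediate form (ADD64ri32).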
736
737bool X86AsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
738 const MCValue &Target,
739 uint64_t Value,
740 bool Resolved) const {
741 // If resolved, relax if the value is too big for a (signed) i8.
742 //
  // Currently, `jmp local@plt` relaxes JMP even if the offset is small, which
  // differs from GAS.
745 if (Resolved)
746 return !isInt<8>(x: Value) || Target.getSpecifier();
747
748 // Otherwise, relax unless there is a @ABS8 specifier.
749 if (Fixup.getKind() == FK_Data_1 && Target.getAddSym() &&
750 Target.getSpecifier() == X86::S_ABS8)
751 return false;
752 return true;
753}
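// For example (illustrative), with the x86-64 @ABS8 specifier
//   cmpq $foo@ABS8, %rdi
// the assembler keeps the 8-bit immediate form; whether foo really fits in a
// signed 8-bit field is then checked when the relocation is resolved.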
754
755// FIXME: Can tblgen help at all here to verify there aren't other instructions
756// we can relax?
757void X86AsmBackend::relaxInstruction(MCInst &Inst,
758 const MCSubtargetInfo &STI) const {
  // X86 relaxes a 1-byte pcrel branch to a 4-byte pcrel branch, or a
  // short-immediate form to the corresponding long-immediate form.
760 bool Is16BitMode = STI.hasFeature(Feature: X86::Is16Bit);
761 unsigned RelaxedOp = getRelaxedOpcode(MI: Inst, Is16BitMode);
762
763 if (RelaxedOp == Inst.getOpcode()) {
764 SmallString<256> Tmp;
765 raw_svector_ostream OS(Tmp);
766 Inst.dump_pretty(OS);
767 OS << "\n";
768 report_fatal_error(reason: "unexpected instruction to relax: " + OS.str());
769 }
770
771 Inst.setOpcode(RelaxedOp);
772}
773
774bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
775 MCCodeEmitter &Emitter,
776 unsigned &RemainingSize) const {
777 if (!RF.getAllowAutoPadding())
778 return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
783 if (mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
784 return false;
785
786 const unsigned OldSize = RF.getContents().size();
787 if (OldSize == 15)
788 return false;
789
790 const unsigned MaxPossiblePad = std::min(a: 15 - OldSize, b: RemainingSize);
791 const unsigned RemainingPrefixSize = [&]() -> unsigned {
792 SmallString<15> Code;
793 X86_MC::emitPrefix(MCE&: Emitter, MI: RF.getInst(), CB&: Code, STI);
794 assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
795
    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it is safe to add.
    // Various targets (older chips mostly, but also Atom family) encounter
    // decoder stalls with too many prefixes. For testing purposes, we set the
    // value externally for the moment.
801 unsigned ExistingPrefixSize = Code.size();
802 if (TargetPrefixMax <= ExistingPrefixSize)
803 return 0;
804 return TargetPrefixMax - ExistingPrefixSize;
805 }();
806 const unsigned PrefixBytesToAdd =
807 std::min(a: MaxPossiblePad, b: RemainingPrefixSize);
808 if (PrefixBytesToAdd == 0)
809 return false;
810
811 const uint8_t Prefix = determinePaddingPrefix(Inst: RF.getInst());
812
813 SmallString<256> Code;
814 Code.append(NumInputs: PrefixBytesToAdd, Elt: Prefix);
815 Code.append(in_start: RF.getContents().begin(), in_end: RF.getContents().end());
816 RF.setContents(Code);
817
818 // Adjust the fixups for the change in offsets
819 for (auto &F : RF.getFixups()) {
820 F.setOffset(F.getOffset() + PrefixBytesToAdd);
821 }
822
823 RemainingSize -= PrefixBytesToAdd;
824 return true;
825}
826
827bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
828 MCCodeEmitter &Emitter,
829 unsigned &RemainingSize) const {
830 if (!mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
831 // TODO: There are lots of other tricks we could apply for increasing
832 // encoding size without impacting performance.
833 return false;
834
835 MCInst Relaxed = RF.getInst();
836 relaxInstruction(Inst&: Relaxed, STI: *RF.getSubtargetInfo());
837
838 SmallVector<MCFixup, 4> Fixups;
839 SmallString<15> Code;
840 Emitter.encodeInstruction(Inst: Relaxed, CB&: Code, Fixups, STI: *RF.getSubtargetInfo());
841 const unsigned OldSize = RF.getContents().size();
842 const unsigned NewSize = Code.size();
843 assert(NewSize >= OldSize && "size decrease during relaxation?");
844 unsigned Delta = NewSize - OldSize;
845 if (Delta > RemainingSize)
846 return false;
847 RF.setInst(Relaxed);
848 RF.setContents(Code);
849 RF.setFixups(Fixups);
850 RemainingSize -= Delta;
851 return true;
852}
853
854bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
855 MCCodeEmitter &Emitter,
856 unsigned &RemainingSize) const {
857 bool Changed = false;
858 if (RemainingSize != 0)
859 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
860 if (RemainingSize != 0)
861 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
862 return Changed;
863}
864
865bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
866 // See if we can further relax some instructions to cut down on the number of
867 // nop bytes required for code alignment. The actual win is in reducing
868 // instruction count, not number of bytes. Modern X86-64 can easily end up
869 // decode limited. It is often better to reduce the number of instructions
870 // (i.e. eliminate nops) even at the cost of increasing the size and
871 // complexity of others.
872 if (!X86PadForAlign && !X86PadForBranchAlign)
873 return false;
874
  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non-g difference.
878 DenseSet<MCFragment *> LabeledFragments;
879 for (const MCSymbol &S : Asm.symbols())
880 LabeledFragments.insert(V: S.getFragment());
881
882 bool Changed = false;
883 for (MCSection &Sec : Asm) {
884 if (!Sec.isText())
885 continue;
886
887 SmallVector<MCRelaxableFragment *, 4> Relaxable;
888 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
889 MCFragment &F = *I;
890
891 if (LabeledFragments.count(V: &F))
892 Relaxable.clear();
893
894 if (F.getKind() == MCFragment::FT_Data) // Skip and ignore
895 continue;
896
897 if (F.getKind() == MCFragment::FT_Relaxable) {
898 auto &RF = cast<MCRelaxableFragment>(Val&: *I);
899 Relaxable.push_back(Elt: &RF);
900 continue;
901 }
902
903 auto canHandle = [](MCFragment &F) -> bool {
904 switch (F.getKind()) {
905 default:
906 return false;
907 case MCFragment::FT_Align:
908 return X86PadForAlign;
909 case MCFragment::FT_BoundaryAlign:
910 return X86PadForBranchAlign;
911 }
912 };
913 // For any unhandled kind, assume we can't change layout.
914 if (!canHandle(F)) {
915 Relaxable.clear();
916 continue;
917 }
918
919 const uint64_t OrigSize = Asm.computeFragmentSize(F);
920
921 // To keep the effects local, prefer to relax instructions closest to
922 // the align directive. This is purely about human understandability
923 // of the resulting code. If we later find a reason to expand
924 // particular instructions over others, we can adjust.
925 unsigned RemainingSize = OrigSize;
926 while (!Relaxable.empty() && RemainingSize != 0) {
927 auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
931 Changed |= padInstructionEncoding(RF, Emitter&: Asm.getEmitter(), RemainingSize);
932
933 // If we have an instruction which hasn't been fully relaxed, we can't
934 // skip past it and insert bytes before it. Changing its starting
935 // offset might require a larger negative offset than it can encode.
936 // We don't need to worry about larger positive offsets as none of the
937 // possible offsets between this and our align are visible, and the
938 // ones afterwards aren't changing.
939 if (mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
940 break;
941 }
942 Relaxable.clear();
943
944 // If we're looking at a boundary align, make sure we don't try to pad
945 // its target instructions for some following directive. Doing so would
946 // break the alignment of the current boundary align.
947 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(Val: &F)) {
948 cast<MCBoundaryAlignFragment>(Val&: F).setSize(RemainingSize);
949 Changed = true;
950 const MCFragment *LastFragment = BF->getLastFragment();
951 if (!LastFragment)
952 continue;
953 while (&*I != LastFragment)
954 ++I;
955 }
956 }
957 }
958
959 return Changed;
960}
961
962unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
963 if (STI.hasFeature(Feature: X86::Is16Bit))
964 return 4;
965 if (!STI.hasFeature(Feature: X86::FeatureNOPL) && !STI.hasFeature(Feature: X86::Is64Bit))
966 return 1;
967 if (STI.hasFeature(Feature: X86::TuningFast7ByteNOP))
968 return 7;
969 if (STI.hasFeature(Feature: X86::TuningFast15ByteNOP))
970 return 15;
971 if (STI.hasFeature(Feature: X86::TuningFast11ByteNOP))
972 return 11;
  // FIXME: handle 32-bit mode
  // 15 bytes is the longest single NOP instruction, but 10 bytes is
  // commonly the longest that can be efficiently decoded.
976 return 10;
977}
978
979/// Write a sequence of optimal nops to the output, covering \p Count
980/// bytes.
981/// \return - true on success, false on failure
982bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
983 const MCSubtargetInfo *STI) const {
984 static const char Nops32Bit[10][11] = {
985 // nop
986 "\x90",
987 // xchg %ax,%ax
988 "\x66\x90",
989 // nopl (%[re]ax)
990 "\x0f\x1f\x00",
991 // nopl 0(%[re]ax)
992 "\x0f\x1f\x40\x00",
993 // nopl 0(%[re]ax,%[re]ax,1)
994 "\x0f\x1f\x44\x00\x00",
995 // nopw 0(%[re]ax,%[re]ax,1)
996 "\x66\x0f\x1f\x44\x00\x00",
997 // nopl 0L(%[re]ax)
998 "\x0f\x1f\x80\x00\x00\x00\x00",
999 // nopl 0L(%[re]ax,%[re]ax,1)
1000 "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1001 // nopw 0L(%[re]ax,%[re]ax,1)
1002 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1003 // nopw %cs:0L(%[re]ax,%[re]ax,1)
1004 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1005 };
1006
1007 // 16-bit mode uses different nop patterns than 32-bit.
1008 static const char Nops16Bit[4][11] = {
1009 // nop
1010 "\x90",
1011 // xchg %eax,%eax
1012 "\x66\x90",
1013 // lea 0(%si),%si
1014 "\x8d\x74\x00",
1015 // lea 0w(%si),%si
1016 "\x8d\xb4\x00\x00",
1017 };
1018
1019 const char(*Nops)[11] =
1020 STI->hasFeature(Feature: X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1021
1022 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(STI: *STI);
1023
1024 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1025 // length.
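  // For example (an illustrative run), a request for Count == 23 with
  // MaxNopLength == 10 emits:
  //   66 2e 0f 1f 84 00 00 00 00 00   nopw %cs:0L(%rax,%rax,1)
  //   66 2e 0f 1f 84 00 00 00 00 00   nopw %cs:0L(%rax,%rax,1)
  //   0f 1f 00                        nopl (%rax)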
1026 do {
1027 const uint8_t ThisNopLength = (uint8_t) std::min(a: Count, b: MaxNopLength);
1028 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1029 for (uint8_t i = 0; i < Prefixes; i++)
1030 OS << '\x66';
1031 const uint8_t Rest = ThisNopLength - Prefixes;
1032 if (Rest != 0)
1033 OS.write(Ptr: Nops[Rest - 1], Size: Rest);
1034 Count -= ThisNopLength;
1035 } while (Count != 0);
1036
1037 return true;
1038}
1039
1040/* *** */
1041
1042namespace {
1043
1044class ELFX86AsmBackend : public X86AsmBackend {
1045public:
1046 uint8_t OSABI;
1047 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1048 : X86AsmBackend(T, STI), OSABI(OSABI) {}
1049};
1050
1051class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1052public:
1053 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1054 const MCSubtargetInfo &STI)
1055 : ELFX86AsmBackend(T, OSABI, STI) {}
1056
1057 std::unique_ptr<MCObjectTargetWriter>
1058 createObjectTargetWriter() const override {
1059 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, EMachine: ELF::EM_386);
1060 }
1061};
1062
1063class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1064public:
1065 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1066 const MCSubtargetInfo &STI)
1067 : ELFX86AsmBackend(T, OSABI, STI) {}
1068
1069 std::unique_ptr<MCObjectTargetWriter>
1070 createObjectTargetWriter() const override {
1071 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1072 EMachine: ELF::EM_X86_64);
1073 }
1074};
1075
1076class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1077public:
1078 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1079 const MCSubtargetInfo &STI)
1080 : ELFX86AsmBackend(T, OSABI, STI) {}
1081
1082 std::unique_ptr<MCObjectTargetWriter>
1083 createObjectTargetWriter() const override {
1084 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1085 EMachine: ELF::EM_IAMCU);
1086 }
1087};
1088
1089class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1090public:
1091 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1092 const MCSubtargetInfo &STI)
1093 : ELFX86AsmBackend(T, OSABI, STI) {}
1094
1095 std::unique_ptr<MCObjectTargetWriter>
1096 createObjectTargetWriter() const override {
1097 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, EMachine: ELF::EM_X86_64);
1098 }
1099};
1100
1101class WindowsX86AsmBackend : public X86AsmBackend {
1102 bool Is64Bit;
1103
1104public:
1105 WindowsX86AsmBackend(const Target &T, bool is64Bit,
1106 const MCSubtargetInfo &STI)
1107 : X86AsmBackend(T, STI)
1108 , Is64Bit(is64Bit) {
1109 }
1110
1111 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1112 return StringSwitch<std::optional<MCFixupKind>>(Name)
1113 .Case(S: "dir32", Value: FK_Data_4)
1114 .Case(S: "secrel32", Value: FK_SecRel_4)
1115 .Case(S: "secidx", Value: FK_SecRel_2)
1116 .Default(Value: MCAsmBackend::getFixupKind(Name));
1117 }
1118
1119 std::unique_ptr<MCObjectTargetWriter>
1120 createObjectTargetWriter() const override {
1121 return createX86WinCOFFObjectWriter(Is64Bit);
1122 }
1123};
1124
1125namespace CU {
1126
1127 /// Compact unwind encoding values.
1128 enum CompactUnwindEncodings {
    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately
    /// after the return address, then [RE]SP is moved to [RE]BP.
1131 UNWIND_MODE_BP_FRAME = 0x01000000,
1132
1133 /// A frameless function with a small constant stack size.
1134 UNWIND_MODE_STACK_IMMD = 0x02000000,
1135
1136 /// A frameless function with a large constant stack size.
1137 UNWIND_MODE_STACK_IND = 0x03000000,
1138
1139 /// No compact unwind encoding is available.
1140 UNWIND_MODE_DWARF = 0x04000000,
1141
1142 /// Mask for encoding the frame registers.
1143 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
1144
1145 /// Mask for encoding the frameless registers.
1146 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1147 };
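
  // As a sketch of how these values are combined (mirroring the logic in
  // generateCompactUnwindEncoding below), a frame-based function is encoded
  // roughly as
  //   UNWIND_MODE_BP_FRAME | ((StackAdjust & 0xFF) << 16) |
  //       (RegEnc & UNWIND_BP_FRAME_REGISTERS)
  // while a small frameless function uses
  //   UNWIND_MODE_STACK_IMMD | ((StackSize & 0xFF) << 16) |
  //       ((SavedRegCount & 0x7) << 10) |
  //       (RegEnc & UNWIND_FRAMELESS_STACK_REG_PERMUTATION)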
1148
1149} // namespace CU
1150
1151class DarwinX86AsmBackend : public X86AsmBackend {
1152 const MCRegisterInfo &MRI;
1153
1154 /// Number of registers that can be saved in a compact unwind encoding.
1155 enum { CU_NUM_SAVED_REGS = 6 };
1156
1157 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1158 Triple TT;
1159 bool Is64Bit;
1160
  unsigned OffsetSize;  ///< Stack offset produced by one "push" instruction.
  unsigned MoveInstrSize; ///< Size of a "move" instruction.
  unsigned StackDivide; ///< Amount to divide the stack size by.
1164protected:
1165 /// Size of a "push" instruction for the given register.
1166 unsigned PushInstrSize(MCRegister Reg) const {
1167 switch (Reg.id()) {
1168 case X86::EBX:
1169 case X86::ECX:
1170 case X86::EDX:
1171 case X86::EDI:
1172 case X86::ESI:
1173 case X86::EBP:
1174 case X86::RBX:
1175 case X86::RBP:
1176 return 1;
1177 case X86::R12:
1178 case X86::R13:
1179 case X86::R14:
1180 case X86::R15:
1181 return 2;
1182 }
1183 return 1;
1184 }
1185
1186private:
1187 /// Get the compact unwind number for a given register. The number
1188 /// corresponds to the enum lists in compact_unwind_encoding.h.
1189 int getCompactUnwindRegNum(unsigned Reg) const {
1190 static const MCPhysReg CU32BitRegs[7] = {
1191 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1192 };
1193 static const MCPhysReg CU64BitRegs[] = {
1194 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1195 };
1196 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1197 for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1198 if (*CURegs == Reg)
1199 return Idx;
1200
1201 return -1;
1202 }
1203
1204 /// Return the registers encoded for a compact encoding with a frame
1205 /// pointer.
1206 uint32_t encodeCompactUnwindRegistersWithFrame() const {
1207 // Encode the registers in the order they were saved --- 3-bits per
1208 // register. The list of saved registers is assumed to be in reverse
1209 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1210 uint32_t RegEnc = 0;
1211 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1212 unsigned Reg = SavedRegs[i];
1213 if (Reg == 0) break;
1214
1215 int CURegNum = getCompactUnwindRegNum(Reg);
1216 if (CURegNum == -1) return ~0U;
1217
1218 // Encode the 3-bit register number in order, skipping over 3-bits for
1219 // each register.
1220 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1221 }
1222
1223 assert((RegEnc & 0x3FFFF) == RegEnc &&
1224 "Invalid compact register encoding!");
1225 return RegEnc;
1226 }
1227
1228 /// Create the permutation encoding used with frameless stacks. It is
1229 /// passed the number of registers to be saved and an array of the registers
1230 /// saved.
1231 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1232 // The saved registers are numbered from 1 to 6. In order to encode the
1233 // order in which they were saved, we re-number them according to their
1234 // place in the register order. The re-numbering is relative to the last
1235 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1236 // that order:
1237 //
1238 // Orig Re-Num
1239 // ---- ------
1240 // 6 6
1241 // 2 2
1242 // 4 3
1243 // 5 3
1244 //
1245 for (unsigned i = 0; i < RegCount; ++i) {
1246 int CUReg = getCompactUnwindRegNum(Reg: SavedRegs[i]);
1247 if (CUReg == -1) return ~0U;
1248 SavedRegs[i] = CUReg;
1249 }
1250
1251 // Reverse the list.
1252 std::reverse(first: &SavedRegs[0], last: &SavedRegs[CU_NUM_SAVED_REGS]);
1253
1254 uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1255 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1256 unsigned Countless = 0;
1257 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1258 if (SavedRegs[j] < SavedRegs[i])
1259 ++Countless;
1260
1261 RenumRegs[i] = SavedRegs[i] - Countless - 1;
1262 }
1263
1264 // Take the renumbered values and encode them into a 10-bit number.
1265 uint32_t permutationEncoding = 0;
1266 switch (RegCount) {
1267 case 6:
1268 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1269 + 6 * RenumRegs[2] + 2 * RenumRegs[3]
1270 + RenumRegs[4];
1271 break;
1272 case 5:
1273 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1274 + 6 * RenumRegs[3] + 2 * RenumRegs[4]
1275 + RenumRegs[5];
1276 break;
1277 case 4:
1278 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
1279 + 3 * RenumRegs[4] + RenumRegs[5];
1280 break;
1281 case 3:
1282 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
1283 + RenumRegs[5];
1284 break;
1285 case 2:
1286 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
1287 break;
1288 case 1:
1289 permutationEncoding |= RenumRegs[5];
1290 break;
1291 }
1292
1293 assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1294 "Invalid compact register encoding!");
1295 return permutationEncoding;
1296 }
1297
1298public:
1299 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1300 const MCSubtargetInfo &STI)
1301 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1302 Is64Bit(TT.isArch64Bit()) {
1303 memset(s: SavedRegs, c: 0, n: sizeof(SavedRegs));
1304 OffsetSize = Is64Bit ? 8 : 4;
1305 MoveInstrSize = Is64Bit ? 3 : 2;
1306 StackDivide = Is64Bit ? 8 : 4;
1307 }
1308
1309 std::unique_ptr<MCObjectTargetWriter>
1310 createObjectTargetWriter() const override {
1311 uint32_t CPUType = cantFail(ValOrErr: MachO::getCPUType(T: TT));
1312 uint32_t CPUSubType = cantFail(ValOrErr: MachO::getCPUSubType(T: TT));
1313 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubtype: CPUSubType);
1314 }
1315
1316 /// Implementation of algorithm to generate the compact unwind encoding
1317 /// for the CFI instructions.
1318 uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1319 const MCContext *Ctxt) const override {
1320 ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1321 if (Instrs.empty()) return 0;
1322 if (!isDarwinCanonicalPersonality(Sym: FI->Personality) &&
1323 !Ctxt->emitCompactUnwindNonCanonical())
1324 return CU::UNWIND_MODE_DWARF;
1325
1326 // Reset the saved registers.
1327 unsigned SavedRegIdx = 0;
1328 memset(s: SavedRegs, c: 0, n: sizeof(SavedRegs));
1329
1330 bool HasFP = false;
1331
1332 // Encode that we are using EBP/RBP as the frame pointer.
1333 uint64_t CompactUnwindEncoding = 0;
1334
1335 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1336 unsigned InstrOffset = 0;
1337 unsigned StackAdjust = 0;
1338 uint64_t StackSize = 0;
1339 int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();
1340
1341 for (const MCCFIInstruction &Inst : Instrs) {
1342 switch (Inst.getOperation()) {
1343 default:
1344 // Any other CFI directives indicate a frame that we aren't prepared
1345 // to represent via compact unwind, so just bail out.
1346 return CU::UNWIND_MODE_DWARF;
1347 case MCCFIInstruction::OpDefCfaRegister: {
1348 // Defines a frame pointer. E.g.
1349 //
1350 // movq %rsp, %rbp
1351 // L0:
1352 // .cfi_def_cfa_register %rbp
1353 //
1354 HasFP = true;
1355
        // If the frame pointer is anything other than ebp/rbp, we do not have
        // a way to generate a compact unwind representation, so bail out.
1358 if (*MRI.getLLVMRegNum(RegNum: Inst.getRegister(), isEH: true) !=
1359 (Is64Bit ? X86::RBP : X86::EBP))
1360 return CU::UNWIND_MODE_DWARF;
1361
1362 // Reset the counts.
1363 memset(s: SavedRegs, c: 0, n: sizeof(SavedRegs));
1364 StackAdjust = 0;
1365 SavedRegIdx = 0;
1366 MinAbsOffset = std::numeric_limits<int64_t>::max();
1367 InstrOffset += MoveInstrSize;
1368 break;
1369 }
1370 case MCCFIInstruction::OpDefCfaOffset: {
1371 // Defines a new offset for the CFA. E.g.
1372 //
1373 // With frame:
1374 //
1375 // pushq %rbp
1376 // L0:
1377 // .cfi_def_cfa_offset 16
1378 //
1379 // Without frame:
1380 //
1381 // subq $72, %rsp
1382 // L0:
1383 // .cfi_def_cfa_offset 80
1384 //
1385 StackSize = Inst.getOffset() / StackDivide;
1386 break;
1387 }
1388 case MCCFIInstruction::OpOffset: {
1389 // Defines a "push" of a callee-saved register. E.g.
1390 //
1391 // pushq %r15
1392 // pushq %r14
1393 // pushq %rbx
1394 // L0:
1395 // subq $120, %rsp
1396 // L1:
1397 // .cfi_offset %rbx, -40
1398 // .cfi_offset %r14, -32
1399 // .cfi_offset %r15, -24
1400 //
1401 if (SavedRegIdx == CU_NUM_SAVED_REGS)
1402 // If there are too many saved registers, we cannot use a compact
1403 // unwind encoding.
1404 return CU::UNWIND_MODE_DWARF;
1405
1406 MCRegister Reg = *MRI.getLLVMRegNum(RegNum: Inst.getRegister(), isEH: true);
1407 SavedRegs[SavedRegIdx++] = Reg;
1408 StackAdjust += OffsetSize;
1409 MinAbsOffset = std::min(a: MinAbsOffset, b: std::abs(i: Inst.getOffset()));
1410 InstrOffset += PushInstrSize(Reg);
1411 break;
1412 }
1413 }
1414 }
1415
1416 StackAdjust /= StackDivide;
1417
1418 if (HasFP) {
1419 if ((StackAdjust & 0xFF) != StackAdjust)
1420 // Offset was too big for a compact unwind encoding.
1421 return CU::UNWIND_MODE_DWARF;
1422
1423 // We don't attempt to track a real StackAdjust, so if the saved registers
1424 // aren't adjacent to rbp we can't cope.
1425 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1426 return CU::UNWIND_MODE_DWARF;
1427
1428 // Get the encoding of the saved registers when we have a frame pointer.
1429 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1430 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1431
1432 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1433 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1434 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1435 } else {
1436 SubtractInstrIdx += InstrOffset;
1437 ++StackAdjust;
1438
1439 if ((StackSize & 0xFF) == StackSize) {
1440 // Frameless stack with a small stack size.
1441 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1442
1443 // Encode the stack size.
1444 CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1445 } else {
1446 if ((StackAdjust & 0x7) != StackAdjust)
1447 // The extra stack adjustments are too big for us to handle.
1448 return CU::UNWIND_MODE_DWARF;
1449
1450 // Frameless stack with an offset too large for us to encode compactly.
1451 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1452
1453 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1454 // instruction.
1455 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1456
1457 // Encode any extra stack adjustments (done via push instructions).
1458 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1459 }
1460
1461 // Encode the number of registers saved. (Reverse the list first.)
1462 std::reverse(first: &SavedRegs[0], last: &SavedRegs[SavedRegIdx]);
1463 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1464
1465 // Get the encoding of the saved registers when we don't have a frame
1466 // pointer.
1467 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(RegCount: SavedRegIdx);
1468 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1469
1470 // Encode the register encoding.
1471 CompactUnwindEncoding |=
1472 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1473 }
1474
1475 return CompactUnwindEncoding;
1476 }
1477};
1478
1479} // end anonymous namespace
1480
1481MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1482 const MCSubtargetInfo &STI,
1483 const MCRegisterInfo &MRI,
1484 const MCTargetOptions &Options) {
1485 const Triple &TheTriple = STI.getTargetTriple();
1486 if (TheTriple.isOSBinFormatMachO())
1487 return new DarwinX86AsmBackend(T, MRI, STI);
1488
1489 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1490 return new WindowsX86AsmBackend(T, false, STI);
1491
1492 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType: TheTriple.getOS());
1493
1494 if (TheTriple.isOSIAMCU())
1495 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1496
1497 return new ELFX86_32AsmBackend(T, OSABI, STI);
1498}
1499
1500MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1501 const MCSubtargetInfo &STI,
1502 const MCRegisterInfo &MRI,
1503 const MCTargetOptions &Options) {
1504 const Triple &TheTriple = STI.getTargetTriple();
1505 if (TheTriple.isOSBinFormatMachO())
1506 return new DarwinX86AsmBackend(T, MRI, STI);
1507
1508 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1509 return new WindowsX86AsmBackend(T, true, STI);
1510
1511 if (TheTriple.isUEFI()) {
1512 assert(TheTriple.isOSBinFormatCOFF() &&
1513 "Only COFF format is supported in UEFI environment.");
1514 return new WindowsX86AsmBackend(T, true, STI);
1515 }
1516
1517 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType: TheTriple.getOS());
1518
1519 if (TheTriple.isX32())
1520 return new ELFX86_X32AsmBackend(T, OSABI, STI);
1521 return new ELFX86_64AsmBackend(T, OSABI, STI);
1522}
1523
1524namespace {
1525class X86ELFStreamer : public MCELFStreamer {
1526public:
1527 X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
1528 std::unique_ptr<MCObjectWriter> OW,
1529 std::unique_ptr<MCCodeEmitter> Emitter)
1530 : MCELFStreamer(Context, std::move(TAB), std::move(OW),
1531 std::move(Emitter)) {}
1532
1533 void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
1534};
1535} // end anonymous namespace
1536
1537void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
1538 const MCSubtargetInfo &STI) {
1539 auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
1540 Backend.emitInstructionBegin(OS&: S, Inst, STI);
1541 S.MCObjectStreamer::emitInstruction(Inst, STI);
1542 Backend.emitInstructionEnd(OS&: S, Inst);
1543}
1544
1545void X86ELFStreamer::emitInstruction(const MCInst &Inst,
1546 const MCSubtargetInfo &STI) {
1547 X86_MC::emitInstruction(S&: *this, Inst, STI);
1548}
1549
1550MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
1551 std::unique_ptr<MCAsmBackend> &&MAB,
1552 std::unique_ptr<MCObjectWriter> &&MOW,
1553 std::unique_ptr<MCCodeEmitter> &&MCE) {
1554 return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
1555 std::move(MCE));
1556}
1557