1//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MCTargetDesc/X86BaseInfo.h"
10#include "MCTargetDesc/X86EncodingOptimization.h"
11#include "MCTargetDesc/X86FixupKinds.h"
12#include "MCTargetDesc/X86MCAsmInfo.h"
13#include "llvm/ADT/StringSwitch.h"
14#include "llvm/BinaryFormat/ELF.h"
15#include "llvm/BinaryFormat/MachO.h"
16#include "llvm/MC/MCAsmBackend.h"
17#include "llvm/MC/MCAssembler.h"
18#include "llvm/MC/MCCodeEmitter.h"
19#include "llvm/MC/MCContext.h"
20#include "llvm/MC/MCDwarf.h"
21#include "llvm/MC/MCELFObjectWriter.h"
22#include "llvm/MC/MCELFStreamer.h"
23#include "llvm/MC/MCExpr.h"
24#include "llvm/MC/MCFixupKindInfo.h"
25#include "llvm/MC/MCInst.h"
26#include "llvm/MC/MCInstrInfo.h"
27#include "llvm/MC/MCObjectStreamer.h"
28#include "llvm/MC/MCObjectWriter.h"
29#include "llvm/MC/MCRegisterInfo.h"
30#include "llvm/MC/MCSubtargetInfo.h"
31#include "llvm/MC/MCValue.h"
32#include "llvm/MC/TargetRegistry.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/raw_ostream.h"
36
37using namespace llvm;
38
39namespace {
40/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
41class X86AlignBranchKind {
42private:
43 uint8_t AlignBranchKind = 0;
44
45public:
46 void operator=(const std::string &Val) {
47 if (Val.empty())
48 return;
49 SmallVector<StringRef, 6> BranchTypes;
50 StringRef(Val).split(A&: BranchTypes, Separator: '+', MaxSplit: -1, KeepEmpty: false);
51 for (auto BranchType : BranchTypes) {
52 if (BranchType == "fused")
53 addKind(Value: X86::AlignBranchFused);
54 else if (BranchType == "jcc")
55 addKind(Value: X86::AlignBranchJcc);
56 else if (BranchType == "jmp")
57 addKind(Value: X86::AlignBranchJmp);
58 else if (BranchType == "call")
59 addKind(Value: X86::AlignBranchCall);
60 else if (BranchType == "ret")
61 addKind(Value: X86::AlignBranchRet);
62 else if (BranchType == "indirect")
63 addKind(Value: X86::AlignBranchIndirect);
64 else {
65 errs() << "invalid argument " << BranchType.str()
66 << " to -x86-align-branch=; each element must be one of: fused, "
67 "jcc, jmp, call, ret, indirect.(plus separated)\n";
68 }
69 }
70 }
71
72 operator uint8_t() const { return AlignBranchKind; }
73 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
74};
75
76X86AlignBranchKind X86AlignBranchKindLoc;
77
78cl::opt<unsigned> X86AlignBranchBoundary(
79 "x86-align-branch-boundary", cl::init(Val: 0),
80 cl::desc(
81 "Control how the assembler should align branches with NOP. If the "
82 "boundary's size is not 0, it should be a power of 2 and no less "
83 "than 32. Branches will be aligned to prevent from being across or "
84 "against the boundary of specified size. The default value 0 does not "
85 "align branches."));
86
87cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
88 "x86-align-branch",
89 cl::desc(
90 "Specify types of branches to align (plus separated list of types):"
91 "\njcc indicates conditional jumps"
92 "\nfused indicates fused conditional jumps"
93 "\njmp indicates direct unconditional jumps"
94 "\ncall indicates direct and indirect calls"
95 "\nret indicates rets"
96 "\nindirect indicates indirect unconditional jumps"),
97 cl::location(L&: X86AlignBranchKindLoc));
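// As an illustrative example (not an exhaustive invocation), passing
//   -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp
// to the assembler requests that fused conditional branches, conditional
// jumps and unconditional jumps be aligned so that they neither cross nor
// end at a 32-byte boundary.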
98
99cl::opt<bool> X86AlignBranchWithin32BBoundaries(
100 "x86-branches-within-32B-boundaries", cl::init(Val: false),
101 cl::desc(
102 "Align selected instructions to mitigate negative performance impact "
103 "of Intel's micro code update for errata skx102. May break "
104 "assumptions about labels corresponding to particular instructions, "
105 "and should be used with caution."));
106
107cl::opt<unsigned> X86PadMaxPrefixSize(
108 "x86-pad-max-prefix-size", cl::init(Val: 0),
109 cl::desc("Maximum number of prefixes to use for padding"));
110
111cl::opt<bool> X86PadForAlign(
112 "x86-pad-for-align", cl::init(Val: false), cl::Hidden,
113 cl::desc("Pad previous instructions to implement align directives"));
114
115cl::opt<bool> X86PadForBranchAlign(
116 "x86-pad-for-branch-align", cl::init(Val: true), cl::Hidden,
117 cl::desc("Pad previous instructions to implement branch alignment"));
118
119class X86AsmBackend : public MCAsmBackend {
120 const MCSubtargetInfo &STI;
121 std::unique_ptr<const MCInstrInfo> MCII;
122 X86AlignBranchKind AlignBranchType;
123 Align AlignBoundary;
124 unsigned TargetPrefixMax = 0;
125
126 MCInst PrevInst;
127 unsigned PrevInstOpcode = 0;
128 MCBoundaryAlignFragment *PendingBA = nullptr;
129 std::pair<MCFragment *, size_t> PrevInstPosition;
130 bool IsRightAfterData = false;
131
132 uint8_t determinePaddingPrefix(const MCInst &Inst) const;
133 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
134 bool needAlign(const MCInst &Inst) const;
135 bool canPadBranches(MCObjectStreamer &OS) const;
136 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
137
138public:
139 X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
140 : MCAsmBackend(llvm::endianness::little), STI(STI),
141 MCII(T.createMCInstrInfo()) {
142 if (X86AlignBranchWithin32BBoundaries) {
143 // At the moment, this defaults to aligning fused branches, unconditional
144 // jumps, and (unfused) conditional jumps with nops. Both the
145 // instructions aligned and the alignment method (nop vs prefix) may
146 // change in the future.
147 AlignBoundary = assumeAligned(Value: 32);
148 AlignBranchType.addKind(Value: X86::AlignBranchFused);
149 AlignBranchType.addKind(Value: X86::AlignBranchJcc);
150 AlignBranchType.addKind(Value: X86::AlignBranchJmp);
151 }
152 // Allow overriding defaults set by main flag
153 if (X86AlignBranchBoundary.getNumOccurrences())
154 AlignBoundary = assumeAligned(Value: X86AlignBranchBoundary);
155 if (X86AlignBranch.getNumOccurrences())
156 AlignBranchType = X86AlignBranchKindLoc;
157 if (X86PadMaxPrefixSize.getNumOccurrences())
158 TargetPrefixMax = X86PadMaxPrefixSize;
159 }
160
161 bool allowAutoPadding() const override;
162 bool allowEnhancedRelaxation() const override;
163 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
164 const MCSubtargetInfo &STI);
165 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);
166
167
168 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
169
170 MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
171
172 bool shouldForceRelocation(const MCFixup &, const MCValue &);
173
174 void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
175 MutableArrayRef<char> Data, uint64_t Value,
176 bool IsResolved) override;
177
178 bool mayNeedRelaxation(const MCInst &Inst,
179 const MCSubtargetInfo &STI) const override;
180
181 bool fixupNeedsRelaxationAdvanced(const MCFixup &, const MCValue &, uint64_t,
182 bool) const override;
183
184 void relaxInstruction(MCInst &Inst,
185 const MCSubtargetInfo &STI) const override;
186
187 bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
188 MCCodeEmitter &Emitter,
189 unsigned &RemainingSize) const;
190
191 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
192 unsigned &RemainingSize) const;
193
194 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
195 unsigned &RemainingSize) const;
196
197 bool finishLayout(const MCAssembler &Asm) const override;
198
199 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
200
201 bool writeNopData(raw_ostream &OS, uint64_t Count,
202 const MCSubtargetInfo *STI) const override;
203};
204} // end anonymous namespace
205
206static bool isRelaxableBranch(unsigned Opcode) {
207 return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
208}
209
210static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
211 bool Is16BitMode = false) {
212 switch (Opcode) {
213 default:
214 llvm_unreachable("invalid opcode for branch");
215 case X86::JCC_1:
216 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
217 case X86::JMP_1:
218 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
219 }
220}
221
222static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
223 unsigned Opcode = MI.getOpcode();
224 return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
225 : X86::getOpcodeForLongImmediateForm(Opcode);
226}
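// As an illustrative sketch of what relaxation does here: a short conditional
// branch such as
//   75 xx                jne .Ltarget     # JCC_1, rel8
// is rewritten into its near form
//   0f 85 xx xx xx xx    jne .Ltarget     # JCC_4, rel32
// when the displacement no longer fits in a signed 8-bit field.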
227
228static X86::CondCode getCondFromBranch(const MCInst &MI,
229 const MCInstrInfo &MCII) {
230 unsigned Opcode = MI.getOpcode();
231 switch (Opcode) {
232 default:
233 return X86::COND_INVALID;
234 case X86::JCC_1: {
235 const MCInstrDesc &Desc = MCII.get(Opcode);
236 return static_cast<X86::CondCode>(
237 MI.getOperand(i: Desc.getNumOperands() - 1).getImm());
238 }
239 }
240}
241
242static X86::SecondMacroFusionInstKind
243classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
244 X86::CondCode CC = getCondFromBranch(MI, MCII);
245 return classifySecondCondCodeInMacroFusion(CC);
246}
247
248/// Check if the instruction uses RIP relative addressing.
249static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
250 unsigned Opcode = MI.getOpcode();
251 const MCInstrDesc &Desc = MCII.get(Opcode);
252 uint64_t TSFlags = Desc.TSFlags;
253 unsigned CurOp = X86II::getOperandBias(Desc);
254 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
255 if (MemoryOperand < 0)
256 return false;
257 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
258 MCRegister BaseReg = MI.getOperand(i: BaseRegNum).getReg();
259 return (BaseReg == X86::RIP);
260}
261
262/// Check if the instruction is a prefix.
263static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
264 return X86II::isPrefix(TSFlags: MCII.get(Opcode).TSFlags);
265}
266
267/// Check if the instruction is valid as the first instruction in macro fusion.
268static bool isFirstMacroFusibleInst(const MCInst &Inst,
269 const MCInstrInfo &MCII) {
270 // An Intel instruction with RIP relative addressing is not macro fusible.
271 if (isRIPRelative(MI: Inst, MCII))
272 return false;
273 X86::FirstMacroFusionInstKind FIK =
274 X86::classifyFirstOpcodeInMacroFusion(Opcode: Inst.getOpcode());
275 return FIK != X86::FirstMacroFusionInstKind::Invalid;
276}
277
/// X86 can reduce the number of NOP bytes needed by padding instructions with
/// segment override prefixes, which gives better performance in some cases.
/// Here, we determine which prefix is the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
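///
/// For example (an illustrative sketch, not verbatim emitted bytes): in
/// 64-bit mode a one-byte pad can be produced by prepending a CS segment
/// override, which the CPU ignores:
/// \code
///    48 01 c3       addq %rax, %rbx
///    2e 48 01 c3    addq %rax, %rbx   # padded with a CS override prefix
/// \endcode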
287uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
288 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
289 "Prefixes can be added only in 32-bit or 64-bit mode.");
290 const MCInstrDesc &Desc = MCII->get(Opcode: Inst.getOpcode());
291 uint64_t TSFlags = Desc.TSFlags;
292
293 // Determine where the memory operand starts, if present.
294 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
295 if (MemoryOperand != -1)
296 MemoryOperand += X86II::getOperandBias(Desc);
297
298 MCRegister SegmentReg;
299 if (MemoryOperand >= 0) {
300 // Check for explicit segment override on memory operand.
301 SegmentReg = Inst.getOperand(i: MemoryOperand + X86::AddrSegmentReg).getReg();
302 }
303
304 switch (TSFlags & X86II::FormMask) {
305 default:
306 break;
307 case X86II::RawFrmDstSrc: {
308 // Check segment override opcode prefix as needed (not for %ds).
309 if (Inst.getOperand(i: 2).getReg() != X86::DS)
310 SegmentReg = Inst.getOperand(i: 2).getReg();
311 break;
312 }
313 case X86II::RawFrmSrc: {
314 // Check segment override opcode prefix as needed (not for %ds).
315 if (Inst.getOperand(i: 1).getReg() != X86::DS)
316 SegmentReg = Inst.getOperand(i: 1).getReg();
317 break;
318 }
319 case X86II::RawFrmMemOffs: {
320 // Check segment override opcode prefix as needed.
321 SegmentReg = Inst.getOperand(i: 1).getReg();
322 break;
323 }
324 }
325
326 if (SegmentReg)
327 return X86::getSegmentOverridePrefixForReg(Reg: SegmentReg);
328
329 if (STI.hasFeature(Feature: X86::Is64Bit))
330 return X86::CS_Encoding;
331
332 if (MemoryOperand >= 0) {
333 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
334 MCRegister BaseReg = Inst.getOperand(i: BaseRegNum).getReg();
335 if (BaseReg == X86::ESP || BaseReg == X86::EBP)
336 return X86::SS_Encoding;
337 }
338 return X86::DS_Encoding;
339}
340
341/// Check if the two instructions will be macro-fused on the target cpu.
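/// For example (illustrative; the exact fusible pairs depend on the
/// subtarget), a compare followed by a dependent conditional branch such as
/// \code
///   cmpq %rsi, %rdi
///   je   .Ltarget
/// \endcode
/// may be macro-fused on recent Intel cores.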
342bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
343 const MCInstrDesc &InstDesc = MCII->get(Opcode: Jcc.getOpcode());
344 if (!InstDesc.isConditionalBranch())
345 return false;
346 if (!isFirstMacroFusibleInst(Inst: Cmp, MCII: *MCII))
347 return false;
348 const X86::FirstMacroFusionInstKind CmpKind =
349 X86::classifyFirstOpcodeInMacroFusion(Opcode: Cmp.getOpcode());
350 const X86::SecondMacroFusionInstKind BranchKind =
351 classifySecondInstInMacroFusion(MI: Jcc, MCII: *MCII);
352 return X86::isMacroFused(FirstKind: CmpKind, SecondKind: BranchKind);
353}
354
355/// Check if the instruction has a variant symbol operand.
356static bool hasVariantSymbol(const MCInst &MI) {
357 for (auto &Operand : MI) {
358 if (!Operand.isExpr())
359 continue;
360 const MCExpr &Expr = *Operand.getExpr();
361 if (Expr.getKind() == MCExpr::SymbolRef &&
362 cast<MCSymbolRefExpr>(Val: &Expr)->getSpecifier())
363 return true;
364 }
365 return false;
366}
367
368bool X86AsmBackend::allowAutoPadding() const {
369 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
370}
371
372bool X86AsmBackend::allowEnhancedRelaxation() const {
373 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
374}
375
376/// X86 has certain instructions which enable interrupts exactly one
377/// instruction *after* the instruction which stores to SS. Return true if the
378/// given instruction may have such an interrupt delay slot.
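///
/// For example (illustrative), in the sequence
/// \code
///   movw %ax, %ss
///   movl %ebx, %esp
/// \endcode
/// no padding may be inserted between the two instructions: a NOP there would
/// sit in the interrupt shadow created by the store to %ss and move the
/// second instruction out of it.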
379static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
380 switch (InstOpcode) {
381 case X86::POPSS16:
382 case X86::POPSS32:
383 case X86::STI:
384 return true;
385
386 case X86::MOV16sr:
387 case X86::MOV32sr:
388 case X86::MOV64sr:
389 case X86::MOV16sm:
390 // In fact, this is only the case if the first operand is SS. However, as
391 // segment moves occur extremely rarely, this is just a minor pessimization.
392 return true;
393 }
394 return false;
395}
396
397/// Check if the instruction to be emitted is right after any data.
398static bool
399isRightAfterData(MCFragment *CurrentFragment,
400 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
401 MCFragment *F = CurrentFragment;
402 // Since data is always emitted into a DataFragment, our check strategy is
403 // simple here.
404 // - If the fragment is a DataFragment
405 // - If it's empty (section start or data after align), return false.
406 // - If it's not the fragment where the previous instruction is,
407 // returns true.
408 // - If it's the fragment holding the previous instruction but its
409 // size changed since the previous instruction was emitted into
410 // it, returns true.
411 // - Otherwise returns false.
412 // - If the fragment is not a DataFragment, returns false.
413 if (auto *DF = dyn_cast_or_null<MCDataFragment>(Val: F))
414 return DF->getContents().size() &&
415 (DF != PrevInstPosition.first ||
416 DF->getContents().size() != PrevInstPosition.second);
417
418 return false;
419}
420
421/// \returns the fragment size if it has instructions, otherwise returns 0.
422static size_t getSizeForInstFragment(const MCFragment *F) {
423 if (!F || !F->hasInstructions())
424 return 0;
425 // MCEncodedFragmentWithContents being templated makes this tricky.
426 switch (F->getKind()) {
427 default:
428 llvm_unreachable("Unknown fragment with instructions!");
429 case MCFragment::FT_Data:
430 return cast<MCDataFragment>(Val: *F).getContents().size();
431 case MCFragment::FT_Relaxable:
432 return cast<MCRelaxableFragment>(Val: *F).getContents().size();
433 }
434}
435
/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
438bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
439 if (hasVariantSymbol(MI: Inst))
    // The linker may rewrite the instruction with a variant symbol operand
    // (e.g. TLSCALL).
442 return false;
443
444 if (mayHaveInterruptDelaySlot(InstOpcode: PrevInstOpcode))
445 // If this instruction follows an interrupt enabling instruction with a one
446 // instruction delay, inserting a nop would change behavior.
447 return false;
448
  if (isPrefix(PrevInstOpcode, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst.getOpcode(), *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (IsRightAfterData)
    // If this instruction follows any data, there is no clear instruction
    // boundary, so inserting a nop/prefix would change semantics.
    return false;
463
464 return true;
465}
466
467bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
468 if (!OS.getAllowAutoPadding())
469 return false;
470 assert(allowAutoPadding() && "incorrect initialization!");
471
  // We only pad in the text section.
473 if (!OS.getCurrentSectionOnly()->isText())
474 return false;
475
  // TODO: Currently we don't deal with bundle cases.
477 if (OS.getAssembler().isBundlingEnabled())
478 return false;
479
480 // Branches only need to be aligned in 32-bit or 64-bit mode.
481 if (!(STI.hasFeature(Feature: X86::Is64Bit) || STI.hasFeature(Feature: X86::Is32Bit)))
482 return false;
483
484 return true;
485}
486
/// Check if the instruction is a branch of a type that should be aligned.
488bool X86AsmBackend::needAlign(const MCInst &Inst) const {
489 const MCInstrDesc &Desc = MCII->get(Opcode: Inst.getOpcode());
490 return (Desc.isConditionalBranch() &&
491 (AlignBranchType & X86::AlignBranchJcc)) ||
492 (Desc.isUnconditionalBranch() &&
493 (AlignBranchType & X86::AlignBranchJmp)) ||
494 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
495 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
496 (Desc.isIndirectBranch() &&
497 (AlignBranchType & X86::AlignBranchIndirect));
498}
499
500/// Insert BoundaryAlignFragment before instructions to align branches.
501void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
502 const MCInst &Inst, const MCSubtargetInfo &STI) {
503 // Used by canPadInst. Done here, because in emitInstructionEnd, the current
504 // fragment will have changed.
505 IsRightAfterData =
506 isRightAfterData(CurrentFragment: OS.getCurrentFragment(), PrevInstPosition);
507
508 if (!canPadBranches(OS))
509 return;
510
511 // NB: PrevInst only valid if canPadBranches is true.
  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen; clear the pending fragment.
    PendingBA = nullptr;
515
  // When branch padding is enabled (basically only for the SKX102 erratum, so
  // unlikely), we call canPadInst (not cheap) twice. However, in the common
  // case, we can avoid unnecessary calls to that, as this is otherwise only
  // used for relaxable fragments.
520 if (!canPadInst(Inst, OS))
521 return;
522
523 if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
524 // Macro fusion actually happens and there is no other fragment inserted
525 // after the previous instruction.
526 //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
528 // we met the first instruction in the fused pair and we'll tie them
529 // together in emitInstructionEnd.
530 //
531 // Note: When there is at least one fragment, such as MCAlignFragment,
532 // inserted after the previous instruction, e.g.
533 //
534 // \code
535 // cmp %rax %rcx
536 // .align 16
537 // je .Label0
538 // \ endcode
539 //
    // We will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
542 return;
543 }
544
  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
    // insert a BoundaryAlign fragment.
549 PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
550 args&: AlignBoundary, args: STI);
551 OS.insert(F: PendingBA);
552 }
553}
554
555/// Set the last fragment to be aligned for the BoundaryAlignFragment.
556void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
557 const MCInst &Inst) {
558 MCFragment *CF = OS.getCurrentFragment();
559 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(Val: CF))
560 F->setAllowAutoPadding(canPadInst(Inst, OS));
561
  // Update PrevInstOpcode here; canPadInst() reads it.
563 PrevInstOpcode = Inst.getOpcode();
564 PrevInstPosition = std::make_pair(x&: CF, y: getSizeForInstFragment(F: CF));
565
566 if (!canPadBranches(OS))
567 return;
568
569 // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
570 PrevInst = Inst;
571
572 if (!needAlign(Inst) || !PendingBA)
573 return;
574
575 // Tie the aligned instructions into a pending BoundaryAlign.
576 PendingBA->setLastFragment(CF);
577 PendingBA = nullptr;
578
579 // We need to ensure that further data isn't added to the current
580 // DataFragment, so that we can get the size of instructions later in
581 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
582 // DataFragment.
583 if (isa_and_nonnull<MCDataFragment>(Val: CF))
584 OS.insert(F: OS.getContext().allocFragment<MCDataFragment>());
585
586 // Update the maximum alignment on the current section if necessary.
587 MCSection *Sec = OS.getCurrentSectionOnly();
588 Sec->ensureMinAlignment(MinAlignment: AlignBoundary);
589}
590
591std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
592 if (STI.getTargetTriple().isOSBinFormatELF()) {
593 unsigned Type;
594 if (STI.getTargetTriple().getArch() == Triple::x86_64) {
595 Type = llvm::StringSwitch<unsigned>(Name)
596#define ELF_RELOC(X, Y) .Case(#X, Y)
597#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
598#undef ELF_RELOC
599 .Case(S: "BFD_RELOC_NONE", Value: ELF::R_X86_64_NONE)
600 .Case(S: "BFD_RELOC_8", Value: ELF::R_X86_64_8)
601 .Case(S: "BFD_RELOC_16", Value: ELF::R_X86_64_16)
602 .Case(S: "BFD_RELOC_32", Value: ELF::R_X86_64_32)
603 .Case(S: "BFD_RELOC_64", Value: ELF::R_X86_64_64)
604 .Default(Value: -1u);
605 } else {
606 Type = llvm::StringSwitch<unsigned>(Name)
607#define ELF_RELOC(X, Y) .Case(#X, Y)
608#include "llvm/BinaryFormat/ELFRelocs/i386.def"
609#undef ELF_RELOC
610 .Case(S: "BFD_RELOC_NONE", Value: ELF::R_386_NONE)
611 .Case(S: "BFD_RELOC_8", Value: ELF::R_386_8)
612 .Case(S: "BFD_RELOC_16", Value: ELF::R_386_16)
613 .Case(S: "BFD_RELOC_32", Value: ELF::R_386_32)
614 .Default(Value: -1u);
615 }
616 if (Type == -1u)
617 return std::nullopt;
618 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
619 }
620 return MCAsmBackend::getFixupKind(Name);
621}
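// As a usage sketch (illustrative, assuming ELF output): the names accepted
// above are the ones the .reloc directive understands, e.g.
//   .reloc ., R_X86_64_NONE, foo
//   .reloc ., BFD_RELOC_32, bar
// which emit the named relocation directly, bypassing normal fixup
// processing (see the FirstLiteralRelocationKind handling above).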
622
623MCFixupKindInfo X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
624 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
625 // clang-format off
626 {.Name: "reloc_riprel_4byte", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
627 {.Name: "reloc_riprel_4byte_movq_load", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
628 {.Name: "reloc_riprel_4byte_movq_load_rex2", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
629 {.Name: "reloc_riprel_4byte_relax", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
630 {.Name: "reloc_riprel_4byte_relax_rex", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
631 {.Name: "reloc_riprel_4byte_relax_rex2", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
632 {.Name: "reloc_riprel_4byte_relax_evex", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
633 {.Name: "reloc_signed_4byte", .TargetOffset: 0, .TargetSize: 32, .Flags: 0},
634 {.Name: "reloc_signed_4byte_relax", .TargetOffset: 0, .TargetSize: 32, .Flags: 0},
635 {.Name: "reloc_global_offset_table", .TargetOffset: 0, .TargetSize: 32, .Flags: 0},
636 {.Name: "reloc_branch_4byte_pcrel", .TargetOffset: 0, .TargetSize: 32, .Flags: MCFixupKindInfo::FKF_IsPCRel},
637 // clang-format on
638 };
639
640 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
641 // do not require any extra processing.
642 if (mc::isRelocation(FixupKind: Kind))
643 return MCAsmBackend::getFixupKindInfo(Kind: FK_NONE);
644
645 if (Kind < FirstTargetFixupKind)
646 return MCAsmBackend::getFixupKindInfo(Kind);
647
648 assert(unsigned(Kind - FirstTargetFixupKind) < X86::NumTargetFixupKinds &&
649 "Invalid kind!");
650 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
651 return Infos[Kind - FirstTargetFixupKind];
652}
653
654static unsigned getFixupKindSize(unsigned Kind) {
655 switch (Kind) {
656 default:
657 llvm_unreachable("invalid fixup kind!");
658 case FK_NONE:
659 return 0;
660 case FK_PCRel_1:
661 case FK_SecRel_1:
662 case FK_Data_1:
663 return 1;
664 case FK_PCRel_2:
665 case FK_SecRel_2:
666 case FK_Data_2:
667 return 2;
668 case FK_PCRel_4:
669 case X86::reloc_riprel_4byte:
670 case X86::reloc_riprel_4byte_relax:
671 case X86::reloc_riprel_4byte_relax_rex:
672 case X86::reloc_riprel_4byte_relax_rex2:
673 case X86::reloc_riprel_4byte_movq_load:
674 case X86::reloc_riprel_4byte_movq_load_rex2:
675 case X86::reloc_riprel_4byte_relax_evex:
676 case X86::reloc_signed_4byte:
677 case X86::reloc_signed_4byte_relax:
678 case X86::reloc_global_offset_table:
679 case X86::reloc_branch_4byte_pcrel:
680 case FK_SecRel_4:
681 case FK_Data_4:
682 return 4;
683 case FK_PCRel_8:
684 case FK_SecRel_8:
685 case FK_Data_8:
686 return 8;
687 }
688}
689
690void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
691 const MCValue &Target,
692 MutableArrayRef<char> Data, uint64_t Value,
693 bool IsResolved) {
694 // Force relocation when there is a specifier. This might be too conservative
695 // - GAS doesn't emit a relocation for call local@plt; local:.
696 if (Target.getSpecifier())
697 IsResolved = false;
698 maybeAddReloc(F, Fixup, Target, Value, IsResolved);
699
700 auto Kind = Fixup.getKind();
701 if (mc::isRelocation(FixupKind: Kind))
702 return;
703 unsigned Size = getFixupKindSize(Kind);
704
705 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
706
707 int64_t SignedValue = static_cast<int64_t>(Value);
708 if (IsResolved && Fixup.isPCRel()) {
    // Check that the PC-relative fixup fits into the fixup size.
710 if (Size > 0 && !isIntN(N: Size * 8, x: SignedValue))
711 getContext().reportError(L: Fixup.getLoc(),
712 Msg: "value of " + Twine(SignedValue) +
713 " is too large for field of " + Twine(Size) +
714 ((Size == 1) ? " byte." : " bytes."));
715 } else {
    // Check that the upper bits are either all zeros or all ones.
717 // Specifically ignore overflow/underflow as long as the leakage is
718 // limited to the lower bits. This is to remain compatible with
719 // other assemblers.
720 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
721 "Value does not fit in the Fixup field");
722 }
723
724 for (unsigned i = 0; i != Size; ++i)
725 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
726}
727
728bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
729 const MCSubtargetInfo &STI) const {
730 unsigned Opcode = MI.getOpcode();
731 unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
732 return isRelaxableBranch(Opcode) ||
733 (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
734 MI.getOperand(i: MI.getNumOperands() - 1 - SkipOperands).isExpr());
735}
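// As an illustrative example (opcode names here are the usual X86 MC names,
// not taken from this file): a branch like "jne .L" assembled as JCC_1 is
// relaxable, and an arithmetic instruction assembled with a short-immediate
// opcode (e.g. ADD64ri8) whose immediate operand is still a symbolic
// expression may also need to grow into its long-immediate form (ADD64ri32).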
736
737bool X86AsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
738 const MCValue &Target,
739 uint64_t Value,
740 bool Resolved) const {
741 // If resolved, relax if the value is too big for a (signed) i8.
742 //
  // Currently, `jmp local@plt` relaxes JMP even if the offset is small, which
  // differs from GAS.
745 if (Resolved)
746 return !isInt<8>(x: Value) || Target.getSpecifier();
747
748 // Otherwise, relax unless there is a @ABS8 specifier.
749 if (Fixup.getKind() == FK_Data_1 && Target.getAddSym() &&
750 Target.getSpecifier() == X86::S_ABS8)
751 return false;
752 return true;
753}
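// For example (illustrative), with the x86-64 @ABS8 specifier
//   cmpq $foo@ABS8, %rdi
// the assembler keeps the 8-bit immediate form; whether foo really fits in a
// signed 8-bit field is then checked when the relocation is resolved.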
754
755// FIXME: Can tblgen help at all here to verify there aren't other instructions
756// we can relax?
757void X86AsmBackend::relaxInstruction(MCInst &Inst,
758 const MCSubtargetInfo &STI) const {
  // X86 relaxes a 1-byte pcrel branch to a 4-byte pcrel branch, or a
  // short-immediate form to the corresponding long-immediate form.
760 bool Is16BitMode = STI.hasFeature(Feature: X86::Is16Bit);
761 unsigned RelaxedOp = getRelaxedOpcode(MI: Inst, Is16BitMode);
762
763 if (RelaxedOp == Inst.getOpcode()) {
764 SmallString<256> Tmp;
765 raw_svector_ostream OS(Tmp);
766 Inst.dump_pretty(OS);
767 OS << "\n";
768 report_fatal_error(reason: "unexpected instruction to relax: " + OS.str());
769 }
770
771 Inst.setOpcode(RelaxedOp);
772}
773
774bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
775 MCCodeEmitter &Emitter,
776 unsigned &RemainingSize) const {
777 if (!RF.getAllowAutoPadding())
778 return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
783 if (mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
784 return false;
785
786 const unsigned OldSize = RF.getContents().size();
787 if (OldSize == 15)
788 return false;
789
790 const unsigned MaxPossiblePad = std::min(a: 15 - OldSize, b: RemainingSize);
791 const unsigned RemainingPrefixSize = [&]() -> unsigned {
792 SmallString<15> Code;
793 X86_MC::emitPrefix(MCE&: Emitter, MI: RF.getInst(), CB&: Code, STI);
794 assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
795
    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it is safe to add.
    // Various targets (older chips mostly, but also Atom family) encounter
    // decoder stalls with too many prefixes. For testing purposes, we set the
    // value externally for the moment.
801 unsigned ExistingPrefixSize = Code.size();
802 if (TargetPrefixMax <= ExistingPrefixSize)
803 return 0;
804 return TargetPrefixMax - ExistingPrefixSize;
805 }();
806 const unsigned PrefixBytesToAdd =
807 std::min(a: MaxPossiblePad, b: RemainingPrefixSize);
808 if (PrefixBytesToAdd == 0)
809 return false;
810
811 const uint8_t Prefix = determinePaddingPrefix(Inst: RF.getInst());
812
813 SmallString<256> Code;
814 Code.append(NumInputs: PrefixBytesToAdd, Elt: Prefix);
815 Code.append(in_start: RF.getContents().begin(), in_end: RF.getContents().end());
816 RF.setContents(Code);
817
818 // Adjust the fixups for the change in offsets
819 for (auto &F : RF.getFixups()) {
820 F.setOffset(F.getOffset() + PrefixBytesToAdd);
821 }
822
823 RemainingSize -= PrefixBytesToAdd;
824 return true;
825}
826
827bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
828 MCCodeEmitter &Emitter,
829 unsigned &RemainingSize) const {
830 if (!mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
831 // TODO: There are lots of other tricks we could apply for increasing
832 // encoding size without impacting performance.
833 return false;
834
835 MCInst Relaxed = RF.getInst();
836 relaxInstruction(Inst&: Relaxed, STI: *RF.getSubtargetInfo());
837
838 SmallVector<MCFixup, 4> Fixups;
839 SmallString<15> Code;
840 Emitter.encodeInstruction(Inst: Relaxed, CB&: Code, Fixups, STI: *RF.getSubtargetInfo());
841 const unsigned OldSize = RF.getContents().size();
842 const unsigned NewSize = Code.size();
843 assert(NewSize >= OldSize && "size decrease during relaxation?");
844 unsigned Delta = NewSize - OldSize;
845 if (Delta > RemainingSize)
846 return false;
847 RF.setInst(Relaxed);
848 RF.setContents(Code);
849 RF.setFixups(Fixups);
850 RemainingSize -= Delta;
851 return true;
852}
853
854bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
855 MCCodeEmitter &Emitter,
856 unsigned &RemainingSize) const {
857 bool Changed = false;
858 if (RemainingSize != 0)
859 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
860 if (RemainingSize != 0)
861 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
862 return Changed;
863}
864
865bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
866 // See if we can further relax some instructions to cut down on the number of
867 // nop bytes required for code alignment. The actual win is in reducing
868 // instruction count, not number of bytes. Modern X86-64 can easily end up
869 // decode limited. It is often better to reduce the number of instructions
870 // (i.e. eliminate nops) even at the cost of increasing the size and
871 // complexity of others.
872 if (!X86PadForAlign && !X86PadForBranchAlign)
873 return false;
874
  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non-g difference.
878 DenseSet<MCFragment *> LabeledFragments;
879 for (const MCSymbol &S : Asm.symbols())
880 LabeledFragments.insert(V: S.getFragment());
881
882 bool Changed = false;
883 for (MCSection &Sec : Asm) {
884 if (!Sec.isText())
885 continue;
886
887 SmallVector<MCRelaxableFragment *, 4> Relaxable;
888 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
889 MCFragment &F = *I;
890
891 if (LabeledFragments.count(V: &F))
892 Relaxable.clear();
893
894 if (F.getKind() == MCFragment::FT_Data) // Skip and ignore
895 continue;
896
897 if (F.getKind() == MCFragment::FT_Relaxable) {
898 auto &RF = cast<MCRelaxableFragment>(Val&: *I);
899 Relaxable.push_back(Elt: &RF);
900 continue;
901 }
902
903 auto canHandle = [](MCFragment &F) -> bool {
904 switch (F.getKind()) {
905 default:
906 return false;
907 case MCFragment::FT_Align:
908 return X86PadForAlign;
909 case MCFragment::FT_BoundaryAlign:
910 return X86PadForBranchAlign;
911 }
912 };
913 // For any unhandled kind, assume we can't change layout.
914 if (!canHandle(F)) {
915 Relaxable.clear();
916 continue;
917 }
918
919 const uint64_t OrigSize = Asm.computeFragmentSize(F);
920
921 // To keep the effects local, prefer to relax instructions closest to
922 // the align directive. This is purely about human understandability
923 // of the resulting code. If we later find a reason to expand
924 // particular instructions over others, we can adjust.
925 unsigned RemainingSize = OrigSize;
926 while (!Relaxable.empty() && RemainingSize != 0) {
927 auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
931 Changed |= padInstructionEncoding(RF, Emitter&: Asm.getEmitter(), RemainingSize);
932
933 // If we have an instruction which hasn't been fully relaxed, we can't
934 // skip past it and insert bytes before it. Changing its starting
935 // offset might require a larger negative offset than it can encode.
936 // We don't need to worry about larger positive offsets as none of the
937 // possible offsets between this and our align are visible, and the
938 // ones afterwards aren't changing.
939 if (mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
940 break;
941 }
942 Relaxable.clear();
943
944 // If we're looking at a boundary align, make sure we don't try to pad
945 // its target instructions for some following directive. Doing so would
946 // break the alignment of the current boundary align.
947 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(Val: &F)) {
948 cast<MCBoundaryAlignFragment>(Val&: F).setSize(RemainingSize);
949 Changed = true;
950 const MCFragment *LastFragment = BF->getLastFragment();
951 if (!LastFragment)
952 continue;
953 while (&*I != LastFragment)
954 ++I;
955 }
956 }
957 }
958
959 return Changed;
960}
961
962unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
963 if (STI.hasFeature(Feature: X86::Is16Bit))
964 return 4;
965 if (!STI.hasFeature(Feature: X86::FeatureNOPL) && !STI.hasFeature(Feature: X86::Is64Bit))
966 return 1;
967 if (STI.hasFeature(Feature: X86::TuningFast7ByteNOP))
968 return 7;
969 if (STI.hasFeature(Feature: X86::TuningFast15ByteNOP))
970 return 15;
971 if (STI.hasFeature(Feature: X86::TuningFast11ByteNOP))
972 return 11;
  // FIXME: handle 32-bit mode
  // 15 bytes is the longest single NOP instruction, but 10 bytes is
  // commonly the longest that can be efficiently decoded.
976 return 10;
977}
978
979/// Write a sequence of optimal nops to the output, covering \p Count
980/// bytes.
981/// \return - true on success, false on failure
982bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
983 const MCSubtargetInfo *STI) const {
984 static const char Nops32Bit[10][11] = {
985 // nop
986 "\x90",
987 // xchg %ax,%ax
988 "\x66\x90",
989 // nopl (%[re]ax)
990 "\x0f\x1f\x00",
991 // nopl 0(%[re]ax)
992 "\x0f\x1f\x40\x00",
993 // nopl 0(%[re]ax,%[re]ax,1)
994 "\x0f\x1f\x44\x00\x00",
995 // nopw 0(%[re]ax,%[re]ax,1)
996 "\x66\x0f\x1f\x44\x00\x00",
997 // nopl 0L(%[re]ax)
998 "\x0f\x1f\x80\x00\x00\x00\x00",
999 // nopl 0L(%[re]ax,%[re]ax,1)
1000 "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1001 // nopw 0L(%[re]ax,%[re]ax,1)
1002 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1003 // nopw %cs:0L(%[re]ax,%[re]ax,1)
1004 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1005 };
1006
1007 // 16-bit mode uses different nop patterns than 32-bit.
1008 static const char Nops16Bit[4][11] = {
1009 // nop
1010 "\x90",
1011 // xchg %eax,%eax
1012 "\x66\x90",
1013 // lea 0(%si),%si
1014 "\x8d\x74\x00",
1015 // lea 0w(%si),%si
1016 "\x8d\xb4\x00\x00",
1017 };
1018
1019 const char(*Nops)[11] =
1020 STI->hasFeature(Feature: X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1021
1022 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(STI: *STI);
1023
1024 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1025 // length.
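  // For example (an illustrative run), a request for Count == 23 with
  // MaxNopLength == 10 emits:
  //   66 2e 0f 1f 84 00 00 00 00 00   nopw %cs:0L(%rax,%rax,1)
  //   66 2e 0f 1f 84 00 00 00 00 00   nopw %cs:0L(%rax,%rax,1)
  //   0f 1f 00                        nopl (%rax)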
1026 do {
1027 const uint8_t ThisNopLength = (uint8_t) std::min(a: Count, b: MaxNopLength);
1028 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1029 for (uint8_t i = 0; i < Prefixes; i++)
1030 OS << '\x66';
1031 const uint8_t Rest = ThisNopLength - Prefixes;
1032 if (Rest != 0)
1033 OS.write(Ptr: Nops[Rest - 1], Size: Rest);
1034 Count -= ThisNopLength;
1035 } while (Count != 0);
1036
1037 return true;
1038}
1039
1040/* *** */
1041
1042namespace {
1043
1044class ELFX86AsmBackend : public X86AsmBackend {
1045public:
1046 uint8_t OSABI;
1047 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1048 : X86AsmBackend(T, STI), OSABI(OSABI) {}
1049};
1050
1051class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1052public:
1053 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1054 const MCSubtargetInfo &STI)
1055 : ELFX86AsmBackend(T, OSABI, STI) {}
1056
1057 std::unique_ptr<MCObjectTargetWriter>
1058 createObjectTargetWriter() const override {
1059 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, EMachine: ELF::EM_386);
1060 }
1061};
1062
1063class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1064public:
1065 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1066 const MCSubtargetInfo &STI)
1067 : ELFX86AsmBackend(T, OSABI, STI) {}
1068
1069 std::unique_ptr<MCObjectTargetWriter>
1070 createObjectTargetWriter() const override {
1071 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1072 EMachine: ELF::EM_X86_64);
1073 }
1074};
1075
1076class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1077public:
1078 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1079 const MCSubtargetInfo &STI)
1080 : ELFX86AsmBackend(T, OSABI, STI) {}
1081
1082 std::unique_ptr<MCObjectTargetWriter>
1083 createObjectTargetWriter() const override {
1084 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1085 EMachine: ELF::EM_IAMCU);
1086 }
1087};
1088
1089class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1090public:
1091 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1092 const MCSubtargetInfo &STI)
1093 : ELFX86AsmBackend(T, OSABI, STI) {}
1094
1095 std::unique_ptr<MCObjectTargetWriter>
1096 createObjectTargetWriter() const override {
1097 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, EMachine: ELF::EM_X86_64);
1098 }
1099};
1100
1101class WindowsX86AsmBackend : public X86AsmBackend {
1102 bool Is64Bit;
1103
1104public:
1105 WindowsX86AsmBackend(const Target &T, bool is64Bit,
1106 const MCSubtargetInfo &STI)
1107 : X86AsmBackend(T, STI)
1108 , Is64Bit(is64Bit) {
1109 }
1110
1111 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1112 return StringSwitch<std::optional<MCFixupKind>>(Name)
1113 .Case(S: "dir32", Value: FK_Data_4)
1114 .Case(S: "secrel32", Value: FK_SecRel_4)
1115 .Case(S: "secidx", Value: FK_SecRel_2)
1116 .Default(Value: MCAsmBackend::getFixupKind(Name));
1117 }
1118
1119 std::unique_ptr<MCObjectTargetWriter>
1120 createObjectTargetWriter() const override {
1121 return createX86WinCOFFObjectWriter(Is64Bit);
1122 }
1123};
1124
1125namespace CU {
1126
1127 /// Compact unwind encoding values.
1128 enum CompactUnwindEncodings {
    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately
    /// after the return address, then [RE]SP is moved to [RE]BP.
1131 UNWIND_MODE_BP_FRAME = 0x01000000,
1132
1133 /// A frameless function with a small constant stack size.
1134 UNWIND_MODE_STACK_IMMD = 0x02000000,
1135
1136 /// A frameless function with a large constant stack size.
1137 UNWIND_MODE_STACK_IND = 0x03000000,
1138
1139 /// No compact unwind encoding is available.
1140 UNWIND_MODE_DWARF = 0x04000000,
1141
1142 /// Mask for encoding the frame registers.
1143 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
1144
1145 /// Mask for encoding the frameless registers.
1146 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1147 };
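
  // As a sketch of how these values are combined (mirroring the logic in
  // generateCompactUnwindEncoding below), a frame-based function is encoded
  // roughly as
  //   UNWIND_MODE_BP_FRAME | ((StackAdjust & 0xFF) << 16) |
  //       (RegEnc & UNWIND_BP_FRAME_REGISTERS)
  // while a small frameless function uses
  //   UNWIND_MODE_STACK_IMMD | ((StackSize & 0xFF) << 16) |
  //       ((SavedRegCount & 0x7) << 10) |
  //       (RegEnc & UNWIND_FRAMELESS_STACK_REG_PERMUTATION)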
1148
1149} // namespace CU
1150
1151class DarwinX86AsmBackend : public X86AsmBackend {
1152 const MCRegisterInfo &MRI;
1153
1154 /// Number of registers that can be saved in a compact unwind encoding.
1155 enum { CU_NUM_SAVED_REGS = 6 };
1156
1157 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1158 Triple TT;
1159 bool Is64Bit;
1160
  unsigned OffsetSize;  ///< Stack offset produced by one "push" instruction.
  unsigned MoveInstrSize; ///< Size of a "move" instruction.
  unsigned StackDivide; ///< Amount to divide the stack size by.
1164protected:
1165 /// Size of a "push" instruction for the given register.
1166 unsigned PushInstrSize(MCRegister Reg) const {
1167 switch (Reg.id()) {
1168 case X86::EBX:
1169 case X86::ECX:
1170 case X86::EDX:
1171 case X86::EDI:
1172 case X86::ESI:
1173 case X86::EBP:
1174 case X86::RBX:
1175 case X86::RBP:
1176 return 1;
1177 case X86::R12:
1178 case X86::R13:
1179 case X86::R14:
1180 case X86::R15:
1181 return 2;
1182 }
1183 return 1;
1184 }
1185
1186private:
1187 /// Get the compact unwind number for a given register. The number
1188 /// corresponds to the enum lists in compact_unwind_encoding.h.
1189 int getCompactUnwindRegNum(unsigned Reg) const {
1190 static const MCPhysReg CU32BitRegs[7] = {
1191 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1192 };
1193 static const MCPhysReg CU64BitRegs[] = {
1194 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1195 };
1196 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1197 for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1198 if (*CURegs == Reg)
1199 return Idx;
1200
1201 return -1;
1202 }
1203
1204 /// Return the registers encoded for a compact encoding with a frame
1205 /// pointer.
1206 uint32_t encodeCompactUnwindRegistersWithFrame() const {
1207 // Encode the registers in the order they were saved --- 3-bits per
1208 // register. The list of saved registers is assumed to be in reverse
1209 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1210 uint32_t RegEnc = 0;
1211 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1212 unsigned Reg = SavedRegs[i];
1213 if (Reg == 0) break;
1214
1215 int CURegNum = getCompactUnwindRegNum(Reg);
1216 if (CURegNum == -1) return ~0U;
1217
1218 // Encode the 3-bit register number in order, skipping over 3-bits for
1219 // each register.
1220 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1221 }
1222
1223 assert((RegEnc & 0x3FFFF) == RegEnc &&
1224 "Invalid compact register encoding!");
1225 return RegEnc;
1226 }
1227
1228 /// Create the permutation encoding used with frameless stacks. It is
1229 /// passed the number of registers to be saved and an array of the registers
1230 /// saved.
1231 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1232 // The saved registers are numbered from 1 to 6. In order to encode the
1233 // order in which they were saved, we re-number them according to their
1234 // place in the register order. The re-numbering is relative to the last
1235 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1236 // that order:
1237 //
1238 // Orig Re-Num
1239 // ---- ------
1240 // 6 6
1241 // 2 2
1242 // 4 3
1243 // 5 3
1244 //
1245 for (unsigned i = 0; i < RegCount; ++i) {
1246 int CUReg = getCompactUnwindRegNum(Reg: SavedRegs[i]);
1247 if (CUReg == -1) return ~0U;
1248 SavedRegs[i] = CUReg;
1249 }
1250
1251 // Reverse the list.
1252 std::reverse(first: &SavedRegs[0], last: &SavedRegs[CU_NUM_SAVED_REGS]);
1253
1254 uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1255 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1256 unsigned Countless = 0;
1257 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1258 if (SavedRegs[j] < SavedRegs[i])
1259 ++Countless;
1260
1261 RenumRegs[i] = SavedRegs[i] - Countless - 1;
1262 }
1263
1264 // Take the renumbered values and encode them into a 10-bit number.
1265 uint32_t permutationEncoding = 0;
1266 switch (RegCount) {
1267 case 6:
1268 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1269 + 6 * RenumRegs[2] + 2 * RenumRegs[3]
1270 + RenumRegs[4];
1271 break;
1272 case 5:
1273 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1274 + 6 * RenumRegs[3] + 2 * RenumRegs[4]
1275 + RenumRegs[5];
1276 break;
1277 case 4:
1278 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
1279 + 3 * RenumRegs[4] + RenumRegs[5];
1280 break;
1281 case 3:
1282 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
1283 + RenumRegs[5];
1284 break;
1285 case 2:
1286 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
1287 break;
1288 case 1:
1289 permutationEncoding |= RenumRegs[5];
1290 break;
1291 }
1292
1293 assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1294 "Invalid compact register encoding!");
1295 return permutationEncoding;
1296 }
1297
1298public:
1299 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1300 const MCSubtargetInfo &STI)
1301 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1302 Is64Bit(TT.isArch64Bit()) {
1303 memset(s: SavedRegs, c: 0, n: sizeof(SavedRegs));
1304 OffsetSize = Is64Bit ? 8 : 4;
1305 MoveInstrSize = Is64Bit ? 3 : 2;
1306 StackDivide = Is64Bit ? 8 : 4;
1307 }
1308
1309 std::unique_ptr<MCObjectTargetWriter>
1310 createObjectTargetWriter() const override {
1311 uint32_t CPUType = cantFail(ValOrErr: MachO::getCPUType(T: TT));
1312 uint32_t CPUSubType = cantFail(ValOrErr: MachO::getCPUSubType(T: TT));
1313 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubtype: CPUSubType);
1314 }
1315
1316 /// Implementation of algorithm to generate the compact unwind encoding
1317 /// for the CFI instructions.
1318 uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1319 const MCContext *Ctxt) const override {
1320 ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1321 if (Instrs.empty()) return 0;
1322 if (!isDarwinCanonicalPersonality(Sym: FI->Personality) &&
1323 !Ctxt->emitCompactUnwindNonCanonical())
1324 return CU::UNWIND_MODE_DWARF;
1325
1326 // Reset the saved registers.
1327 unsigned SavedRegIdx = 0;
1328 memset(s: SavedRegs, c: 0, n: sizeof(SavedRegs));
1329
1330 bool HasFP = false;
1331
1332 // Encode that we are using EBP/RBP as the frame pointer.
1333 uint64_t CompactUnwindEncoding = 0;
1334
1335 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1336 unsigned InstrOffset = 0;
1337 unsigned StackAdjust = 0;
1338 uint64_t StackSize = 0;
1339 int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();
1340
1341 for (const MCCFIInstruction &Inst : Instrs) {
1342 switch (Inst.getOperation()) {
1343 default:
1344 // Any other CFI directives indicate a frame that we aren't prepared
1345 // to represent via compact unwind, so just bail out.
1346 return CU::UNWIND_MODE_DWARF;
1347 case MCCFIInstruction::OpDefCfaRegister: {
1348 // Defines a frame pointer. E.g.
1349 //
1350 // movq %rsp, %rbp
1351 // L0:
1352 // .cfi_def_cfa_register %rbp
1353 //
1354 HasFP = true;
1355
        // If the frame pointer is anything other than ebp/rbp, we do not have
        // a way to generate a compact unwind representation, so bail out.
1358 if (*MRI.getLLVMRegNum(RegNum: Inst.getRegister(), isEH: true) !=
1359 (Is64Bit ? X86::RBP : X86::EBP))
1360 return CU::UNWIND_MODE_DWARF;
1361
1362 // Reset the counts.
1363 memset(s: SavedRegs, c: 0, n: sizeof(SavedRegs));
1364 StackAdjust = 0;
1365 SavedRegIdx = 0;
1366 MinAbsOffset = std::numeric_limits<int64_t>::max();
1367 InstrOffset += MoveInstrSize;
1368 break;
1369 }
1370 case MCCFIInstruction::OpDefCfaOffset: {
1371 // Defines a new offset for the CFA. E.g.
1372 //
1373 // With frame:
1374 //
1375 // pushq %rbp
1376 // L0:
1377 // .cfi_def_cfa_offset 16
1378 //
1379 // Without frame:
1380 //
1381 // subq $72, %rsp
1382 // L0:
1383 // .cfi_def_cfa_offset 80
1384 //
1385 StackSize = Inst.getOffset() / StackDivide;
1386 break;
1387 }
1388 case MCCFIInstruction::OpOffset: {
1389 // Defines a "push" of a callee-saved register. E.g.
1390 //
1391 // pushq %r15
1392 // pushq %r14
1393 // pushq %rbx
1394 // L0:
1395 // subq $120, %rsp
1396 // L1:
1397 // .cfi_offset %rbx, -40
1398 // .cfi_offset %r14, -32
1399 // .cfi_offset %r15, -24
1400 //
1401 if (SavedRegIdx == CU_NUM_SAVED_REGS)
1402 // If there are too many saved registers, we cannot use a compact
1403 // unwind encoding.
1404 return CU::UNWIND_MODE_DWARF;
1405
1406 MCRegister Reg = *MRI.getLLVMRegNum(RegNum: Inst.getRegister(), isEH: true);
1407 SavedRegs[SavedRegIdx++] = Reg;
1408 StackAdjust += OffsetSize;
1409 MinAbsOffset = std::min(a: MinAbsOffset, b: std::abs(i: Inst.getOffset()));
1410 InstrOffset += PushInstrSize(Reg);
1411 break;
1412 }
1413 }
1414 }
1415
1416 StackAdjust /= StackDivide;
1417
1418 if (HasFP) {
1419 if ((StackAdjust & 0xFF) != StackAdjust)
1420 // Offset was too big for a compact unwind encoding.
1421 return CU::UNWIND_MODE_DWARF;
1422
1423 // We don't attempt to track a real StackAdjust, so if the saved registers
1424 // aren't adjacent to rbp we can't cope.
1425 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1426 return CU::UNWIND_MODE_DWARF;
1427
1428 // Get the encoding of the saved registers when we have a frame pointer.
1429 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1430 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1431
1432 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1433 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1434 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1435 } else {
1436 SubtractInstrIdx += InstrOffset;
1437 ++StackAdjust;
1438
1439 if ((StackSize & 0xFF) == StackSize) {
1440 // Frameless stack with a small stack size.
1441 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1442
1443 // Encode the stack size.
1444 CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1445 } else {
1446 if ((StackAdjust & 0x7) != StackAdjust)
1447 // The extra stack adjustments are too big for us to handle.
1448 return CU::UNWIND_MODE_DWARF;
1449
1450 // Frameless stack with an offset too large for us to encode compactly.
1451 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1452
1453 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1454 // instruction.
1455 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1456
1457 // Encode any extra stack adjustments (done via push instructions).
1458 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1459 }
1460
1461 // Encode the number of registers saved. (Reverse the list first.)
1462 std::reverse(first: &SavedRegs[0], last: &SavedRegs[SavedRegIdx]);
1463 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1464
1465 // Get the encoding of the saved registers when we don't have a frame
1466 // pointer.
1467 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(RegCount: SavedRegIdx);
1468 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1469
1470 // Encode the register encoding.
1471 CompactUnwindEncoding |=
1472 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1473 }
1474
1475 return CompactUnwindEncoding;
1476 }
1477};
1478
1479} // end anonymous namespace
1480
1481MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1482 const MCSubtargetInfo &STI,
1483 const MCRegisterInfo &MRI,
1484 const MCTargetOptions &Options) {
1485 const Triple &TheTriple = STI.getTargetTriple();
1486 if (TheTriple.isOSBinFormatMachO())
1487 return new DarwinX86AsmBackend(T, MRI, STI);
1488
1489 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1490 return new WindowsX86AsmBackend(T, false, STI);
1491
1492 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType: TheTriple.getOS());
1493
1494 if (TheTriple.isOSIAMCU())
1495 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1496
1497 return new ELFX86_32AsmBackend(T, OSABI, STI);
1498}
1499
1500MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1501 const MCSubtargetInfo &STI,
1502 const MCRegisterInfo &MRI,
1503 const MCTargetOptions &Options) {
1504 const Triple &TheTriple = STI.getTargetTriple();
1505 if (TheTriple.isOSBinFormatMachO())
1506 return new DarwinX86AsmBackend(T, MRI, STI);
1507
1508 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1509 return new WindowsX86AsmBackend(T, true, STI);
1510
1511 if (TheTriple.isUEFI()) {
1512 assert(TheTriple.isOSBinFormatCOFF() &&
1513 "Only COFF format is supported in UEFI environment.");
1514 return new WindowsX86AsmBackend(T, true, STI);
1515 }
1516
1517 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType: TheTriple.getOS());
1518
1519 if (TheTriple.isX32())
1520 return new ELFX86_X32AsmBackend(T, OSABI, STI);
1521 return new ELFX86_64AsmBackend(T, OSABI, STI);
1522}
1523
1524namespace {
1525class X86ELFStreamer : public MCELFStreamer {
1526public:
1527 X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
1528 std::unique_ptr<MCObjectWriter> OW,
1529 std::unique_ptr<MCCodeEmitter> Emitter)
1530 : MCELFStreamer(Context, std::move(TAB), std::move(OW),
1531 std::move(Emitter)) {}
1532
1533 void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
1534};
1535} // end anonymous namespace
1536
1537void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
1538 const MCSubtargetInfo &STI) {
1539 auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
1540 Backend.emitInstructionBegin(OS&: S, Inst, STI);
1541 S.MCObjectStreamer::emitInstruction(Inst, STI);
1542 Backend.emitInstructionEnd(OS&: S, Inst);
1543}
1544
1545void X86ELFStreamer::emitInstruction(const MCInst &Inst,
1546 const MCSubtargetInfo &STI) {
1547 X86_MC::emitInstruction(S&: *this, Inst, STI);
1548}
1549
1550MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
1551 std::unique_ptr<MCAsmBackend> &&MAB,
1552 std::unique_ptr<MCObjectWriter> &&MOW,
1553 std::unique_ptr<MCCodeEmitter> &&MCE) {
1554 return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
1555 std::move(MCE));
1556}
1557