X86AsmBackend.cpp source code [llvm_projects/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp]

1	//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "MCTargetDesc/X86BaseInfo.h"
10	#include "MCTargetDesc/X86EncodingOptimization.h"
11	#include "MCTargetDesc/X86FixupKinds.h"
12	#include "llvm/ADT/StringSwitch.h"
13	#include "llvm/BinaryFormat/ELF.h"
14	#include "llvm/BinaryFormat/MachO.h"
15	#include "llvm/MC/MCAsmBackend.h"
16	#include "llvm/MC/MCAssembler.h"
17	#include "llvm/MC/MCCodeEmitter.h"
18	#include "llvm/MC/MCContext.h"
19	#include "llvm/MC/MCDwarf.h"
20	#include "llvm/MC/MCELFObjectWriter.h"
21	#include "llvm/MC/MCELFStreamer.h"
22	#include "llvm/MC/MCExpr.h"
23	#include "llvm/MC/MCFixupKindInfo.h"
24	#include "llvm/MC/MCInst.h"
25	#include "llvm/MC/MCInstrInfo.h"
26	#include "llvm/MC/MCMachObjectWriter.h"
27	#include "llvm/MC/MCObjectStreamer.h"
28	#include "llvm/MC/MCObjectWriter.h"
29	#include "llvm/MC/MCRegisterInfo.h"
30	#include "llvm/MC/MCSectionMachO.h"
31	#include "llvm/MC/MCSubtargetInfo.h"
32	#include "llvm/MC/MCValue.h"
33	#include "llvm/MC/TargetRegistry.h"
34	#include "llvm/Support/CommandLine.h"
35	#include "llvm/Support/ErrorHandling.h"
36	#include "llvm/Support/raw_ostream.h"
37
38	using namespace llvm;
39
40	namespace {
41	/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42	class X86AlignBranchKind {
43	private:
44	uint8_t AlignBranchKind = `0`;
45
46	public:
47	void operator=(const std::string &Val) {
48	if (Val.empty())
49	return;
50	SmallVector<StringRef, `6`> BranchTypes;
51	StringRef (Val).split(A&: BranchTypes, Separator: `'+'`, MaxSplit: -`1`, KeepEmpty: false);
52	for (auto BranchType : BranchTypes) {
53	if (BranchType == "fused")
54	addKind(Value: X86::AlignBranchFused);
55	else if (BranchType == "jcc")
56	addKind(Value: X86::AlignBranchJcc);
57	else if (BranchType == "jmp")
58	addKind(Value: X86::AlignBranchJmp);
59	else if (BranchType == "call")
60	addKind(Value: X86::AlignBranchCall);
61	else if (BranchType == "ret")
62	addKind(Value: X86::AlignBranchRet);
63	else if (BranchType == "indirect")
64	addKind(Value: X86::AlignBranchIndirect);
65	else {
66	errs() << "invalid argument " << BranchType.str()
67	<< " to -x86-align-branch=; each element must be one of: fused, "
68	"jcc, jmp, call, ret, indirect.(plus separated)\n";
69	}
70	}
71	}
72
73	operator uint8_t() const { return AlignBranchKind; }
74	void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind \|= Value; }
75	};
76
77	X86AlignBranchKind X86AlignBranchKindLoc;
78
79	cl::opt<unsigned> X86AlignBranchBoundary(
80	"x86-align-branch-boundary", cl::init(Val: `0`),
81	cl::desc (
82	"Control how the assembler should align branches with NOP. If the "
83	"boundary's size is not 0, it should be a power of 2 and no less "
84	"than 32. Branches will be aligned to prevent from being across or "
85	"against the boundary of specified size. The default value 0 does not "
86	"align branches."));
87
88	cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
89	"x86-align-branch",
90	cl::desc (
91	"Specify types of branches to align (plus separated list of types):"
92	"\njcc indicates conditional jumps"
93	"\nfused indicates fused conditional jumps"
94	"\njmp indicates direct unconditional jumps"
95	"\ncall indicates direct and indirect calls"
96	"\nret indicates rets"
97	"\nindirect indicates indirect unconditional jumps"),
98	cl::location(L&: X86AlignBranchKindLoc));
99
100	cl::opt<bool> X86AlignBranchWithin32BBoundaries(
101	"x86-branches-within-32B-boundaries", cl::init(Val: false),
102	cl::desc (
103	"Align selected instructions to mitigate negative performance impact "
104	"of Intel's micro code update for errata skx102. May break "
105	"assumptions about labels corresponding to particular instructions, "
106	"and should be used with caution."));
107
108	cl::opt<unsigned> X86PadMaxPrefixSize(
109	"x86-pad-max-prefix-size", cl::init(Val: `0`),
110	cl::desc ("Maximum number of prefixes to use for padding"));
111
112	cl::opt<bool> X86PadForAlign(
113	"x86-pad-for-align", cl::init(Val: false), cl::Hidden,
114	cl::desc ("Pad previous instructions to implement align directives"));
115
116	cl::opt<bool> X86PadForBranchAlign(
117	"x86-pad-for-branch-align", cl::init(Val: true), cl::Hidden,
118	cl::desc ("Pad previous instructions to implement branch alignment"));
119
120	class X86AsmBackend : public MCAsmBackend {
121	const MCSubtargetInfo &STI;
122	std::unique_ptr<const MCInstrInfo> MCII;
123	X86AlignBranchKind AlignBranchType;
124	Align AlignBoundary;
125	unsigned TargetPrefixMax = `0`;
126
127	MCInst PrevInst;
128	unsigned PrevInstOpcode = `0`;
129	MCBoundaryAlignFragment PendingBA = nullptr*;
130	std::pair<MCFragment *, size_t> PrevInstPosition;
131	bool IsRightAfterData = false;
132
133	uint8_t determinePaddingPrefix(const MCInst &Inst) const;
134	bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
135	bool needAlign(const MCInst &Inst) const;
136	bool canPadBranches(MCObjectStreamer &OS) const;
137	bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
138
139	public:
140	X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
141	: MCAsmBackend (llvm::endianness::little), STI(STI),
142	MCII (T.createMCInstrInfo()) {
143	if (X86AlignBranchWithin32BBoundaries) {
144	// At the moment, this defaults to aligning fused branches, unconditional
145	// jumps, and (unfused) conditional jumps with nops. Both the
146	// instructions aligned and the alignment method (nop vs prefix) may
147	// change in the future.
148	AlignBoundary = assumeAligned(Value: `32`);
149	AlignBranchType.addKind(Value: X86::AlignBranchFused);
150	AlignBranchType.addKind(Value: X86::AlignBranchJcc);
151	AlignBranchType.addKind(Value: X86::AlignBranchJmp);
152	}
153	// Allow overriding defaults set by main flag
154	if (X86AlignBranchBoundary.getNumOccurrences())
155	AlignBoundary = assumeAligned(Value: X86AlignBranchBoundary);
156	if (X86AlignBranch.getNumOccurrences())
157	AlignBranchType = X86AlignBranchKindLoc;
158	if (X86PadMaxPrefixSize.getNumOccurrences())
159	TargetPrefixMax = X86PadMaxPrefixSize;
160	}
161
162	bool allowAutoPadding() const override;
163	bool allowEnhancedRelaxation() const override;
164	void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
165	const MCSubtargetInfo &STI);
166	void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);
167
168	unsigned getNumFixupKinds() const override {
169	return X86::NumTargetFixupKinds;
170	}
171
172	std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
173
174	const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
175
176	bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
177	const MCValue &Target,
178	const MCSubtargetInfo *STI) override;
179
180	void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
181	const MCValue &Target, MutableArrayRef<char> Data,
182	uint64_t Value, bool IsResolved,
183	const MCSubtargetInfo STI) const* override;
184
185	bool mayNeedRelaxation(const MCInst &Inst,
186	const MCSubtargetInfo &STI) const override;
187
188	bool fixupNeedsRelaxation(const MCFixup &Fixup,
189	uint64_t Value) const override;
190
191	void relaxInstruction(MCInst &Inst,
192	const MCSubtargetInfo &STI) const override;
193
194	bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
195	MCCodeEmitter &Emitter,
196	unsigned &RemainingSize) const;
197
198	bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
199	unsigned &RemainingSize) const;
200
201	bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
202	unsigned &RemainingSize) const;
203
204	void finishLayout(const MCAssembler &Asm) const override;
205
206	unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
207
208	bool writeNopData(raw_ostream &OS, uint64_t Count,
209	const MCSubtargetInfo STI) const* override;
210	};
211	} // end anonymous namespace
212
213	static bool isRelaxableBranch(unsigned Opcode) {
214	return Opcode == X86::JCC_1 \|\| Opcode == X86::JMP_1;
215	}
216
217	static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
218	bool Is16BitMode = false) {
219	switch (Opcode) {
220	default:
221	llvm_unreachable("invalid opcode for branch");
222	case X86::JCC_1:
223	return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
224	case X86::JMP_1:
225	return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
226	}
227	}
228
229	static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
230	unsigned Opcode = MI.getOpcode();
231	return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
232	: X86::getOpcodeForLongImmediateForm(Opcode);
233	}
234
235	static X86::CondCode getCondFromBranch(const MCInst &MI,
236	const MCInstrInfo &MCII) {
237	unsigned Opcode = MI.getOpcode();
238	switch (Opcode) {
239	default:
240	return X86::COND_INVALID;
241	case X86::JCC_1: {
242	const MCInstrDesc &Desc = MCII.get(Opcode);
243	return static_cast<X86::CondCode>(
244	MI.getOperand(i: Desc.getNumOperands() - `1`).getImm());
245	}
246	}
247	}
248
249	static X86::SecondMacroFusionInstKind
250	classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
251	X86::CondCode CC = getCondFromBranch(MI, MCII);
252	return classifySecondCondCodeInMacroFusion(CC);
253	}
254
255	/// Check if the instruction uses RIP relative addressing.
256	static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
257	unsigned Opcode = MI.getOpcode();
258	const MCInstrDesc &Desc = MCII.get(Opcode);
259	uint64_t TSFlags = Desc.TSFlags;
260	unsigned CurOp = X86II::getOperandBias(Desc);
261	int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
262	if (MemoryOperand < `0`)
263	return false;
264	unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
265	unsigned BaseReg = MI.getOperand(i: BaseRegNum).getReg();
266	return (BaseReg == X86::RIP);
267	}
268
269	/// Check if the instruction is a prefix.
270	static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
271	return X86II::isPrefix(TSFlags: MCII.get(Opcode).TSFlags);
272	}
273
274	/// Check if the instruction is valid as the first instruction in macro fusion.
275	static bool isFirstMacroFusibleInst(const MCInst &Inst,
276	const MCInstrInfo &MCII) {
277	// An Intel instruction with RIP relative addressing is not macro fusible.
278	if (isRIPRelative(MI: Inst, MCII))
279	return false;
280	X86::FirstMacroFusionInstKind FIK =
281	X86::classifyFirstOpcodeInMacroFusion(Opcode: Inst.getOpcode());
282	return FIK != X86::FirstMacroFusionInstKind::Invalid;
283	}
284
285	/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
286	/// get a better peformance in some cases. Here, we determine which prefix is
287	/// the most suitable.
288	///
289	/// If the instruction has a segment override prefix, use the existing one.
290	/// If the target is 64-bit, use the CS.
291	/// If the target is 32-bit,
292	/// - If the instruction has a ESP/EBP base register, use SS.
293	/// - Otherwise use DS.
294	uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
295	assert((STI.hasFeature(X86::Is32Bit) \|\| STI.hasFeature(X86::Is64Bit)) &&
296	"Prefixes can be added only in 32-bit or 64-bit mode.");
297	const MCInstrDesc &Desc = MCII ->get(Opcode: Inst.getOpcode());
298	uint64_t TSFlags = Desc.TSFlags;
299
300	// Determine where the memory operand starts, if present.
301	int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
302	if (MemoryOperand != -`1`)
303	MemoryOperand += X86II::getOperandBias(Desc);
304
305	unsigned SegmentReg = `0`;
306	if (MemoryOperand >= `0`) {
307	// Check for explicit segment override on memory operand.
308	SegmentReg = Inst.getOperand(i: MemoryOperand + X86::AddrSegmentReg).getReg();
309	}
310
311	switch (TSFlags & X86II::FormMask) {
312	default:
313	break;
314	case X86II::RawFrmDstSrc: {
315	// Check segment override opcode prefix as needed (not for %ds).
316	if (Inst.getOperand(i: `2`).getReg() != X86::DS)
317	SegmentReg = Inst.getOperand(i: `2`).getReg();
318	break;
319	}
320	case X86II::RawFrmSrc: {
321	// Check segment override opcode prefix as needed (not for %ds).
322	if (Inst.getOperand(i: `1`).getReg() != X86::DS)
323	SegmentReg = Inst.getOperand(i: `1`).getReg();
324	break;
325	}
326	case X86II::RawFrmMemOffs: {
327	// Check segment override opcode prefix as needed.
328	SegmentReg = Inst.getOperand(i: `1`).getReg();
329	break;
330	}
331	}
332
333	if (SegmentReg != `0`)
334	return X86::getSegmentOverridePrefixForReg(Reg: SegmentReg);
335
336	if (STI.hasFeature(Feature: X86::Is64Bit))
337	return X86::CS_Encoding;
338
339	if (MemoryOperand >= `0`) {
340	unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
341	unsigned BaseReg = Inst.getOperand(i: BaseRegNum).getReg();
342	if (BaseReg == X86::ESP \|\| BaseReg == X86::EBP)
343	return X86::SS_Encoding;
344	}
345	return X86::DS_Encoding;
346	}
347
348	/// Check if the two instructions will be macro-fused on the target cpu.
349	bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
350	const MCInstrDesc &InstDesc = MCII ->get(Opcode: Jcc.getOpcode());
351	if (!InstDesc.isConditionalBranch())
352	return false;
353	if (!isFirstMacroFusibleInst(Inst: Cmp, MCII: *MCII))
354	return false;
355	const X86::FirstMacroFusionInstKind CmpKind =
356	X86::classifyFirstOpcodeInMacroFusion(Opcode: Cmp.getOpcode());
357	const X86::SecondMacroFusionInstKind BranchKind =
358	classifySecondInstInMacroFusion(MI: Jcc, MCII: *MCII);
359	return X86::isMacroFused(FirstKind: CmpKind, SecondKind: BranchKind);
360	}
361
362	/// Check if the instruction has a variant symbol operand.
363	static bool hasVariantSymbol(const MCInst &MI) {
364	for (auto &Operand : MI) {
365	if (!Operand.isExpr())
366	continue;
367	const MCExpr &Expr = *Operand.getExpr();
368	if (Expr.getKind() == MCExpr::SymbolRef &&
369	cast<MCSymbolRefExpr>(Val: Expr).getKind() != MCSymbolRefExpr::VK_None)
370	return true;
371	}
372	return false;
373	}
374
375	bool X86AsmBackend::allowAutoPadding() const {
376	return (AlignBoundary != Align (`1`) && AlignBranchType != X86::AlignBranchNone);
377	}
378
379	bool X86AsmBackend::allowEnhancedRelaxation() const {
380	return allowAutoPadding() && TargetPrefixMax != `0` && X86PadForBranchAlign;
381	}
382
383	/// X86 has certain instructions which enable interrupts exactly one
384	/// instruction after* the instruction which stores to SS. Return true if the*
385	/// given instruction may have such an interrupt delay slot.
386	static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
387	switch (InstOpcode) {
388	case X86::POPSS16:
389	case X86::POPSS32:
390	case X86::STI:
391	return true;
392
393	case X86::MOV16sr:
394	case X86::MOV32sr:
395	case X86::MOV64sr:
396	case X86::MOV16sm:
397	// In fact, this is only the case if the first operand is SS. However, as
398	// segment moves occur extremely rarely, this is just a minor pessimization.
399	return true;
400	}
401	return false;
402	}
403
404	/// Check if the instruction to be emitted is right after any data.
405	static bool
406	isRightAfterData(MCFragment *CurrentFragment,
407	const std::pair<MCFragment *, size_t> &PrevInstPosition) {
408	MCFragment *F = CurrentFragment;
409	// Since data is always emitted into a DataFragment, our check strategy is
410	// simple here.
411	// - If the fragment is a DataFragment
412	// - If it's empty (section start or data after align), return false.
413	// - If it's not the fragment where the previous instruction is,
414	// returns true.
415	// - If it's the fragment holding the previous instruction but its
416	// size changed since the previous instruction was emitted into
417	// it, returns true.
418	// - Otherwise returns false.
419	// - If the fragment is not a DataFragment, returns false.
420	if (auto *DF = dyn_cast_or_null<MCDataFragment>(Val: F))
421	return DF->getContents().size() &&
422	(DF != PrevInstPosition.first \|\|
423	DF->getContents().size() != PrevInstPosition.second);
424
425	return false;
426	}
427
428	/// \returns the fragment size if it has instructions, otherwise returns 0.
429	static size_t getSizeForInstFragment(const MCFragment *F) {
430	if (!F \|\| !F->hasInstructions())
431	return `0`;
432	// MCEncodedFragmentWithContents being templated makes this tricky.
433	switch (F->getKind()) {
434	default:
435	llvm_unreachable("Unknown fragment with instructions!");
436	case MCFragment::FT_Data:
437	return cast<MCDataFragment>(Val: *F).getContents().size();
438	case MCFragment::FT_Relaxable:
439	return cast<MCRelaxableFragment>(Val: *F).getContents().size();
440	case MCFragment::FT_CompactEncodedInst:
441	return cast<MCCompactEncodedInstFragment>(Val: *F).getContents().size();
442	}
443	}
444
445	/// Return true if we can insert NOP or prefixes automatically before the
446	/// the instruction to be emitted.
447	bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
448	if (hasVariantSymbol(MI: Inst))
449	// Linker may rewrite the instruction with variant symbol operand(e.g.
450	// TLSCALL).
451	return false;
452
453	if (mayHaveInterruptDelaySlot(InstOpcode: PrevInstOpcode))
454	// If this instruction follows an interrupt enabling instruction with a one
455	// instruction delay, inserting a nop would change behavior.
456	return false;
457
458	if (isPrefix(Opcode: PrevInstOpcode, MCII: *MCII))
459	// If this instruction follows a prefix, inserting a nop/prefix would change
460	// semantic.
461	return false;
462
463	if (isPrefix(Opcode: Inst.getOpcode(), MCII: *MCII))
464	// If this instruction is a prefix, inserting a prefix would change
465	// semantic.
466	return false;
467
468	if (IsRightAfterData)
469	// If this instruction follows any data, there is no clear
470	// instruction boundary, inserting a nop/prefix would change semantic.
471	return false;
472
473	return true;
474	}
475
476	bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
477	if (!OS.getAllowAutoPadding())
478	return false;
479	assert(allowAutoPadding() && "incorrect initialization!");
480
481	// We only pad in text section.
482	if (!OS.getCurrentSectionOnly()->isText())
483	return false;
484
485	// To be Done: Currently don't deal with Bundle cases.
486	if (OS.getAssembler().isBundlingEnabled())
487	return false;
488
489	// Branches only need to be aligned in 32-bit or 64-bit mode.
490	if (!(STI.hasFeature(Feature: X86::Is64Bit) \|\| STI.hasFeature(Feature: X86::Is32Bit)))
491	return false;
492
493	return true;
494	}
495
496	/// Check if the instruction operand needs to be aligned.
497	bool X86AsmBackend::needAlign(const MCInst &Inst) const {
498	const MCInstrDesc &Desc = MCII ->get(Opcode: Inst.getOpcode());
499	return (Desc.isConditionalBranch() &&
500	(AlignBranchType & X86::AlignBranchJcc)) \|\|
501	(Desc.isUnconditionalBranch() &&
502	(AlignBranchType & X86::AlignBranchJmp)) \|\|
503	(Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) \|\|
504	(Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) \|\|
505	(Desc.isIndirectBranch() &&
506	(AlignBranchType & X86::AlignBranchIndirect));
507	}
508
509	/// Insert BoundaryAlignFragment before instructions to align branches.
510	void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
511	const MCInst &Inst, const MCSubtargetInfo &STI) {
512	// Used by canPadInst. Done here, because in emitInstructionEnd, the current
513	// fragment will have changed.
514	IsRightAfterData =
515	isRightAfterData(CurrentFragment: OS.getCurrentFragment(), PrevInstPosition);
516
517	if (!canPadBranches(OS))
518	return;
519
520	// NB: PrevInst only valid if canPadBranches is true.
521	if (!isMacroFused(Cmp: PrevInst, Jcc: Inst))
522	// Macro fusion doesn't happen indeed, clear the pending.
523	PendingBA = nullptr;
524
525	// When branch padding is enabled (basically the skx102 erratum => unlikely),
526	// we call canPadInst (not cheap) twice. However, in the common case, we can
527	// avoid unnecessary calls to that, as this is otherwise only used for
528	// relaxable fragments.
529	if (!canPadInst(Inst, OS))
530	return;
531
532	if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
533	// Macro fusion actually happens and there is no other fragment inserted
534	// after the previous instruction.
535	//
536	// Do nothing here since we already inserted a BoudaryAlign fragment when
537	// we met the first instruction in the fused pair and we'll tie them
538	// together in emitInstructionEnd.
539	//
540	// Note: When there is at least one fragment, such as MCAlignFragment,
541	// inserted after the previous instruction, e.g.
542	//
543	// \code
544	// cmp %rax %rcx
545	// .align 16
546	// je .Label0
547	// \ endcode
548	//
549	// We will treat the JCC as a unfused branch although it may be fused
550	// with the CMP.
551	return;
552	}
553
554	if (needAlign(Inst) \|\| ((AlignBranchType & X86::AlignBranchFused) &&
555	isFirstMacroFusibleInst(Inst, MCII: *MCII))) {
556	// If we meet a unfused branch or the first instuction in a fusiable pair,
557	// insert a BoundaryAlign fragment.
558	PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
559	args&: AlignBoundary, args: STI);
560	OS.insert(F: PendingBA);
561	}
562	}
563
564	/// Set the last fragment to be aligned for the BoundaryAlignFragment.
565	void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
566	const MCInst &Inst) {
567	MCFragment *CF = OS.getCurrentFragment();
568	if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(Val: CF))
569	F->setAllowAutoPadding(canPadInst(Inst, OS));
570
571	// Update PrevInstOpcode here, canPadInst() reads that.
572	PrevInstOpcode = Inst.getOpcode();
573	PrevInstPosition = std::make_pair(x&: CF, y: getSizeForInstFragment(F: CF));
574
575	if (!canPadBranches(OS))
576	return;
577
578	// PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
579	PrevInst = Inst;
580
581	if (!needAlign(Inst) \|\| !PendingBA)
582	return;
583
584	// Tie the aligned instructions into a pending BoundaryAlign.
585	PendingBA->setLastFragment(CF);
586	PendingBA = nullptr;
587
588	// We need to ensure that further data isn't added to the current
589	// DataFragment, so that we can get the size of instructions later in
590	// MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
591	// DataFragment.
592	if (isa_and_nonnull<MCDataFragment>(Val: CF))
593	OS.insert(F: OS.getContext().allocFragment<MCDataFragment>());
594
595	// Update the maximum alignment on the current section if necessary.
596	MCSection *Sec = OS.getCurrentSectionOnly();
597	Sec->ensureMinAlignment(MinAlignment: AlignBoundary);
598	}
599
600	std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
601	if (STI.getTargetTriple().isOSBinFormatELF()) {
602	unsigned Type;
603	if (STI.getTargetTriple().getArch() == Triple::x86_64) {
604	Type = llvm::StringSwitch<unsigned>(Name)
605	#define ELF_RELOC(X, Y) .Case(#X, Y)
606	#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
607	#undef ELF_RELOC
608	.Case(S: "BFD_RELOC_NONE", Value: ELF::R_X86_64_NONE)
609	.Case(S: "BFD_RELOC_8", Value: ELF::R_X86_64_8)
610	.Case(S: "BFD_RELOC_16", Value: ELF::R_X86_64_16)
611	.Case(S: "BFD_RELOC_32", Value: ELF::R_X86_64_32)
612	.Case(S: "BFD_RELOC_64", Value: ELF::R_X86_64_64)
613	.Default(Value: -`1u`);
614	} else {
615	Type = llvm::StringSwitch<unsigned>(Name)
616	#define ELF_RELOC(X, Y) .Case(#X, Y)
617	#include "llvm/BinaryFormat/ELFRelocs/i386.def"
618	#undef ELF_RELOC
619	.Case(S: "BFD_RELOC_NONE", Value: ELF::R_386_NONE)
620	.Case(S: "BFD_RELOC_8", Value: ELF::R_386_8)
621	.Case(S: "BFD_RELOC_16", Value: ELF::R_386_16)
622	.Case(S: "BFD_RELOC_32", Value: ELF::R_386_32)
623	.Default(Value: -`1u`);
624	}
625	if (Type == -`1u`)
626	return std::nullopt;
627	return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
628	}
629	return MCAsmBackend::getFixupKind(Name);
630	}
631
632	const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
633	const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
634	{.Name: "reloc_riprel_4byte", .TargetOffset: `0`, .TargetSize: `32`, .Flags: MCFixupKindInfo::FKF_IsPCRel},
635	{.Name: "reloc_riprel_4byte_movq_load", .TargetOffset: `0`, .TargetSize: `32`, .Flags: MCFixupKindInfo::FKF_IsPCRel},
636	{.Name: "reloc_riprel_4byte_relax", .TargetOffset: `0`, .TargetSize: `32`, .Flags: MCFixupKindInfo::FKF_IsPCRel},
637	{.Name: "reloc_riprel_4byte_relax_rex", .TargetOffset: `0`, .TargetSize: `32`, .Flags: MCFixupKindInfo::FKF_IsPCRel},
638	{.Name: "reloc_signed_4byte", .TargetOffset: `0`, .TargetSize: `32`, .Flags: `0`},
639	{.Name: "reloc_signed_4byte_relax", .TargetOffset: `0`, .TargetSize: `32`, .Flags: `0`},
640	{.Name: "reloc_global_offset_table", .TargetOffset: `0`, .TargetSize: `32`, .Flags: `0`},
641	{.Name: "reloc_global_offset_table8", .TargetOffset: `0`, .TargetSize: `64`, .Flags: `0`},
642	{.Name: "reloc_branch_4byte_pcrel", .TargetOffset: `0`, .TargetSize: `32`, .Flags: MCFixupKindInfo::FKF_IsPCRel},
643	};
644
645	// Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
646	// do not require any extra processing.
647	if (Kind >= FirstLiteralRelocationKind)
648	return MCAsmBackend::getFixupKindInfo(Kind: FK_NONE);
649
650	if (Kind < FirstTargetFixupKind)
651	return MCAsmBackend::getFixupKindInfo(Kind);
652
653	assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
654	"Invalid kind!");
655	assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
656	return Infos[Kind - FirstTargetFixupKind];
657	}
658
659	bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
660	const MCFixup &Fixup, const MCValue &,
661	const MCSubtargetInfo *STI) {
662	return Fixup.getKind() >= FirstLiteralRelocationKind;
663	}
664
665	static unsigned getFixupKindSize(unsigned Kind) {
666	switch (Kind) {
667	default:
668	llvm_unreachable("invalid fixup kind!");
669	case FK_NONE:
670	return `0`;
671	case FK_PCRel_1:
672	case FK_SecRel_1:
673	case FK_Data_1:
674	return `1`;
675	case FK_PCRel_2:
676	case FK_SecRel_2:
677	case FK_Data_2:
678	return `2`;
679	case FK_PCRel_4:
680	case X86::reloc_riprel_4byte:
681	case X86::reloc_riprel_4byte_relax:
682	case X86::reloc_riprel_4byte_relax_rex:
683	case X86::reloc_riprel_4byte_movq_load:
684	case X86::reloc_signed_4byte:
685	case X86::reloc_signed_4byte_relax:
686	case X86::reloc_global_offset_table:
687	case X86::reloc_branch_4byte_pcrel:
688	case FK_SecRel_4:
689	case FK_Data_4:
690	return `4`;
691	case FK_PCRel_8:
692	case FK_SecRel_8:
693	case FK_Data_8:
694	case X86::reloc_global_offset_table8:
695	return `8`;
696	}
697	}
698
699	void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
700	const MCValue &Target,
701	MutableArrayRef<char> Data,
702	uint64_t Value, bool IsResolved,
703	const MCSubtargetInfo STI) const* {
704	unsigned Kind = Fixup.getKind();
705	if (Kind >= FirstLiteralRelocationKind)
706	return;
707	unsigned Size = getFixupKindSize(Kind);
708
709	assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
710
711	int64_t SignedValue = static_cast<int64_t>(Value);
712	if ((Target.isAbsolute() \|\| IsResolved) &&
713	getFixupKindInfo(Kind: Fixup.getKind()).Flags &
714	MCFixupKindInfo::FKF_IsPCRel) {
715	// check that PC relative fixup fits into the fixup size.
716	if (Size > `0` && !isIntN(N: Size * `8`, x: SignedValue))
717	Asm.getContext().reportError(
718	L: Fixup.getLoc(), Msg: "value of " + Twine (SignedValue) +
719	" is too large for field of " + Twine (Size) +
720	((Size == `1`) ? " byte." : " bytes."));
721	} else {
722	// Check that uppper bits are either all zeros or all ones.
723	// Specifically ignore overflow/underflow as long as the leakage is
724	// limited to the lower bits. This is to remain compatible with
725	// other assemblers.
726	assert((Size == `0` \|\| isIntN(Size * `8` + `1`, SignedValue)) &&
727	"Value does not fit in the Fixup field");
728	}
729
730	for (unsigned i = `0`; i != Size; ++i)
731	Data [Fixup.getOffset() + i] = uint8_t(Value >> (i * `8`));
732	}
733
734	bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
735	const MCSubtargetInfo &STI) const {
736	unsigned Opcode = MI.getOpcode();
737	unsigned SkipOperands = X86::isCCMPCC(Opcode) ? `2` : `0`;
738	return isRelaxableBranch(Opcode) \|\|
739	(X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
740	MI.getOperand(i: MI.getNumOperands() - `1` - SkipOperands).isExpr());
741	}
742
743	bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
744	uint64_t Value) const {
745	// Relax if the value is too big for a (signed) i8.
746	return !isInt<`8`>(x: Value);
747	}
748
749	// FIXME: Can tblgen help at all here to verify there aren't other instructions
750	// we can relax?
751	void X86AsmBackend::relaxInstruction(MCInst &Inst,
752	const MCSubtargetInfo &STI) const {
753	// The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
754	bool Is16BitMode = STI.hasFeature(Feature: X86::Is16Bit);
755	unsigned RelaxedOp = getRelaxedOpcode(MI: Inst, Is16BitMode);
756
757	if (RelaxedOp == Inst.getOpcode()) {
758	SmallString<`256`> Tmp;
759	raw_svector_ostream OS(Tmp);
760	Inst.dump_pretty(OS);
761	OS << "\n";
762	report_fatal_error(reason: "unexpected instruction to relax: " + OS.str());
763	}
764
765	Inst.setOpcode(RelaxedOp);
766	}
767
768	bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
769	MCCodeEmitter &Emitter,
770	unsigned &RemainingSize) const {
771	if (!RF.getAllowAutoPadding())
772	return false;
773	// If the instruction isn't fully relaxed, shifting it around might require a
774	// larger value for one of the fixups then can be encoded. The outer loop
775	// will also catch this before moving to the next instruction, but we need to
776	// prevent padding this single instruction as well.
777	if (mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
778	return false;
779
780	const unsigned OldSize = RF.getContents().size();
781	if (OldSize == `15`)
782	return false;
783
784	const unsigned MaxPossiblePad = std::min(a: `15` - OldSize, b: RemainingSize);
785	const unsigned RemainingPrefixSize = [&]() -> unsigned {
786	SmallString<`15`> Code;
787	X86_MC::emitPrefix(MCE&: Emitter, MI: RF.getInst(), CB&: Code, STI);
788	assert(Code.size() < `15` && "The number of prefixes must be less than 15.");
789
790	// TODO: It turns out we need a decent amount of plumbing for the target
791	// specific bits to determine number of prefixes its safe to add. Various
792	// targets (older chips mostly, but also Atom family) encounter decoder
793	// stalls with too many prefixes. For testing purposes, we set the value
794	// externally for the moment.
795	unsigned ExistingPrefixSize = Code.size();
796	if (TargetPrefixMax <= ExistingPrefixSize)
797	return `0`;
798	return TargetPrefixMax - ExistingPrefixSize;
799	}();
800	const unsigned PrefixBytesToAdd =
801	std::min(a: MaxPossiblePad, b: RemainingPrefixSize);
802	if (PrefixBytesToAdd == `0`)
803	return false;
804
805	const uint8_t Prefix = determinePaddingPrefix(Inst: RF.getInst());
806
807	SmallString<`256`> Code;
808	Code.append(NumInputs: PrefixBytesToAdd, Elt: Prefix);
809	Code.append(in_start: RF.getContents().begin(), in_end: RF.getContents().end());
810	RF.getContents() = Code;
811
812	// Adjust the fixups for the change in offsets
813	for (auto &F : RF.getFixups()) {
814	F.setOffset(F.getOffset() + PrefixBytesToAdd);
815	}
816
817	RemainingSize -= PrefixBytesToAdd;
818	return true;
819	}
820
821	bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
822	MCCodeEmitter &Emitter,
823	unsigned &RemainingSize) const {
824	if (!mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
825	// TODO: There are lots of other tricks we could apply for increasing
826	// encoding size without impacting performance.
827	return false;
828
829	MCInst Relaxed = RF.getInst();
830	relaxInstruction(Inst&: Relaxed, STI: *RF.getSubtargetInfo());
831
832	SmallVector<MCFixup, `4`> Fixups;
833	SmallString<`15`> Code;
834	Emitter.encodeInstruction(Inst: Relaxed, CB&: Code, Fixups, STI: *RF.getSubtargetInfo());
835	const unsigned OldSize = RF.getContents().size();
836	const unsigned NewSize = Code.size();
837	assert(NewSize >= OldSize && "size decrease during relaxation?");
838	unsigned Delta = NewSize - OldSize;
839	if (Delta > RemainingSize)
840	return false;
841	RF.setInst(Relaxed);
842	RF.getContents() = Code;
843	RF.getFixups() = Fixups;
844	RemainingSize -= Delta;
845	return true;
846	}
847
848	bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
849	MCCodeEmitter &Emitter,
850	unsigned &RemainingSize) const {
851	bool Changed = false;
852	if (RemainingSize != `0`)
853	Changed \|= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
854	if (RemainingSize != `0`)
855	Changed \|= padInstructionViaPrefix(RF, Emitter, RemainingSize);
856	return Changed;
857	}
858
859	void X86AsmBackend::finishLayout(MCAssembler const &Asm) const {
860	// See if we can further relax some instructions to cut down on the number of
861	// nop bytes required for code alignment. The actual win is in reducing
862	// instruction count, not number of bytes. Modern X86-64 can easily end up
863	// decode limited. It is often better to reduce the number of instructions
864	// (i.e. eliminate nops) even at the cost of increasing the size and
865	// complexity of others.
866	if (!X86PadForAlign && !X86PadForBranchAlign)
867	return;
868
869	// The processed regions are delimitered by LabeledFragments. -g may have more
870	// MCSymbols and therefore different relaxation results. X86PadForAlign is
871	// disabled by default to eliminate the -g vs non -g difference.
872	DenseSet<MCFragment *> LabeledFragments;
873	for (const MCSymbol &S : Asm.symbols())
874	LabeledFragments.insert(V: S.getFragment(SetUsed: false));
875
876	for (MCSection &Sec : Asm) {
877	if (!Sec.isText())
878	continue;
879
880	SmallVector<MCRelaxableFragment *, `4`> Relaxable;
881	for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
882	MCFragment &F = *I;
883
884	if (LabeledFragments.count(V: &F))
885	Relaxable.clear();
886
887	if (F.getKind() == MCFragment::FT_Data \|\|
888	F.getKind() == MCFragment::FT_CompactEncodedInst)
889	// Skip and ignore
890	continue;
891
892	if (F.getKind() == MCFragment::FT_Relaxable) {
893	auto &RF = cast<MCRelaxableFragment>(Val&: *I);
894	Relaxable.push_back(Elt: &RF);
895	continue;
896	}
897
898	auto canHandle = [](MCFragment &F) -> bool {
899	switch (F.getKind()) {
900	default:
901	return false;
902	case MCFragment::FT_Align:
903	return X86PadForAlign;
904	case MCFragment::FT_BoundaryAlign:
905	return X86PadForBranchAlign;
906	}
907	};
908	// For any unhandled kind, assume we can't change layout.
909	if (!canHandle (F)) {
910	Relaxable.clear();
911	continue;
912	}
913
914	#ifndef NDEBUG
915	const uint64_t OrigOffset = Asm.getFragmentOffset(F);
916	#endif
917	const uint64_t OrigSize = Asm.computeFragmentSize(F);
918
919	// To keep the effects local, prefer to relax instructions closest to
920	// the align directive. This is purely about human understandability
921	// of the resulting code. If we later find a reason to expand
922	// particular instructions over others, we can adjust.
923	unsigned RemainingSize = OrigSize;
924	while (!Relaxable.empty() && RemainingSize != `0`) {
925	auto &RF = *Relaxable.pop_back_val();
926	// Give the backend a chance to play any tricks it wishes to increase
927	// the encoding size of the given instruction. Target independent code
928	// will try further relaxation, but target's may play further tricks.
929	if (padInstructionEncoding(RF, Emitter&: Asm.getEmitter(), RemainingSize))
930	Sec.setHasLayout(false);
931
932	// If we have an instruction which hasn't been fully relaxed, we can't
933	// skip past it and insert bytes before it. Changing its starting
934	// offset might require a larger negative offset than it can encode.
935	// We don't need to worry about larger positive offsets as none of the
936	// possible offsets between this and our align are visible, and the
937	// ones afterwards aren't changing.
938	if (mayNeedRelaxation(MI: RF.getInst(), STI: *RF.getSubtargetInfo()))
939	break;
940	}
941	Relaxable.clear();
942
943	// BoundaryAlign explicitly tracks it's size (unlike align)
944	if (F.getKind() == MCFragment::FT_BoundaryAlign)
945	cast<MCBoundaryAlignFragment>(Val&: F).setSize(RemainingSize);
946
947	#ifndef NDEBUG
948	const uint64_t FinalOffset = Asm.getFragmentOffset(F);
949	const uint64_t FinalSize = Asm.computeFragmentSize(F);
950	assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
951	"can't move start of next fragment!");
952	assert(FinalSize == RemainingSize && "inconsistent size computation?");
953	#endif
954
955	// If we're looking at a boundary align, make sure we don't try to pad
956	// its target instructions for some following directive. Doing so would
957	// break the alignment of the current boundary align.
958	if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(Val: &F)) {
959	const MCFragment *LastFragment = BF->getLastFragment();
960	if (!LastFragment)
961	continue;
962	while (&*I != LastFragment)
963	++I;
964	}
965	}
966	}
967
968	// The layout is done. Mark every fragment as valid.
969	for (MCSection &Section : Asm) {
970	Asm.getFragmentOffset(F: *Section.curFragList()->Tail);
971	Asm.computeFragmentSize(F: *Section.curFragList()->Tail);
972	}
973	}
974
975	unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
976	if (STI.hasFeature(Feature: X86::Is16Bit))
977	return `4`;
978	if (!STI.hasFeature(Feature: X86::FeatureNOPL) && !STI.hasFeature(Feature: X86::Is64Bit))
979	return `1`;
980	if (STI.hasFeature(Feature: X86::TuningFast7ByteNOP))
981	return `7`;
982	if (STI.hasFeature(Feature: X86::TuningFast15ByteNOP))
983	return `15`;
984	if (STI.hasFeature(Feature: X86::TuningFast11ByteNOP))
985	return `11`;
986	// FIXME: handle 32-bit mode
987	// 15-bytes is the longest single NOP instruction, but 10-bytes is
988	// commonly the longest that can be efficiently decoded.
989	return `10`;
990	}
991
992	/// Write a sequence of optimal nops to the output, covering \p Count
993	/// bytes.
994	/// \return - true on success, false on failure
995	bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
996	const MCSubtargetInfo STI) const* {
997	static const char Nops32Bit[`10`][`11`] = {
998	// nop
999	"\x90",
1000	// xchg %ax,%ax
1001	"\x66\x90",
1002	// nopl (%[re]ax)
1003	"\x0f\x1f\x00",
1004	// nopl 0(%[re]ax)
1005	"\x0f\x1f\x40\x00",
1006	// nopl 0(%[re]ax,%[re]ax,1)
1007	"\x0f\x1f\x44\x00\x00",
1008	// nopw 0(%[re]ax,%[re]ax,1)
1009	"\x66\x0f\x1f\x44\x00\x00",
1010	// nopl 0L(%[re]ax)
1011	"\x0f\x1f\x80\x00\x00\x00\x00",
1012	// nopl 0L(%[re]ax,%[re]ax,1)
1013	"\x0f\x1f\x84\x00\x00\x00\x00\x00",
1014	// nopw 0L(%[re]ax,%[re]ax,1)
1015	"\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1016	// nopw %cs:0L(%[re]ax,%[re]ax,1)
1017	"\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1018	};
1019
1020	// 16-bit mode uses different nop patterns than 32-bit.
1021	static const char Nops16Bit[`4`][`11`] = {
1022	// nop
1023	"\x90",
1024	// xchg %eax,%eax
1025	"\x66\x90",
1026	// lea 0(%si),%si
1027	"\x8d\x74\x00",
1028	// lea 0w(%si),%si
1029	"\x8d\xb4\x00\x00",
1030	};
1031
1032	const char(*Nops)[`11`] =
1033	STI->hasFeature(Feature: X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1034
1035	uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(STI: *STI);
1036
1037	// Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1038	// length.
1039	do {
1040	const uint8_t ThisNopLength = (uint8_t) std::min(a: Count, b: MaxNopLength);
1041	const uint8_t Prefixes = ThisNopLength <= `10` ? `0` : ThisNopLength - `10`;
1042	for (uint8_t i = `0`; i < Prefixes; i++)
1043	OS << `'\x66'`;
1044	const uint8_t Rest = ThisNopLength - Prefixes;
1045	if (Rest != `0`)
1046	OS.write(Ptr: Nops[Rest - `1`], Size: Rest);
1047	Count -= ThisNopLength;
1048	} while (Count != `0`);
1049
1050	return true;
1051	}
1052
/* *** */
1054
1055	namespace {
1056
1057	class ELFX86AsmBackend : public X86AsmBackend {
1058	public:
1059	uint8_t OSABI;
1060	ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1061	: X86AsmBackend (T, STI), OSABI(OSABI) {}
1062	};
1063
1064	class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1065	public:
1066	ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1067	const MCSubtargetInfo &STI)
1068	: ELFX86AsmBackend (T, OSABI, STI) {}
1069
1070	std::unique_ptr<MCObjectTargetWriter>
1071	createObjectTargetWriter() const override {
1072	return createX86ELFObjectWriter(/IsELF64/ false, OSABI, EMachine: ELF::EM_386);
1073	}
1074	};
1075
1076	class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1077	public:
1078	ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1079	const MCSubtargetInfo &STI)
1080	: ELFX86AsmBackend (T, OSABI, STI) {}
1081
1082	std::unique_ptr<MCObjectTargetWriter>
1083	createObjectTargetWriter() const override {
1084	return createX86ELFObjectWriter(/IsELF64/ false, OSABI,
1085	EMachine: ELF::EM_X86_64);
1086	}
1087	};
1088
1089	class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1090	public:
1091	ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1092	const MCSubtargetInfo &STI)
1093	: ELFX86AsmBackend (T, OSABI, STI) {}
1094
1095	std::unique_ptr<MCObjectTargetWriter>
1096	createObjectTargetWriter() const override {
1097	return createX86ELFObjectWriter(/IsELF64/ false, OSABI,
1098	EMachine: ELF::EM_IAMCU);
1099	}
1100	};
1101
1102	class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1103	public:
1104	ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1105	const MCSubtargetInfo &STI)
1106	: ELFX86AsmBackend (T, OSABI, STI) {}
1107
1108	std::unique_ptr<MCObjectTargetWriter>
1109	createObjectTargetWriter() const override {
1110	return createX86ELFObjectWriter(/IsELF64/ true, OSABI, EMachine: ELF::EM_X86_64);
1111	}
1112	};
1113
1114	class WindowsX86AsmBackend : public X86AsmBackend {
1115	bool Is64Bit;
1116
1117	public:
1118	WindowsX86AsmBackend(const Target &T, bool is64Bit,
1119	const MCSubtargetInfo &STI)
1120	: X86AsmBackend (T, STI)
1121	, Is64Bit(is64Bit) {
1122	}
1123
1124	std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1125	return StringSwitch<std::optional<MCFixupKind>>(Name)
1126	.Case(S: "dir32", Value: FK_Data_4)
1127	.Case(S: "secrel32", Value: FK_SecRel_4)
1128	.Case(S: "secidx", Value: FK_SecRel_2)
1129	.Default(Value: MCAsmBackend::getFixupKind(Name));
1130	}
1131
1132	std::unique_ptr<MCObjectTargetWriter>
1133	createObjectTargetWriter() const override {
1134	return createX86WinCOFFObjectWriter(Is64Bit);
1135	}
1136	};
1137
namespace CU {

/// Values used when building a compact unwind encoding: four mode values and
/// two register-field masks.
enum CompactUnwindEncodings {
  /// Frame based on [RE]BP: [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME                   = 0x01000000,

  /// Frameless function whose constant stack size is small.
  UNWIND_MODE_STACK_IMMD                 = 0x02000000,

  /// Frameless function whose constant stack size is large.
  UNWIND_MODE_STACK_IND                  = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF                      = 0x04000000,

  /// Mask applied to the register encoding of a frame-based function.
  UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,

  /// Mask applied to the register permutation of a frameless function.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU
1163
1164	class DarwinX86AsmBackend : public X86AsmBackend {
1165	const MCRegisterInfo &MRI;
1166
1167	/// Number of registers that can be saved in a compact unwind encoding.
1168	enum { CU_NUM_SAVED_REGS = `6` };
1169
1170	mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1171	Triple TT;
1172	bool Is64Bit;
1173
1174	unsigned OffsetSize; ///< Offset of a "push" instruction.
1175	unsigned MoveInstrSize; ///< Size of a "move" instruction.
1176	unsigned StackDivide; ///< Amount to adjust stack size by.
1177	protected:
1178	/// Size of a "push" instruction for the given register.
1179	unsigned PushInstrSize(unsigned Reg) const {
1180	switch (Reg) {
1181	case X86::EBX:
1182	case X86::ECX:
1183	case X86::EDX:
1184	case X86::EDI:
1185	case X86::ESI:
1186	case X86::EBP:
1187	case X86::RBX:
1188	case X86::RBP:
1189	return `1`;
1190	case X86::R12:
1191	case X86::R13:
1192	case X86::R14:
1193	case X86::R15:
1194	return `2`;
1195	}
1196	return `1`;
1197	}
1198
1199	private:
1200	/// Get the compact unwind number for a given register. The number
1201	/// corresponds to the enum lists in compact_unwind_encoding.h.
1202	int getCompactUnwindRegNum(unsigned Reg) const {
1203	static const MCPhysReg CU32BitRegs[`7`] = {
1204	X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, `0`
1205	};
1206	static const MCPhysReg CU64BitRegs[] = {
1207	X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, `0`
1208	};
1209	const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1210	for (int Idx = `1`; *CURegs; ++CURegs, ++Idx)
1211	if (*CURegs == Reg)
1212	return Idx;
1213
1214	return -`1`;
1215	}
1216
1217	/// Return the registers encoded for a compact encoding with a frame
1218	/// pointer.
1219	uint32_t encodeCompactUnwindRegistersWithFrame() const {
1220	// Encode the registers in the order they were saved --- 3-bits per
1221	// register. The list of saved registers is assumed to be in reverse
1222	// order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1223	uint32_t RegEnc = `0`;
1224	for (int i = `0`, Idx = `0`; i != CU_NUM_SAVED_REGS; ++i) {
1225	unsigned Reg = SavedRegs[i];
1226	if (Reg == `0`) break;
1227
1228	int CURegNum = getCompactUnwindRegNum(Reg);
1229	if (CURegNum == -`1`) return ~`0U`;
1230
1231	// Encode the 3-bit register number in order, skipping over 3-bits for
1232	// each register.
1233	RegEnc \|= (CURegNum & `0x7`) << (Idx++ * `3`);
1234	}
1235
1236	assert((RegEnc & `0x3FFFF`) == RegEnc &&
1237	"Invalid compact register encoding!");
1238	return RegEnc;
1239	}
1240
1241	/// Create the permutation encoding used with frameless stacks. It is
1242	/// passed the number of registers to be saved and an array of the registers
1243	/// saved.
1244	uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1245	// The saved registers are numbered from 1 to 6. In order to encode the
1246	// order in which they were saved, we re-number them according to their
1247	// place in the register order. The re-numbering is relative to the last
1248	// re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1249	// that order:
1250	//
1251	// Orig Re-Num
1252	// ---- ------
1253	// 6 6
1254	// 2 2
1255	// 4 3
1256	// 5 3
1257	//
1258	for (unsigned i = `0`; i < RegCount; ++i) {
1259	int CUReg = getCompactUnwindRegNum(Reg: SavedRegs[i]);
1260	if (CUReg == -`1`) return ~`0U`;
1261	SavedRegs[i] = CUReg;
1262	}
1263
1264	// Reverse the list.
1265	std::reverse(first: &SavedRegs[`0`], last: &SavedRegs[CU_NUM_SAVED_REGS]);
1266
1267	uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1268	for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1269	unsigned Countless = `0`;
1270	for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1271	if (SavedRegs[j] < SavedRegs[i])
1272	++Countless;
1273
1274	RenumRegs[i] = SavedRegs[i] - Countless - `1`;
1275	}
1276
1277	// Take the renumbered values and encode them into a 10-bit number.
1278	uint32_t permutationEncoding = `0`;
1279	switch (RegCount) {
1280	case `6`:
1281	permutationEncoding \|= `120` * RenumRegs[`0`] + `24` * RenumRegs[`1`]
1282	+ `6` * RenumRegs[`2`] + `2` * RenumRegs[`3`]
1283	+ RenumRegs[`4`];
1284	break;
1285	case `5`:
1286	permutationEncoding \|= `120` * RenumRegs[`1`] + `24` * RenumRegs[`2`]
1287	+ `6` * RenumRegs[`3`] + `2` * RenumRegs[`4`]
1288	+ RenumRegs[`5`];
1289	break;
1290	case `4`:
1291	permutationEncoding \|= `60` * RenumRegs[`2`] + `12` * RenumRegs[`3`]
1292	+ `3` * RenumRegs[`4`] + RenumRegs[`5`];
1293	break;
1294	case `3`:
1295	permutationEncoding \|= `20` * RenumRegs[`3`] + `4` * RenumRegs[`4`]
1296	+ RenumRegs[`5`];
1297	break;
1298	case `2`:
1299	permutationEncoding \|= `5` * RenumRegs[`4`] + RenumRegs[`5`];
1300	break;
1301	case `1`:
1302	permutationEncoding \|= RenumRegs[`5`];
1303	break;
1304	}
1305
1306	assert((permutationEncoding & `0x3FF`) == permutationEncoding &&
1307	"Invalid compact register encoding!");
1308	return permutationEncoding;
1309	}
1310
1311	public:
1312	DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1313	const MCSubtargetInfo &STI)
1314	: X86AsmBackend (T, STI), MRI(MRI), TT (STI.getTargetTriple()),
1315	Is64Bit(TT.isArch64Bit()) {
1316	memset(s: SavedRegs, c: `0`, n: sizeof(SavedRegs));
1317	OffsetSize = Is64Bit ? `8` : `4`;
1318	MoveInstrSize = Is64Bit ? `3` : `2`;
1319	StackDivide = Is64Bit ? `8` : `4`;
1320	}
1321
1322	std::unique_ptr<MCObjectTargetWriter>
1323	createObjectTargetWriter() const override {
1324	uint32_t CPUType = cantFail(ValOrErr: MachO::getCPUType(T: TT));
1325	uint32_t CPUSubType = cantFail(ValOrErr: MachO::getCPUSubType(T: TT));
1326	return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubtype: CPUSubType);
1327	}
1328
1329	/// Implementation of algorithm to generate the compact unwind encoding
1330	/// for the CFI instructions.
1331	uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1332	const MCContext Ctxt) const* override {
1333	ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1334	if (Instrs.empty()) return `0`;
1335	if (!isDarwinCanonicalPersonality(Sym: FI->Personality) &&
1336	!Ctxt->emitCompactUnwindNonCanonical())
1337	return CU::UNWIND_MODE_DWARF;
1338
1339	// Reset the saved registers.
1340	unsigned SavedRegIdx = `0`;
1341	memset(s: SavedRegs, c: `0`, n: sizeof(SavedRegs));
1342
1343	bool HasFP = false;
1344
1345	// Encode that we are using EBP/RBP as the frame pointer.
1346	uint64_t CompactUnwindEncoding = `0`;
1347
1348	unsigned SubtractInstrIdx = Is64Bit ? `3` : `2`;
1349	unsigned InstrOffset = `0`;
1350	unsigned StackAdjust = `0`;
1351	uint64_t StackSize = `0`;
1352	int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();
1353
1354	for (const MCCFIInstruction &Inst : Instrs) {
1355	switch (Inst.getOperation()) {
1356	default:
1357	// Any other CFI directives indicate a frame that we aren't prepared
1358	// to represent via compact unwind, so just bail out.
1359	return CU::UNWIND_MODE_DWARF;
1360	case MCCFIInstruction::OpDefCfaRegister: {
1361	// Defines a frame pointer. E.g.
1362	//
1363	// movq %rsp, %rbp
1364	// L0:
1365	// .cfi_def_cfa_register %rbp
1366	//
1367	HasFP = true;
1368
1369	// If the frame pointer is other than esp/rsp, we do not have a way to
1370	// generate a compact unwinding representation, so bail out.
1371	if (MRI.getLLVMRegNum(RegNum: Inst.getRegister(), isEH: true*) !=
1372	(Is64Bit ? X86::RBP : X86::EBP))
1373	return CU::UNWIND_MODE_DWARF;
1374
1375	// Reset the counts.
1376	memset(s: SavedRegs, c: `0`, n: sizeof(SavedRegs));
1377	StackAdjust = `0`;
1378	SavedRegIdx = `0`;
1379	MinAbsOffset = std::numeric_limits<int64_t>::max();
1380	InstrOffset += MoveInstrSize;
1381	break;
1382	}
1383	case MCCFIInstruction::OpDefCfaOffset: {
1384	// Defines a new offset for the CFA. E.g.
1385	//
1386	// With frame:
1387	//
1388	// pushq %rbp
1389	// L0:
1390	// .cfi_def_cfa_offset 16
1391	//
1392	// Without frame:
1393	//
1394	// subq $72, %rsp
1395	// L0:
1396	// .cfi_def_cfa_offset 80
1397	//
1398	StackSize = Inst.getOffset() / StackDivide;
1399	break;
1400	}
1401	case MCCFIInstruction::OpOffset: {
1402	// Defines a "push" of a callee-saved register. E.g.
1403	//
1404	// pushq %r15
1405	// pushq %r14
1406	// pushq %rbx
1407	// L0:
1408	// subq $120, %rsp
1409	// L1:
1410	// .cfi_offset %rbx, -40
1411	// .cfi_offset %r14, -32
1412	// .cfi_offset %r15, -24
1413	//
1414	if (SavedRegIdx == CU_NUM_SAVED_REGS)
1415	// If there are too many saved registers, we cannot use a compact
1416	// unwind encoding.
1417	return CU::UNWIND_MODE_DWARF;
1418
1419	unsigned Reg = MRI.getLLVMRegNum(RegNum: Inst.getRegister(), isEH: true*);
1420	SavedRegs[SavedRegIdx++] = Reg;
1421	StackAdjust += OffsetSize;
1422	MinAbsOffset = std::min(a: MinAbsOffset, b: std::abs(i: Inst.getOffset()));
1423	InstrOffset += PushInstrSize(Reg);
1424	break;
1425	}
1426	}
1427	}
1428
1429	StackAdjust /= StackDivide;
1430
1431	if (HasFP) {
1432	if ((StackAdjust & `0xFF`) != StackAdjust)
1433	// Offset was too big for a compact unwind encoding.
1434	return CU::UNWIND_MODE_DWARF;
1435
1436	// We don't attempt to track a real StackAdjust, so if the saved registers
1437	// aren't adjacent to rbp we can't cope.
1438	if (SavedRegIdx != `0` && MinAbsOffset != `3` * (int)OffsetSize)
1439	return CU::UNWIND_MODE_DWARF;
1440
1441	// Get the encoding of the saved registers when we have a frame pointer.
1442	uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1443	if (RegEnc == ~`0U`) return CU::UNWIND_MODE_DWARF;
1444
1445	CompactUnwindEncoding \|= CU::UNWIND_MODE_BP_FRAME;
1446	CompactUnwindEncoding \|= (StackAdjust & `0xFF`) << `16`;
1447	CompactUnwindEncoding \|= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1448	} else {
1449	SubtractInstrIdx += InstrOffset;
1450	++StackAdjust;
1451
1452	if ((StackSize & `0xFF`) == StackSize) {
1453	// Frameless stack with a small stack size.
1454	CompactUnwindEncoding \|= CU::UNWIND_MODE_STACK_IMMD;
1455
1456	// Encode the stack size.
1457	CompactUnwindEncoding \|= (StackSize & `0xFF`) << `16`;
1458	} else {
1459	if ((StackAdjust & `0x7`) != StackAdjust)
1460	// The extra stack adjustments are too big for us to handle.
1461	return CU::UNWIND_MODE_DWARF;
1462
1463	// Frameless stack with an offset too large for us to encode compactly.
1464	CompactUnwindEncoding \|= CU::UNWIND_MODE_STACK_IND;
1465
1466	// Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1467	// instruction.
1468	CompactUnwindEncoding \|= (SubtractInstrIdx & `0xFF`) << `16`;
1469
1470	// Encode any extra stack adjustments (done via push instructions).
1471	CompactUnwindEncoding \|= (StackAdjust & `0x7`) << `13`;
1472	}
1473
1474	// Encode the number of registers saved. (Reverse the list first.)
1475	std::reverse(first: &SavedRegs[`0`], last: &SavedRegs[SavedRegIdx]);
1476	CompactUnwindEncoding \|= (SavedRegIdx & `0x7`) << `10`;
1477
1478	// Get the encoding of the saved registers when we don't have a frame
1479	// pointer.
1480	uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(RegCount: SavedRegIdx);
1481	if (RegEnc == ~`0U`) return CU::UNWIND_MODE_DWARF;
1482
1483	// Encode the register encoding.
1484	CompactUnwindEncoding \|=
1485	RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1486	}
1487
1488	return CompactUnwindEncoding;
1489	}
1490	};
1491
1492	} // end anonymous namespace
1493
1494	MCAsmBackend llvm::createX86_32AsmBackend(const* Target &T,
1495	const MCSubtargetInfo &STI,
1496	const MCRegisterInfo &MRI,
1497	const MCTargetOptions &Options) {
1498	const Triple &TheTriple = STI.getTargetTriple();
1499	if (TheTriple.isOSBinFormatMachO())
1500	return new DarwinX86AsmBackend (T, MRI, STI);
1501
1502	if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1503	return new WindowsX86AsmBackend (T, false, STI);
1504
1505	uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType: TheTriple.getOS());
1506
1507	if (TheTriple.isOSIAMCU())
1508	return new ELFX86_IAMCUAsmBackend (T, OSABI, STI);
1509
1510	return new ELFX86_32AsmBackend (T, OSABI, STI);
1511	}
1512
1513	MCAsmBackend llvm::createX86_64AsmBackend(const* Target &T,
1514	const MCSubtargetInfo &STI,
1515	const MCRegisterInfo &MRI,
1516	const MCTargetOptions &Options) {
1517	const Triple &TheTriple = STI.getTargetTriple();
1518	if (TheTriple.isOSBinFormatMachO())
1519	return new DarwinX86AsmBackend (T, MRI, STI);
1520
1521	if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1522	return new WindowsX86AsmBackend (T, true, STI);
1523
1524	if (TheTriple.isUEFI()) {
1525	assert(TheTriple.isOSBinFormatCOFF() &&
1526	"Only COFF format is supported in UEFI environment.");
1527	return new WindowsX86AsmBackend (T, true, STI);
1528	}
1529
1530	uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType: TheTriple.getOS());
1531
1532	if (TheTriple.isX32())
1533	return new ELFX86_X32AsmBackend (T, OSABI, STI);
1534	return new ELFX86_64AsmBackend (T, OSABI, STI);
1535	}
1536
1537	namespace {
1538	class X86ELFStreamer : public MCELFStreamer {
1539	public:
1540	X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
1541	std::unique_ptr<MCObjectWriter> OW,
1542	std::unique_ptr<MCCodeEmitter> Emitter)
1543	: MCELFStreamer (Context, std::move(TAB), std::move(OW),
1544	std::move(Emitter)) {}
1545
1546	void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
1547	};
1548	} // end anonymous namespace
1549
1550	void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
1551	const MCSubtargetInfo &STI) {
1552	auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
1553	Backend.emitInstructionBegin(OS&: S, Inst, STI);
1554	S.MCObjectStreamer::emitInstruction(Inst, STI);
1555	Backend.emitInstructionEnd(OS&: S, Inst);
1556	}
1557
1558	void X86ELFStreamer::emitInstruction(const MCInst &Inst,
1559	const MCSubtargetInfo &STI) {
1560	X86_MC::emitInstruction(S&: *this, Inst, STI);
1561	}
1562
1563	MCStreamer llvm::createX86ELFStreamer(const* Triple &T, MCContext &Context,
1564	std::unique_ptr<MCAsmBackend> &&MAB,
1565	std::unique_ptr<MCObjectWriter> &&MOW,
1566	std::unique_ptr<MCCodeEmitter> &&MCE) {
1567	return new X86ELFStreamer (Context, std::move(MAB), std::move(MOW),
1568	std::move(MCE));
1569	}
1570

Browse the source code of llvm_projects/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp